[ { "id": "0-hero/Matter-0.1-7B-boost-DPO-preview", "name": "0-hero/Matter-0.1-7B-boost-DPO-preview", "developer": "0-hero", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7448, "reward-bench/Chat": 0.9106, "reward-bench/Chat Hard": 0.6096, "reward-bench/Safety": 0.7135, "reward-bench/Reasoning": 0.8395, "reward-bench/Prior Sets (0.5 weight)": 0.5566 } }, { "id": "0-hero/Matter-0.1-7B-DPO-preview", "name": "0-hero/Matter-0.1-7B-DPO-preview", "developer": "0-hero", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7247, "reward-bench/Chat": 0.8939, "reward-bench/Chat Hard": 0.5768, "reward-bench/Safety": 0.6378, "reward-bench/Reasoning": 0.8854, "reward-bench/Prior Sets (0.5 weight)": 0.5348 } }, { "id": "0-hero/Matter-0.2-7B-DPO", "name": "Matter-0.2-7B-DPO", "developer": "0-hero", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3303, "hfopenllm_v2/BBH": 0.3596, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3814, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "01-ai/Yi-1.5-34B", "name": "Yi-1.5-34B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2841, "hfopenllm_v2/BBH": 0.5976, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.3658, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.4666 } }, { "id": "01-ai/Yi-1.5-34B-32K", "name": "Yi-1.5-34B-32K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3119, "hfopenllm_v2/BBH": 0.6016, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4398, "hfopenllm_v2/MMLU-PRO": 0.4709 } }, { "id": "01-ai/Yi-1.5-34B-Chat", "name": "Yi-1.5-34B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6067, "hfopenllm_v2/BBH": 0.6084, "hfopenllm_v2/MATH Level 5": 0.2772, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4282, "hfopenllm_v2/MMLU-PRO": 0.452 } }, { "id": "01-ai/Yi-1.5-34B-Chat-16K", "name": "Yi-1.5-34B-Chat-16K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4564, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.2137, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4398, "hfopenllm_v2/MMLU-PRO": 0.4545 } }, { "id": "01-ai/Yi-1.5-6B", "name": "Yi-1.5-6B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2617, "hfopenllm_v2/BBH": 0.4493, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4374, "hfopenllm_v2/MMLU-PRO": 0.3144 } }, { "id": "01-ai/Yi-1.5-6B-Chat", "name": "Yi-1.5-6B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5145, "hfopenllm_v2/BBH": 0.4571, "hfopenllm_v2/MATH Level 5": 0.1624, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4392, "hfopenllm_v2/MMLU-PRO": 0.3193 } }, { "id": "01-ai/Yi-1.5-9B", "name": "Yi-1.5-9B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2936, "hfopenllm_v2/BBH": 0.5143, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.3916 } }, { "id": "01-ai/Yi-1.5-9B-32K", "name": "Yi-1.5-9B-32K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2303, "hfopenllm_v2/BBH": 0.4963, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3765 } }, { "id": "01-ai/Yi-1.5-9B-Chat", "name": "Yi-1.5-9B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6046, "hfopenllm_v2/BBH": 0.5559, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4259, "hfopenllm_v2/MMLU-PRO": 0.3975 } }, { "id": "01-ai/Yi-1.5-9B-Chat-16K", "name": "Yi-1.5-9B-Chat-16K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4214, "hfopenllm_v2/BBH": 0.5153, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4099, "hfopenllm_v2/MMLU-PRO": 0.3994 } }, { "id": "01-ai/yi-34b", "name": "Yi 34B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.57, "helm_lite/NarrativeQA": 0.782, "helm_lite/NaturalQuestions (closed-book)": 0.443, "helm_lite/OpenbookQA": 0.92, "helm_lite/MMLU": 0.65, "helm_lite/MATH": 0.375, "helm_lite/GSM8K": 0.648, "helm_lite/LegalBench": 0.618, "helm_lite/MedQA": 0.656, "helm_lite/WMT 2014": 0.172, "helm_mmlu/MMLU All Subjects": 0.762, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.748, "helm_mmlu/College Physics": 0.5, "helm_mmlu/Computer Security": 0.83, "helm_mmlu/Econometrics": 0.588, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.82, "helm_mmlu/Professional Psychology": 0.835, "helm_mmlu/Us Foreign Policy": 0.91, "helm_mmlu/Astronomy": 0.901, "helm_mmlu/Business Ethics": 0.75, "helm_mmlu/Clinical Knowledge": 0.8, "helm_mmlu/Conceptual Physics": 0.77, "helm_mmlu/Electrical Engineering": 0.779, "helm_mmlu/Elementary Mathematics": 0.656, "helm_mmlu/Formal Logic": 0.548, "helm_mmlu/High School World History": 0.907, "helm_mmlu/Human Sexuality": 0.87, "helm_mmlu/International Law": 0.909, "helm_mmlu/Logical Fallacies": 0.883, "helm_mmlu/Machine Learning": 0.58, "helm_mmlu/Management": 0.893, "helm_mmlu/Marketing": 0.936, "helm_mmlu/Medical Genetics": 0.87, "helm_mmlu/Miscellaneous": 0.902, "helm_mmlu/Moral Scenarios": 0.606, "helm_mmlu/Nutrition": 0.869, "helm_mmlu/Prehistory": 0.877, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.833, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.572, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.315, "hfopenllm_v2/IFEval": 0.3046, "hfopenllm_v2/BBH": 0.5457, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4119, "hfopenllm_v2/MMLU-PRO": 0.4412 } }, { "id": "01-ai/Yi-34B-200K", "name": "Yi-34B-200K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1542, "hfopenllm_v2/BBH": 0.5442, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.3817, "hfopenllm_v2/MMLU-PRO": 0.4535 } }, { "id": "01-ai/Yi-34B-Chat", "name": "Yi-34B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4699, "hfopenllm_v2/BBH": 0.5561, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.3978, "hfopenllm_v2/MMLU-PRO": 0.4093 } }, { "id": "01-ai/yi-6b", "name": "Yi 6B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.253, "helm_lite/NarrativeQA": 0.702, "helm_lite/NaturalQuestions (closed-book)": 0.31, "helm_lite/OpenbookQA": 0.8, "helm_lite/MMLU": 0.53, "helm_lite/MATH": 0.126, "helm_lite/GSM8K": 0.375, "helm_lite/LegalBench": 0.519, "helm_lite/MedQA": 0.497, "helm_lite/WMT 2014": 0.117, "helm_mmlu/MMLU All Subjects": 0.64, "helm_mmlu/Abstract Algebra": 0.3, "helm_mmlu/Anatomy": 0.6, "helm_mmlu/College Physics": 0.422, "helm_mmlu/Computer Security": 0.73, "helm_mmlu/Econometrics": 0.351, "helm_mmlu/Global Facts": 0.43, "helm_mmlu/Jurisprudence": 0.796, "helm_mmlu/Philosophy": 0.678, "helm_mmlu/Professional Psychology": 0.668, "helm_mmlu/Us Foreign Policy": 0.87, "helm_mmlu/Astronomy": 0.684, "helm_mmlu/Business Ethics": 0.67, "helm_mmlu/Clinical Knowledge": 0.66, "helm_mmlu/Conceptual Physics": 0.621, "helm_mmlu/Electrical Engineering": 0.662, "helm_mmlu/Elementary Mathematics": 0.452, "helm_mmlu/Formal Logic": 0.452, "helm_mmlu/High School World History": 0.785, "helm_mmlu/Human Sexuality": 0.763, "helm_mmlu/International Law": 0.769, "helm_mmlu/Logical Fallacies": 0.779, "helm_mmlu/Machine Learning": 0.411, "helm_mmlu/Management": 0.806, "helm_mmlu/Marketing": 0.893, "helm_mmlu/Medical Genetics": 0.77, "helm_mmlu/Miscellaneous": 0.796, "helm_mmlu/Moral Scenarios": 0.335, "helm_mmlu/Nutrition": 0.739, "helm_mmlu/Prehistory": 0.713, "helm_mmlu/Public Relations": 0.718, "helm_mmlu/Security Studies": 0.735, "helm_mmlu/Sociology": 0.831, "helm_mmlu/Virology": 0.452, "helm_mmlu/World Religions": 0.836, "helm_mmlu/Mean win rate": 0.651, "hfopenllm_v2/IFEval": 0.2893, "hfopenllm_v2/BBH": 0.4309, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3937, "hfopenllm_v2/MMLU-PRO": 0.2991 } }, { "id": "01-ai/Yi-6B-200K", "name": "Yi-6B-200K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0843, "hfopenllm_v2/BBH": 0.4289, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4587, "hfopenllm_v2/MMLU-PRO": 0.2844 } }, { "id": "01-ai/Yi-6B-Chat", "name": "Yi-6B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3395, "hfopenllm_v2/BBH": 0.4133, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3688, "hfopenllm_v2/MMLU-PRO": 0.3061 } }, { "id": "01-ai/Yi-9B", "name": "Yi-9B", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2709, "hfopenllm_v2/BBH": 0.494, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4054, "hfopenllm_v2/MMLU-PRO": 0.3574 } }, { "id": "01-ai/Yi-9B-200K", "name": "Yi-9B-200K", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2327, "hfopenllm_v2/BBH": 0.4793, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4294, "hfopenllm_v2/MMLU-PRO": 0.3622 } }, { "id": "01-ai/Yi-Coder-9B-Chat", "name": "Yi-Coder-9B-Chat", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4817, "hfopenllm_v2/BBH": 0.4814, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3992, "hfopenllm_v2/MMLU-PRO": 0.2425 } }, { "id": "01-ai/yi-large-preview", "name": "Yi Large Preview", "developer": "01-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.471, "helm_lite/NarrativeQA": 0.373, "helm_lite/NaturalQuestions (closed-book)": 0.428, "helm_lite/OpenbookQA": 0.946, "helm_lite/MMLU": 0.712, "helm_lite/MATH": 0.712, "helm_lite/GSM8K": 0.69, "helm_lite/LegalBench": 0.519, "helm_lite/MedQA": 0.66, "helm_lite/WMT 2014": 0.176, "helm_mmlu/MMLU All Subjects": 0.793, "helm_mmlu/Abstract Algebra": 0.6, "helm_mmlu/Anatomy": 0.83, "helm_mmlu/College Physics": 0.569, "helm_mmlu/Computer Security": 0.86, "helm_mmlu/Econometrics": 0.728, "helm_mmlu/Global Facts": 0.52, "helm_mmlu/Jurisprudence": 0.852, "helm_mmlu/Philosophy": 0.842, "helm_mmlu/Professional Psychology": 0.853, "helm_mmlu/Us Foreign Policy": 0.85, "helm_mmlu/Astronomy": 0.914, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.857, "helm_mmlu/Conceptual Physics": 0.864, "helm_mmlu/Electrical Engineering": 0.779, "helm_mmlu/Elementary Mathematics": 0.685, "helm_mmlu/Formal Logic": 0.603, "helm_mmlu/High School World History": 0.928, "helm_mmlu/Human Sexuality": 0.901, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.865, "helm_mmlu/Machine Learning": 0.616, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.927, "helm_mmlu/Medical Genetics": 0.83, "helm_mmlu/Miscellaneous": 0.916, "helm_mmlu/Moral Scenarios": 0.831, "helm_mmlu/Nutrition": 0.846, "helm_mmlu/Prehistory": 0.892, "helm_mmlu/Public Relations": 0.827, "helm_mmlu/Security Studies": 0.82, "helm_mmlu/Sociology": 0.881, "helm_mmlu/Virology": 0.59, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.258 } }, { "id": "1-800-LLMs/Qwen-2.5-14B-Hindi", "name": "Qwen-2.5-14B-Hindi", "developer": "1-800-LLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5826, "hfopenllm_v2/BBH": 0.6524, "hfopenllm_v2/MATH Level 5": 0.3331, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.5263 } }, { "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct", "name": "Qwen-2.5-14B-Hindi-Custom-Instruct", "developer": "1-800-LLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3077, "hfopenllm_v2/BBH": 0.6284, "hfopenllm_v2/MATH Level 5": 0.3112, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4491, "hfopenllm_v2/MMLU-PRO": 0.5164 } }, { "id": "1024m/PHI-4-Hindi", "name": "PHI-4-Hindi", "developer": "1024m", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0082, "hfopenllm_v2/BBH": 0.671, "hfopenllm_v2/MATH Level 5": 0.2334, "hfopenllm_v2/GPQA": 0.3977, "hfopenllm_v2/MUSR": 0.4914, "hfopenllm_v2/MMLU-PRO": 0.5239 } }, { "id": "1024m/QWEN-14B-B100", "name": "QWEN-14B-B100", "developer": "1024m", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7762, "hfopenllm_v2/BBH": 0.6533, "hfopenllm_v2/MATH Level 5": 0.5438, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.41, "hfopenllm_v2/MMLU-PRO": 0.5179 } }, { "id": "152334H/miqu-1-70b-sf", "name": "miqu-1-70b-sf", "developer": "152334H", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5182, "hfopenllm_v2/BBH": 0.6102, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4582, "hfopenllm_v2/MMLU-PRO": 0.4228 } }, { "id": "1TuanPham/T-VisStar-7B-v0.1", "name": "T-VisStar-7B-v0.1", "developer": "1TuanPham", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3607, "hfopenllm_v2/BBH": 0.5052, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.3211 } }, { "id": "1TuanPham/T-VisStar-v0.1", "name": "T-VisStar-v0.1", "developer": "1TuanPham", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3607, "hfopenllm_v2/BBH": 0.5052, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.3211 } }, { "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B", "name": "L-3.1-Science-Writer-8B", "developer": "3rd-Degree-Burn", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4263, "hfopenllm_v2/BBH": 0.5041, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3959, "hfopenllm_v2/MMLU-PRO": 0.3649 } }, { "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot", "name": "Llama-3.1-8B-Squareroot", "developer": "3rd-Degree-Burn", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2213, "hfopenllm_v2/BBH": 0.3461, "hfopenllm_v2/MATH Level 5": 0.2659, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3089, "hfopenllm_v2/MMLU-PRO": 0.175 } }, { "id": "3rd-Degree-Burn/Llama-3.1-8B-Squareroot-v1", "name": "Llama-3.1-8B-Squareroot-v1", "developer": "3rd-Degree-Burn", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2892, "hfopenllm_v2/BBH": 0.3343, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "3rd-Degree-Burn/Llama-Squared-8B", "name": "Llama-Squared-8B", "developer": "3rd-Degree-Burn", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2755, "hfopenllm_v2/BBH": 0.4431, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3089, "hfopenllm_v2/MMLU-PRO": 0.2366 } }, { "id": "4season/final_model_test_v2", "name": "final_model_test_v2", "developer": "4season", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3191, "hfopenllm_v2/BBH": 0.6342, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4314, "hfopenllm_v2/MMLU-PRO": 0.3528 } }, { "id": "aaditya/Llama3-OpenBioLLM-70B", "name": "Llama3-OpenBioLLM-70B", "developer": "aaditya", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7597, "hfopenllm_v2/BBH": 0.6399, "hfopenllm_v2/MATH Level 5": 0.1971, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4417, "hfopenllm_v2/MMLU-PRO": 0.4867 } }, { "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview", "name": "FuseChat-Llama-3.1-8B-Instruct-preview", "developer": "AALF", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.719, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.3733 } }, { "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview", "name": "FuseChat-Llama-3.1-8B-SFT-preview", "developer": "AALF", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7281, "hfopenllm_v2/BBH": 0.524, "hfopenllm_v2/MATH Level 5": 0.2251, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.3743 } }, { "id": "AALF/gemma-2-27b-it-SimPO-37K", "name": "gemma-2-27b-it-SimPO-37K", "developer": "AALF", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2407, "hfopenllm_v2/BBH": 0.3911, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3488, "hfopenllm_v2/MMLU-PRO": 0.1971 } }, { "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps", "name": "gemma-2-27b-it-SimPO-37K-100steps", "developer": "AALF", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2568, "hfopenllm_v2/BBH": 0.3931, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3329, "hfopenllm_v2/MMLU-PRO": 0.2125 } }, { "id": "Aashraf995/Creative-7B-nerd", "name": "Creative-7B-nerd", "developer": "Aashraf995", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4722, "hfopenllm_v2/BBH": 0.5607, "hfopenllm_v2/MATH Level 5": 0.3165, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4515, "hfopenllm_v2/MMLU-PRO": 0.4492 } }, { "id": "Aashraf995/Gemma-Evo-10B", "name": "Gemma-Evo-10B", "developer": "Aashraf995", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7332, "hfopenllm_v2/BBH": 0.6044, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4275 } }, { "id": "Aashraf995/Qwen-Evo-7B", "name": "Qwen-Evo-7B", "developer": "Aashraf995", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4757, "hfopenllm_v2/BBH": 0.5709, "hfopenllm_v2/MATH Level 5": 0.3142, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4541, "hfopenllm_v2/MMLU-PRO": 0.4462 } }, { "id": "Aashraf995/QwenStock-14B", "name": "QwenStock-14B", "developer": "Aashraf995", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5009, "hfopenllm_v2/BBH": 0.655, "hfopenllm_v2/MATH Level 5": 0.3573, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4793, "hfopenllm_v2/MMLU-PRO": 0.5382 } }, { "id": "abacusai/bigstral-12b-32k", "name": "bigstral-12b-32k", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4194, "hfopenllm_v2/BBH": 0.47, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.456, "hfopenllm_v2/MMLU-PRO": 0.2641 } }, { "id": "abacusai/bigyi-15b", "name": "bigyi-15b", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2094, "hfopenllm_v2/BBH": 0.4345, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.3003 } }, { "id": "abacusai/Dracarys-72B-Instruct", "name": "Dracarys-72B-Instruct", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.6944, "hfopenllm_v2/MATH Level 5": 0.3965, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.5456 } }, { "id": "abacusai/Liberated-Qwen1.5-14B", "name": "Liberated-Qwen1.5-14B", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3631, "hfopenllm_v2/BBH": 0.4948, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4175, "hfopenllm_v2/MMLU-PRO": 0.3512 } }, { "id": "abacusai/Llama-3-Smaug-8B", "name": "Llama-3-Smaug-8B", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4867, "hfopenllm_v2/BBH": 0.4931, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3622, "hfopenllm_v2/MMLU-PRO": 0.3185 } }, { "id": "abacusai/Smaug-34B-v0.1", "name": "Smaug-34B-v0.1", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5016, "hfopenllm_v2/BBH": 0.5358, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.3979, "hfopenllm_v2/MMLU-PRO": 0.4543 } }, { "id": "abacusai/Smaug-72B-v0.1", "name": "Smaug-72B-v0.1", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5167, "hfopenllm_v2/BBH": 0.5996, "hfopenllm_v2/MATH Level 5": 0.1911, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4473, "hfopenllm_v2/MMLU-PRO": 0.4624 } }, { "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K", "name": "Smaug-Llama-3-70B-Instruct-32K", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7761, "hfopenllm_v2/BBH": 0.6493, "hfopenllm_v2/MATH Level 5": 0.2749, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.4765 } }, { "id": "abacusai/Smaug-Mixtral-v0.1", "name": "Smaug-Mixtral-v0.1", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5554, "hfopenllm_v2/BBH": 0.5162, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.3352 } }, { "id": "abacusai/Smaug-Qwen2-72B-Instruct", "name": "Smaug-Qwen2-72B-Instruct", "developer": "abacusai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7825, "hfopenllm_v2/BBH": 0.691, "hfopenllm_v2/MATH Level 5": 0.4131, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.519 } }, { "id": "AbacusResearch/Jallabi-34B", "name": "Jallabi-34B", "developer": "AbacusResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3529, "hfopenllm_v2/BBH": 0.6023, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4822, "hfopenllm_v2/MMLU-PRO": 0.4682 } }, { "id": "abhishek/autotrain-0tmgq-5tpbg", "name": "autotrain-0tmgq-5tpbg", "developer": "abhishek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1952, "hfopenllm_v2/BBH": 0.3127, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3584, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "abhishek/autotrain-llama3-70b-orpo-v1", "name": "autotrain-llama3-70b-orpo-v1", "developer": "abhishek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4233, "hfopenllm_v2/BBH": 0.5998, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.1122 } }, { "id": "abhishek/autotrain-llama3-70b-orpo-v2", "name": "autotrain-llama3-70b-orpo-v2", "developer": "abhishek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5406, "hfopenllm_v2/BBH": 0.5899, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4113, "hfopenllm_v2/MMLU-PRO": 0.4818 } }, { "id": "abhishek/autotrain-llama3-orpo-v2", "name": "autotrain-llama3-orpo-v2", "developer": "abhishek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4372, "hfopenllm_v2/BBH": 0.3159, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3792, "hfopenllm_v2/MMLU-PRO": 0.2218 } }, { "id": "abhishek/autotrain-vr4a1-e5mms", "name": "autotrain-vr4a1-e5mms", "developer": "abhishek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2142, "hfopenllm_v2/BBH": 0.5001, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.3891, "hfopenllm_v2/MMLU-PRO": 0.3667 } }, { "id": "abideen/MedPhi-4-14B-v1", "name": "MedPhi-4-14B-v1", "developer": "abideen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6277, "hfopenllm_v2/BBH": 0.6897, "hfopenllm_v2/MATH Level 5": 0.2931, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.5338 } }, { "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2", "name": "Yi-34B-200K-AEZAKMI-v2", "developer": "adamo1139", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4555, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.3886, "hfopenllm_v2/MMLU-PRO": 0.4513 } }, { "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES", "name": "QAIMath-Qwen2.5-7B-TIES", "developer": "adriszmar", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1746, "hfopenllm_v2/BBH": 0.3126, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.4096, "hfopenllm_v2/MMLU-PRO": 0.1087 } }, { "id": "AELLM/gemma-2-aeria-infinity-9b", "name": "gemma-2-aeria-infinity-9b", "developer": "AELLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7594, "hfopenllm_v2/BBH": 0.5983, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.3862 } }, { "id": "AELLM/gemma-2-lyco-infinity-9b", "name": "gemma-2-lyco-infinity-9b", "developer": "AELLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7316, "hfopenllm_v2/BBH": 0.584, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4006, "hfopenllm_v2/MMLU-PRO": 0.3787 } }, { "id": "aevalone/distill_qw_test", "name": "distill_qw_test", "developer": "aevalone", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7409, "hfopenllm_v2/BBH": 0.5246, "hfopenllm_v2/MATH Level 5": 0.4781, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.386, "hfopenllm_v2/MMLU-PRO": 0.4092 } }, { "id": "agentlans/Gemma2-9B-AdvancedFuse", "name": "Gemma2-9B-AdvancedFuse", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1543, "hfopenllm_v2/BBH": 0.5859, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4 } }, { "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K", "name": "Llama-3.2-1B-Instruct-CrashCourse12K", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5395, "hfopenllm_v2/BBH": 0.3548, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.321, "hfopenllm_v2/MMLU-PRO": 0.1809 } }, { "id": "agentlans/Llama3.1-8B-drill", "name": "Llama3.1-8B-drill", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7652, "hfopenllm_v2/BBH": 0.5016, "hfopenllm_v2/MATH Level 5": 0.1715, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3672, "hfopenllm_v2/MMLU-PRO": 0.3776 } }, { "id": "agentlans/Llama3.1-Daredevilish", "name": "Llama3.1-Daredevilish", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6292, "hfopenllm_v2/BBH": 0.5013, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.3697 } }, { "id": "agentlans/Llama3.1-Daredevilish-Instruct", "name": "Llama3.1-Daredevilish-Instruct", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7926, "hfopenllm_v2/BBH": 0.5235, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.3877 } }, { "id": "agentlans/Llama3.1-LexiHermes-SuperStorm", "name": "Llama3.1-LexiHermes-SuperStorm", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7835, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.3963, "hfopenllm_v2/MMLU-PRO": 0.3844 } }, { "id": "agentlans/Llama3.1-SuperDeepFuse", "name": "Llama3.1-SuperDeepFuse", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7762, "hfopenllm_v2/BBH": 0.5049, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.3775 } }, { "id": "agentlans/Llama3.1-SuperDeepFuse-CrashCourse12K", "name": "Llama3.1-SuperDeepFuse-CrashCourse12K", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7187, "hfopenllm_v2/BBH": 0.5216, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4026, "hfopenllm_v2/MMLU-PRO": 0.3631 } }, { "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout", "name": "Qwen2.5-0.5B-Instruct-CrashCourse-dropout", "developer": "agentlans", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2949, "hfopenllm_v2/BBH": 0.3312, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1608 } }, { "id": "AGI-0/Art-v0-3B", "name": "Art-v0-3B", "developer": "AGI-0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3192, "hfopenllm_v2/BBH": 0.3401, "hfopenllm_v2/MATH Level 5": 0.2462, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3768, "hfopenllm_v2/MMLU-PRO": 0.1179 } }, { "id": "AGI-0/Artificium-llama3.1-8B-001", "name": "Artificium-llama3.1-8B-001", "developer": "AGI-0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5248, "hfopenllm_v2/BBH": 0.4256, "hfopenllm_v2/MATH Level 5": 0.136, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3795, "hfopenllm_v2/MMLU-PRO": 0.3182 } }, { "id": "AGI-0/smartllama3.1-8B-001", "name": "smartllama3.1-8B-001", "developer": "AGI-0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3518, "hfopenllm_v2/BBH": 0.467, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.3487 } }, { "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure", "name": "StructuredThinker-v0.3-MoreStructure", "developer": "Ahdoot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4193, "hfopenllm_v2/BBH": 0.4838, "hfopenllm_v2/MATH Level 5": 0.2908, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.361 } }, { "id": "Ahdoot/Test_StealthThinker", "name": "Test_StealthThinker", "developer": "Ahdoot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.422, "hfopenllm_v2/BBH": 0.4647, "hfopenllm_v2/MATH Level 5": 0.179, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.3597 } }, { "id": "Ahjeong/MMPO_Gemma_7b", "name": "Ahjeong/MMPO_Gemma_7b", "developer": "Ahjeong", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7587, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.614, "reward-bench/Safety": 0.7135, "reward-bench/Reasoning": 0.7756, "reward-bench/Prior Sets (0.5 weight)": 0.6831 } }, { "id": "Ahjeong/MMPO_Gemma_7b_gamma1.1_epoch3", "name": "Ahjeong/MMPO_Gemma_7b_gamma1.1_epoch3", "developer": "Ahjeong", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7652, "reward-bench/Chat": 0.9721, "reward-bench/Chat Hard": 0.6338, "reward-bench/Safety": 0.7635, "reward-bench/Reasoning": 0.7284, "reward-bench/Prior Sets (0.5 weight)": 0.6913 } }, { "id": "ahmeda335/13_outOf_32_pruned_layers_llama3.1-8b", "name": "13_outOf_32_pruned_layers_llama3.1-8b", "developer": "ahmeda335", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1748, "hfopenllm_v2/BBH": 0.2883, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3803, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "AI-MO/NuminaMath-7B-CoT", "name": "NuminaMath-7B-CoT", "developer": "AI-MO", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2689, "hfopenllm_v2/BBH": 0.4314, "hfopenllm_v2/MATH Level 5": 0.2696, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3303, "hfopenllm_v2/MMLU-PRO": 0.2868 } }, { "id": "AI-MO/NuminaMath-7B-TIR", "name": "NuminaMath-7B-TIR", "developer": "AI-MO", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2756, "hfopenllm_v2/BBH": 0.4144, "hfopenllm_v2/MATH Level 5": 0.1609, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3509, "hfopenllm_v2/MMLU-PRO": 0.2733 } }, { "id": "AI-Sweden-Models/gpt-sw3-40b", "name": "gpt-sw3-40b", "developer": "AI-Sweden-Models", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.147, "hfopenllm_v2/BBH": 0.3268, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2349, "hfopenllm_v2/MUSR": 0.3632, "hfopenllm_v2/MMLU-PRO": 0.1276 } }, { "id": "AI-Sweden-Models/Llama-3-8B-instruct", "name": "Llama-3-8B-instruct", "developer": "AI-Sweden-Models", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2401, "hfopenllm_v2/BBH": 0.4173, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4771, "hfopenllm_v2/MMLU-PRO": 0.2597 } }, { "id": "ai2/llama-2-chat-7b-nectar-3.8m.json", "name": "ai2/llama-2-chat-7b-nectar-3.8m.json", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5843, "reward-bench/Chat": 0.8631, "reward-bench/Chat Hard": 0.2654, "reward-bench/Safety": 0.6243 } }, { "id": "ai2/llama-2-chat-nectar-180k.json", "name": "ai2/llama-2-chat-nectar-180k.json", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5235, "reward-bench/Chat": 0.8827, "reward-bench/Chat Hard": 0.2851, "reward-bench/Safety": 0.4027 } }, { "id": "ai2/llama-2-chat-ultrafeedback-60k.jsonl", "name": "ai2/llama-2-chat-ultrafeedback-60k.jsonl", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.644, "reward-bench/Chat": 0.9441, "reward-bench/Chat Hard": 0.4539, "reward-bench/Safety": 0.5338 } }, { "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...", "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7008, "reward-bench/Chat": 0.9385, "reward-bench/Chat Hard": 0.3882, "reward-bench/Safety": 0.7757 } }, { "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json", "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7127, "reward-bench/Chat": 0.9358, "reward-bench/Chat Hard": 0.4079, "reward-bench/Safety": 0.7946 } }, { "id": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json", "name": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6756, "reward-bench/Chat": 0.9134, "reward-bench/Chat Hard": 0.3904, "reward-bench/Safety": 0.723 } }, { "id": "ai2/tulu-2-7b-rm-v0.json", "name": "ai2/tulu-2-7b-rm-v0.json", "developer": "AI2", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6655, "reward-bench/Chat": 0.933, "reward-bench/Chat Hard": 0.4539, "reward-bench/Safety": 0.6095 } }, { "id": "ai21/J1-Grande-v1-17B", "name": "J1-Grande v1 17B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.433, "helm_classic/MMLU": 0.27, "helm_classic/BoolQ": 0.722, "helm_classic/NarrativeQA": 0.672, "helm_classic/NaturalQuestions (open-book)": 0.578, "helm_classic/QuAC": 0.362, "helm_classic/HellaSwag": 0.739, "helm_classic/OpenbookQA": 0.52, "helm_classic/TruthfulQA": 0.193, "helm_classic/MS MARCO (TREC)": 0.341, "helm_classic/CNN/DailyMail": 0.143, "helm_classic/XSUM": 0.122, "helm_classic/IMDB": 0.953, "helm_classic/CivilComments": 0.529, "helm_classic/RAFT": 0.658 } }, { "id": "ai21/J1-Grande-v2-beta-17B", "name": "J1-Grande v2 beta 17B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.706, "helm_classic/MMLU": 0.445, "helm_classic/BoolQ": 0.812, "helm_classic/NarrativeQA": 0.725, "helm_classic/NaturalQuestions (open-book)": 0.625, "helm_classic/QuAC": 0.392, "helm_classic/HellaSwag": 0.764, "helm_classic/OpenbookQA": 0.56, "helm_classic/TruthfulQA": 0.306, "helm_classic/MS MARCO (TREC)": 0.46, "helm_classic/CNN/DailyMail": 0.146, "helm_classic/XSUM": 0.152, "helm_classic/IMDB": 0.957, "helm_classic/CivilComments": 0.546, "helm_classic/RAFT": 0.679 } }, { "id": "ai21/J1-Jumbo-v1-178B", "name": "J1-Jumbo v1 178B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.517, "helm_classic/MMLU": 0.259, "helm_classic/BoolQ": 0.776, "helm_classic/NarrativeQA": 0.695, "helm_classic/NaturalQuestions (open-book)": 0.595, "helm_classic/QuAC": 0.358, "helm_classic/HellaSwag": 0.765, "helm_classic/OpenbookQA": 0.534, "helm_classic/TruthfulQA": 0.175, "helm_classic/MS MARCO (TREC)": 0.363, "helm_classic/CNN/DailyMail": 0.144, "helm_classic/XSUM": 0.129, "helm_classic/IMDB": 0.943, "helm_classic/CivilComments": 0.553, "helm_classic/RAFT": 0.681 } }, { "id": "ai21/J1-Large-v1-7.5B", "name": "J1-Large v1 7.5B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.285, "helm_classic/MMLU": 0.241, "helm_classic/BoolQ": 0.683, "helm_classic/NarrativeQA": 0.623, "helm_classic/NaturalQuestions (open-book)": 0.532, "helm_classic/QuAC": 0.328, "helm_classic/HellaSwag": 0.7, "helm_classic/OpenbookQA": 0.514, "helm_classic/TruthfulQA": 0.197, "helm_classic/MS MARCO (TREC)": 0.292, "helm_classic/CNN/DailyMail": 0.134, "helm_classic/XSUM": 0.102, "helm_classic/IMDB": 0.956, "helm_classic/CivilComments": 0.532, "helm_classic/RAFT": 0.545 } }, { "id": "ai21/j2-grande", "name": "Jurassic-2 Grande 17B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.172, "helm_lite/NarrativeQA": 0.744, "helm_lite/NaturalQuestions (closed-book)": 0.35, "helm_lite/OpenbookQA": 0.614, "helm_lite/MMLU": 0.471, "helm_lite/MATH": 0.064, "helm_lite/GSM8K": 0.159, "helm_lite/LegalBench": 0.468, "helm_lite/MedQA": 0.39, "helm_lite/WMT 2014": 0.102 } }, { "id": "ai21/j2-jumbo", "name": "Jurassic-2 Jumbo 178B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.215, "helm_lite/NarrativeQA": 0.728, "helm_lite/NaturalQuestions (closed-book)": 0.385, "helm_lite/OpenbookQA": 0.688, "helm_lite/MMLU": 0.483, "helm_lite/MATH": 0.103, "helm_lite/GSM8K": 0.239, "helm_lite/LegalBench": 0.533, "helm_lite/MedQA": 0.431, "helm_lite/WMT 2014": 0.114 } }, { "id": "ai21/jamba-1.5-large", "name": "Jamba 1.5 Large", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.637, "helm_lite/NarrativeQA": 0.664, "helm_lite/NaturalQuestions (closed-book)": 0.394, "helm_lite/OpenbookQA": 0.948, "helm_lite/MMLU": 0.683, "helm_lite/MATH": 0.692, "helm_lite/GSM8K": 0.846, "helm_lite/LegalBench": 0.675, "helm_lite/MedQA": 0.698, "helm_lite/WMT 2014": 0.203, "helm_mmlu/MMLU All Subjects": 0.782, "helm_mmlu/Abstract Algebra": 0.53, "helm_mmlu/Anatomy": 0.793, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.8, "helm_mmlu/Econometrics": 0.614, "helm_mmlu/Global Facts": 0.54, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.849, "helm_mmlu/Professional Psychology": 0.842, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.882, "helm_mmlu/Business Ethics": 0.77, "helm_mmlu/Clinical Knowledge": 0.849, "helm_mmlu/Conceptual Physics": 0.779, "helm_mmlu/Electrical Engineering": 0.793, "helm_mmlu/Elementary Mathematics": 0.656, "helm_mmlu/Formal Logic": 0.619, "helm_mmlu/High School World History": 0.911, "helm_mmlu/Human Sexuality": 0.832, "helm_mmlu/International Law": 0.884, "helm_mmlu/Logical Fallacies": 0.859, "helm_mmlu/Machine Learning": 0.688, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.89, "helm_mmlu/Miscellaneous": 0.931, "helm_mmlu/Moral Scenarios": 0.686, "helm_mmlu/Nutrition": 0.869, "helm_mmlu/Prehistory": 0.892, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.771, "helm_mmlu/Sociology": 0.93, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.147 } }, { "id": "ai21/jamba-1.5-mini", "name": "Jamba 1.5 Mini", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.414, "helm_lite/NarrativeQA": 0.746, "helm_lite/NaturalQuestions (closed-book)": 0.388, "helm_lite/OpenbookQA": 0.89, "helm_lite/MMLU": 0.582, "helm_lite/MATH": 0.318, "helm_lite/GSM8K": 0.691, "helm_lite/LegalBench": 0.503, "helm_lite/MedQA": 0.632, "helm_lite/WMT 2014": 0.179, "helm_mmlu/MMLU All Subjects": 0.699, "helm_mmlu/Abstract Algebra": 0.33, "helm_mmlu/Anatomy": 0.711, "helm_mmlu/College Physics": 0.48, "helm_mmlu/Computer Security": 0.73, "helm_mmlu/Econometrics": 0.491, "helm_mmlu/Global Facts": 0.43, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.752, "helm_mmlu/Professional Psychology": 0.76, "helm_mmlu/Us Foreign Policy": 0.9, "helm_mmlu/Astronomy": 0.822, "helm_mmlu/Business Ethics": 0.76, "helm_mmlu/Clinical Knowledge": 0.74, "helm_mmlu/Conceptual Physics": 0.677, "helm_mmlu/Electrical Engineering": 0.683, "helm_mmlu/Elementary Mathematics": 0.553, "helm_mmlu/Formal Logic": 0.452, "helm_mmlu/High School World History": 0.84, "helm_mmlu/Human Sexuality": 0.809, "helm_mmlu/International Law": 0.893, "helm_mmlu/Logical Fallacies": 0.81, "helm_mmlu/Machine Learning": 0.509, "helm_mmlu/Management": 0.825, "helm_mmlu/Marketing": 0.915, "helm_mmlu/Medical Genetics": 0.69, "helm_mmlu/Miscellaneous": 0.902, "helm_mmlu/Moral Scenarios": 0.269, "helm_mmlu/Nutrition": 0.801, "helm_mmlu/Prehistory": 0.824, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.755, "helm_mmlu/Sociology": 0.876, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.206 } }, { "id": "ai21/jamba-instruct", "name": "Jamba Instruct", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.287, "helm_lite/NarrativeQA": 0.658, "helm_lite/NaturalQuestions (closed-book)": 0.384, "helm_lite/OpenbookQA": 0.796, "helm_lite/MMLU": 0.582, "helm_lite/MATH": 0.38, "helm_lite/GSM8K": 0.67, "helm_lite/LegalBench": 0.54, "helm_lite/MedQA": 0.519, "helm_lite/WMT 2014": 0.164, "helm_mmlu/MMLU All Subjects": 0.659, "helm_mmlu/Abstract Algebra": 0.36, "helm_mmlu/Anatomy": 0.615, "helm_mmlu/College Physics": 0.422, "helm_mmlu/Computer Security": 0.76, "helm_mmlu/Econometrics": 0.439, "helm_mmlu/Global Facts": 0.4, "helm_mmlu/Jurisprudence": 0.796, "helm_mmlu/Philosophy": 0.749, "helm_mmlu/Professional Psychology": 0.716, "helm_mmlu/Us Foreign Policy": 0.91, "helm_mmlu/Astronomy": 0.73, "helm_mmlu/Business Ethics": 0.6, "helm_mmlu/Clinical Knowledge": 0.702, "helm_mmlu/Conceptual Physics": 0.677, "helm_mmlu/Electrical Engineering": 0.621, "helm_mmlu/Elementary Mathematics": 0.497, "helm_mmlu/Formal Logic": 0.444, "helm_mmlu/High School World History": 0.797, "helm_mmlu/Human Sexuality": 0.794, "helm_mmlu/International Law": 0.835, "helm_mmlu/Logical Fallacies": 0.706, "helm_mmlu/Machine Learning": 0.536, "helm_mmlu/Management": 0.786, "helm_mmlu/Marketing": 0.885, "helm_mmlu/Medical Genetics": 0.67, "helm_mmlu/Miscellaneous": 0.865, "helm_mmlu/Moral Scenarios": 0.465, "helm_mmlu/Nutrition": 0.745, "helm_mmlu/Prehistory": 0.796, "helm_mmlu/Public Relations": 0.682, "helm_mmlu/Security Studies": 0.743, "helm_mmlu/Sociology": 0.891, "helm_mmlu/Virology": 0.53, "helm_mmlu/World Religions": 0.813, "helm_mmlu/Mean win rate": 0.887 } }, { "id": "ai21/Jurassic-2-Grande-17B", "name": "Jurassic-2 Grande 17B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.743, "helm_classic/MMLU": 0.475, "helm_classic/BoolQ": 0.826, "helm_classic/NarrativeQA": 0.737, "helm_classic/NaturalQuestions (open-book)": 0.639, "helm_classic/QuAC": 0.418, "helm_classic/HellaSwag": 0.781, "helm_classic/OpenbookQA": 0.542, "helm_classic/TruthfulQA": 0.348, "helm_classic/MS MARCO (TREC)": 0.514, "helm_classic/CNN/DailyMail": 0.144, "helm_classic/XSUM": 0.167, "helm_classic/IMDB": 0.938, "helm_classic/CivilComments": 0.547, "helm_classic/RAFT": 0.712 } }, { "id": "ai21/Jurassic-2-Jumbo-178B", "name": "Jurassic-2 Jumbo 178B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.824, "helm_classic/MMLU": 0.48, "helm_classic/BoolQ": 0.829, "helm_classic/NarrativeQA": 0.733, "helm_classic/NaturalQuestions (open-book)": 0.669, "helm_classic/QuAC": 0.435, "helm_classic/HellaSwag": 0.788, "helm_classic/OpenbookQA": 0.558, "helm_classic/TruthfulQA": 0.437, "helm_classic/MS MARCO (TREC)": 0.661, "helm_classic/CNN/DailyMail": 0.149, "helm_classic/XSUM": 0.182, "helm_classic/IMDB": 0.938, "helm_classic/CivilComments": 0.57, "helm_classic/RAFT": 0.746 } }, { "id": "ai21/Jurassic-2-Large-7.5B", "name": "Jurassic-2 Large 7.5B", "developer": "ai21", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.553, "helm_classic/MMLU": 0.339, "helm_classic/BoolQ": 0.742, "helm_classic/NarrativeQA": -1.0, "helm_classic/NaturalQuestions (open-book)": 0.589, "helm_classic/QuAC": -1.0, "helm_classic/HellaSwag": 0.729, "helm_classic/OpenbookQA": 0.53, "helm_classic/TruthfulQA": 0.245, "helm_classic/MS MARCO (TREC)": 0.464, "helm_classic/CNN/DailyMail": 0.136, "helm_classic/XSUM": 0.142, "helm_classic/IMDB": 0.956, "helm_classic/CivilComments": 0.57, "helm_classic/RAFT": 0.622 } }, { "id": "ai21labs/Jamba-v0.1", "name": "Jamba-v0.1", "developer": "ai21labs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2026, "hfopenllm_v2/BBH": 0.3602, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.359, "hfopenllm_v2/MMLU-PRO": 0.2492 } }, { "id": "ai4bharat/Airavata", "name": "Airavata", "developer": "ai4bharat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0559, "hfopenllm_v2/BBH": 0.3628, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3763, "hfopenllm_v2/MMLU-PRO": 0.1635 } }, { "id": "AI4free/Dhanishtha", "name": "Dhanishtha", "developer": "AI4free", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2451, "hfopenllm_v2/BBH": 0.3404, "hfopenllm_v2/MATH Level 5": 0.256, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3569, "hfopenllm_v2/MMLU-PRO": 0.1643 } }, { "id": "AI4free/t2", "name": "t2", "developer": "AI4free", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3867, "hfopenllm_v2/BBH": 0.291, "hfopenllm_v2/MATH Level 5": 0.1896, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3846, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0", "name": "Cybernet-Sec-3B-R1-V0", "developer": "AicoresSecurity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6358, "hfopenllm_v2/BBH": 0.4497, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.301 } }, { "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder", "name": "Cybernet-Sec-3B-R1-V0-Coder", "developer": "AicoresSecurity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7098, "hfopenllm_v2/BBH": 0.4478, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.3178 } }, { "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1", "name": "Cybernet-Sec-3B-R1-V1", "developer": "AicoresSecurity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6146, "hfopenllm_v2/BBH": 0.4282, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3287, "hfopenllm_v2/MMLU-PRO": 0.2876 } }, { "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1", "name": "Cybernet-Sec-3B-R1-V1.1", "developer": "AicoresSecurity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.673, "hfopenllm_v2/BBH": 0.4392, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.3088 } }, { "id": "AIDC-AI/Marco-o1", "name": "Marco-o1", "developer": "AIDC-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4771, "hfopenllm_v2/BBH": 0.5364, "hfopenllm_v2/MATH Level 5": 0.3746, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.4117 } }, { "id": "aixonlab/Aether-12b", "name": "Aether-12b", "developer": "aixonlab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2347, "hfopenllm_v2/BBH": 0.5179, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.3829, "hfopenllm_v2/MMLU-PRO": 0.341 } }, { "id": "aixonlab/Grey-12b", "name": "Grey-12b", "developer": "aixonlab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3968, "hfopenllm_v2/BBH": 0.5699, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4516, "hfopenllm_v2/MMLU-PRO": 0.3779 } }, { "id": "aixonlab/Zara-14b-v1.2", "name": "Zara-14b-v1.2", "developer": "aixonlab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6197, "hfopenllm_v2/BBH": 0.6405, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4675, "hfopenllm_v2/MMLU-PRO": 0.5263 } }, { "id": "akhadangi/Llama3.2.1B.0.01-First", "name": "Llama3.2.1B.0.01-First", "developer": "akhadangi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0814, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3194, "hfopenllm_v2/MMLU-PRO": 0.1197 } }, { "id": "akhadangi/Llama3.2.1B.0.01-Last", "name": "Llama3.2.1B.0.01-Last", "developer": "akhadangi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0917, "hfopenllm_v2/BBH": 0.3159, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3206, "hfopenllm_v2/MMLU-PRO": 0.1227 } }, { "id": "akhadangi/Llama3.2.1B.0.1-First", "name": "Llama3.2.1B.0.1-First", "developer": "akhadangi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1001, "hfopenllm_v2/BBH": 0.312, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "akhadangi/Llama3.2.1B.0.1-Last", "name": "Llama3.2.1B.0.1-Last", "developer": "akhadangi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.095, "hfopenllm_v2/BBH": 0.3164, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1178 } }, { "id": "akhadangi/Llama3.2.1B.BaseFiT", "name": "Llama3.2.1B.BaseFiT", "developer": "akhadangi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0883, "hfopenllm_v2/BBH": 0.3175, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3221, "hfopenllm_v2/MMLU-PRO": 0.1172 } }, { "id": "akjindal53244/Llama-3.1-Storm-8B", "name": "Llama-3.1-Storm-8B", "developer": "akjindal53244", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8051, "hfopenllm_v2/BBH": 0.5189, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4028, "hfopenllm_v2/MMLU-PRO": 0.3803 } }, { "id": "alcholjung/llama3_medical_tuned", "name": "llama3_medical_tuned", "developer": "alcholjung", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0106, "hfopenllm_v2/BBH": 0.4513, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.466, "hfopenllm_v2/MMLU-PRO": 0.2946 } }, { "id": "Alepach/notHumpback-M0", "name": "notHumpback-M0", "developer": "Alepach", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.235, "hfopenllm_v2/BBH": 0.2785, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3552, "hfopenllm_v2/MMLU-PRO": 0.1119 } }, { "id": "Alepach/notHumpback-M1", "name": "notHumpback-M1", "developer": "Alepach", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2207, "hfopenllm_v2/BBH": 0.2882, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1091 } }, { "id": "Alepach/notHumpback-M1-v2", "name": "notHumpback-M1-v2", "developer": "Alepach", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2277, "hfopenllm_v2/BBH": 0.2776, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3473, "hfopenllm_v2/MMLU-PRO": 0.1119 } }, { "id": "aleph-alpha/Luminous-Base-13B", "name": "Luminous Base 13B", "developer": "aleph-alpha", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.315, "helm_classic/MMLU": 0.27, "helm_classic/BoolQ": 0.719, "helm_classic/NarrativeQA": 0.605, "helm_classic/NaturalQuestions (open-book)": 0.568, "helm_classic/QuAC": 0.334, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.182, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.11, "helm_classic/XSUM": 0.105, "helm_classic/IMDB": 0.939, "helm_classic/CivilComments": 0.544, "helm_classic/RAFT": 0.473 } }, { "id": "aleph-alpha/Luminous-Extended-30B", "name": "Luminous Extended 30B", "developer": "aleph-alpha", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.485, "helm_classic/MMLU": 0.321, "helm_classic/BoolQ": 0.767, "helm_classic/NarrativeQA": 0.665, "helm_classic/NaturalQuestions (open-book)": 0.609, "helm_classic/QuAC": 0.349, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.221, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.139, "helm_classic/XSUM": 0.124, "helm_classic/IMDB": 0.947, "helm_classic/CivilComments": 0.524, "helm_classic/RAFT": 0.523 } }, { "id": "aleph-alpha/Luminous-Supreme-70B", "name": "Luminous Supreme 70B", "developer": "aleph-alpha", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.662, "helm_classic/MMLU": 0.38, "helm_classic/BoolQ": 0.775, "helm_classic/NarrativeQA": 0.711, "helm_classic/NaturalQuestions (open-book)": 0.649, "helm_classic/QuAC": 0.37, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.222, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.15, "helm_classic/XSUM": 0.136, "helm_classic/IMDB": 0.959, "helm_classic/CivilComments": 0.562, "helm_classic/RAFT": 0.653 } }, { "id": "AlephAlpha/luminous-base", "name": "Luminous Base 13B", "developer": "AlephAlpha", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.041, "helm_lite/NarrativeQA": 0.633, "helm_lite/NaturalQuestions (closed-book)": 0.197, "helm_lite/OpenbookQA": 0.286, "helm_lite/MMLU": 0.243, "helm_lite/MATH": 0.026, "helm_lite/GSM8K": 0.028, "helm_lite/LegalBench": 0.332, "helm_lite/MedQA": 0.26, "helm_lite/WMT 2014": 0.066 } }, { "id": "AlephAlpha/luminous-extended", "name": "Luminous Extended 30B", "developer": "AlephAlpha", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.078, "helm_lite/NarrativeQA": 0.684, "helm_lite/NaturalQuestions (closed-book)": 0.253, "helm_lite/OpenbookQA": 0.272, "helm_lite/MMLU": 0.248, "helm_lite/MATH": 0.04, "helm_lite/GSM8K": 0.075, "helm_lite/LegalBench": 0.421, "helm_lite/MedQA": 0.276, "helm_lite/WMT 2014": 0.083 } }, { "id": "AlephAlpha/luminous-supreme", "name": "Luminous Supreme 70B", "developer": "AlephAlpha", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.145, "helm_lite/NarrativeQA": 0.743, "helm_lite/NaturalQuestions (closed-book)": 0.299, "helm_lite/OpenbookQA": 0.284, "helm_lite/MMLU": 0.316, "helm_lite/MATH": 0.078, "helm_lite/GSM8K": 0.137, "helm_lite/LegalBench": 0.452, "helm_lite/MedQA": 0.276, "helm_lite/WMT 2014": 0.102 } }, { "id": "Alibaba-NLP/gte-Qwen2-7B-instruct", "name": "gte-Qwen2-7B-instruct", "developer": "Alibaba-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2255, "hfopenllm_v2/BBH": 0.4495, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3559, "hfopenllm_v2/MMLU-PRO": 0.3321 } }, { "id": "alibaba/qwen-3-coder-480b", "name": "Qwen 3 Coder 480B", "developer": "Alibaba", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 23.9 } }, { "id": "alibaba/qwen3-235b-a22b-instruct-2507", "name": "qwen3-235b-a22b-instruct-2507", "developer": "alibaba", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8798, "global-mmlu-lite/Culturally Sensitive": 0.8522, "global-mmlu-lite/Culturally Agnostic": 0.9075, "global-mmlu-lite/Arabic": 0.88, "global-mmlu-lite/English": 0.89, "global-mmlu-lite/Bengali": 0.8875, "global-mmlu-lite/German": 0.885, "global-mmlu-lite/French": 0.88, "global-mmlu-lite/Hindi": 0.8775, "global-mmlu-lite/Indonesian": 0.88, "global-mmlu-lite/Italian": 0.88, "global-mmlu-lite/Japanese": 0.88, "global-mmlu-lite/Korean": 0.875, "global-mmlu-lite/Portuguese": 0.8875, "global-mmlu-lite/Spanish": 0.875, "global-mmlu-lite/Swahili": 0.87, "global-mmlu-lite/Yoruba": 0.8725, "global-mmlu-lite/Chinese": 0.8775, "global-mmlu-lite/Burmese": 0.88 } }, { "id": "alibaba/qwen3-235b-a22b-thinking-2507", "name": "qwen3-235b-a22b-thinking-2507", "developer": "Alibaba", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.1267605633802817, "livecodebenchpro/Easy Problems": 0.7605633802816901 } }, { "id": "alibaba/qwen3-30b-a3b", "name": "qwen3-30b-a3b", "developer": "Alibaba", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.028169014084507043, "livecodebenchpro/Easy Problems": 0.5774647887323944 } }, { "id": "alibaba/qwen3-max", "name": "alibaba/qwen3-max", "developer": "Alibaba", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.04225352112676056, "livecodebenchpro/Easy Problems": 0.36619718309859156 } }, { "id": "alibaba/qwen3-next-80b-a3b-thinking", "name": "qwen3-next-80b-a3b-thinking", "developer": "Alibaba", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.14084507042253522, "livecodebenchpro/Easy Problems": 0.7464788732394366 } }, { "id": "aliyun/qwen3-next-80b-a3b-thinking", "name": "qwen3-next-80b-a3b-thinking", "developer": "aliyun", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0704, "livecodebenchpro/Easy Problems": 0.6901 } }, { "id": "allenai/llama-3-tulu-2-70b-uf-mean-rm", "name": "allenai/llama-3-tulu-2-70b-uf-mean-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7019, "reward-bench/Chat": 0.8631, "reward-bench/Chat Hard": 0.5614, "reward-bench/Safety": 0.6095, "reward-bench/Reasoning": 0.8268, "reward-bench/Prior Sets (0.5 weight)": 0.5957 } }, { "id": "allenai/llama-3-tulu-2-8b-uf-mean-rm", "name": "allenai/llama-3-tulu-2-8b-uf-mean-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7342, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.5921, "reward-bench/Safety": 0.6162, "reward-bench/Reasoning": 0.8212, "reward-bench/Prior Sets (0.5 weight)": 0.6434 } }, { "id": "allenai/llama-3-tulu-2-dpo-70b", "name": "allenai/llama-3-tulu-2-dpo-70b", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7496, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.5746, "reward-bench/Safety": 0.7486, "reward-bench/Reasoning": 0.802, "reward-bench/Prior Sets (0.5 weight)": 0.5687 } }, { "id": "allenai/llama-3-tulu-2-dpo-8b", "name": "allenai/llama-3-tulu-2-dpo-8b", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7275, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.5351, "reward-bench/Safety": 0.6649, "reward-bench/Reasoning": 0.8663, "reward-bench/Prior Sets (0.5 weight)": 0.5097 } }, { "id": "allenai/Llama-3.1-70B-Instruct-RM-RB2", "name": "allenai/Llama-3.1-70B-Instruct-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9021, "reward-bench/Factuality": 0.8126, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.6995, "reward-bench/Safety": 0.9095, "reward-bench/Focus": 0.8646, "reward-bench/Ties": 0.8835, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.8355, "reward-bench/Reasoning": 0.8969, "reward-bench/Prior Sets (0.5 weight)": 0.0 } }, { "id": "allenai/Llama-3.1-8B-Base-RM-RB2", "name": "allenai/Llama-3.1-8B-Base-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.649, "reward-bench/Chat": 0.933, "reward-bench/Chat Hard": 0.7785, "reward-bench/Safety": 0.8267, "reward-bench/Reasoning": 0.7886, "reward-bench/Prior Sets (0.5 weight)": 0.0, "reward-bench/Factuality": 0.72, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.612, "reward-bench/Focus": 0.8323, "reward-bench/Ties": 0.5406 } }, { "id": "allenai/Llama-3.1-8B-Instruct-RM-RB2", "name": "allenai/Llama-3.1-8B-Instruct-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8885, "reward-bench/Factuality": 0.7432, "reward-bench/Precise IF": 0.4437, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8932, "reward-bench/Focus": 0.9071, "reward-bench/Ties": 0.7638, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.8158, "reward-bench/Reasoning": 0.887, "reward-bench/Prior Sets (0.5 weight)": 0.0 } }, { "id": "allenai/Llama-3.1-Tulu-3-70B", "name": "Llama-3.1-Tulu-3-70B", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8379, "hfopenllm_v2/BBH": 0.6157, "hfopenllm_v2/MATH Level 5": 0.3829, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4988, "hfopenllm_v2/MMLU-PRO": 0.4656 } }, { "id": "allenai/Llama-3.1-Tulu-3-70B-DPO", "name": "Llama-3.1-Tulu-3-70B-DPO", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8282, "hfopenllm_v2/BBH": 0.6146, "hfopenllm_v2/MATH Level 5": 0.4494, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4923, "hfopenllm_v2/MMLU-PRO": 0.4633 } }, { "id": "allenai/Llama-3.1-Tulu-3-70B-SFT", "name": "Llama-3.1-Tulu-3-70B-SFT", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8051, "hfopenllm_v2/BBH": 0.5951, "hfopenllm_v2/MATH Level 5": 0.3316, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.5026, "hfopenllm_v2/MMLU-PRO": 0.4624 } }, { "id": "allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2", "name": "allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.722, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.8268, "reward-bench/Safety": 0.8689, "reward-bench/Reasoning": 0.8583, "reward-bench/Prior Sets (0.5 weight)": 0.0, "reward-bench/Factuality": 0.8084, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6776, "reward-bench/Focus": 0.7778, "reward-bench/Ties": 0.8308 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B", "name": "Llama-3.1-Tulu-3-8B", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8267, "hfopenllm_v2/BBH": 0.405, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4175, "hfopenllm_v2/MMLU-PRO": 0.2827 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-DPO", "name": "Llama-3.1-Tulu-3-8B-DPO", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8029, "hfopenllm_v2/BBH": 0.4079, "hfopenllm_v2/MATH Level 5": 0.2364, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.2898 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2", "name": "allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8431, "reward-bench/Factuality": 0.7516, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8662, "reward-bench/Focus": 0.8545, "reward-bench/Ties": 0.6397, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.761, "reward-bench/Reasoning": 0.7898, "reward-bench/Prior Sets (0.5 weight)": 0.0 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2", "name": "allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6871, "reward-bench/Chat": 0.9469, "reward-bench/Chat Hard": 0.7588, "reward-bench/Safety": 0.8644, "reward-bench/Reasoning": 0.7715, "reward-bench/Prior Sets (0.5 weight)": 0.0, "reward-bench/Factuality": 0.7642, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6175, "reward-bench/Focus": 0.8485, "reward-bench/Ties": 0.6281 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-RM", "name": "Llama-3.1-Tulu-3-8B-RM", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.167, "hfopenllm_v2/BBH": 0.295, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3764, "hfopenllm_v2/MMLU-PRO": 0.1082, "reward-bench/Score": 0.59, "reward-bench/Factuality": 0.7453, "reward-bench/Precise IF": 0.3469, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.7422, "reward-bench/Focus": 0.5364, "reward-bench/Ties": 0.5243 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-SFT", "name": "Llama-3.1-Tulu-3-8B-SFT", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7403, "hfopenllm_v2/BBH": 0.3872, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.2812 } }, { "id": "allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2", "name": "allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6821, "reward-bench/Chat": 0.9497, "reward-bench/Chat Hard": 0.7917, "reward-bench/Safety": 0.8978, "reward-bench/Reasoning": 0.8005, "reward-bench/Prior Sets (0.5 weight)": 0.0, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.5792, "reward-bench/Focus": 0.8889, "reward-bench/Ties": 0.6063 } }, { "id": "allenai/olmo-1.7-7b", "name": "OLMo 1.7 7B", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.538, "helm_mmlu/Abstract Algebra": 0.33, "helm_mmlu/Anatomy": 0.496, "helm_mmlu/College Physics": 0.333, "helm_mmlu/Computer Security": 0.65, "helm_mmlu/Econometrics": 0.404, "helm_mmlu/Global Facts": 0.34, "helm_mmlu/Jurisprudence": 0.565, "helm_mmlu/Philosophy": 0.592, "helm_mmlu/Professional Psychology": 0.526, "helm_mmlu/Us Foreign Policy": 0.76, "helm_mmlu/Astronomy": 0.526, "helm_mmlu/Business Ethics": 0.59, "helm_mmlu/Clinical Knowledge": 0.57, "helm_mmlu/Conceptual Physics": 0.434, "helm_mmlu/Electrical Engineering": 0.517, "helm_mmlu/Elementary Mathematics": 0.307, "helm_mmlu/Formal Logic": 0.325, "helm_mmlu/High School World History": 0.713, "helm_mmlu/Human Sexuality": 0.595, "helm_mmlu/International Law": 0.612, "helm_mmlu/Logical Fallacies": 0.607, "helm_mmlu/Machine Learning": 0.375, "helm_mmlu/Management": 0.689, "helm_mmlu/Marketing": 0.769, "helm_mmlu/Medical Genetics": 0.56, "helm_mmlu/Miscellaneous": 0.734, "helm_mmlu/Moral Scenarios": 0.335, "helm_mmlu/Nutrition": 0.608, "helm_mmlu/Prehistory": 0.593, "helm_mmlu/Public Relations": 0.6, "helm_mmlu/Security Studies": 0.522, "helm_mmlu/Sociology": 0.751, "helm_mmlu/Virology": 0.452, "helm_mmlu/World Religions": 0.731, "helm_mmlu/Mean win rate": 0.196 } }, { "id": "allenai/OLMo-1.7-7B-hf", "name": "OLMo-1.7-7B-hf", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1569, "hfopenllm_v2/BBH": 0.3014, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1124 } }, { "id": "allenai/OLMo-1B-hf", "name": "OLMo-1B-hf", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2182, "hfopenllm_v2/BBH": 0.3052, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.1174 } }, { "id": "allenai/olmo-2-0325-32b-instruct", "name": "OLMo 2 32B Instruct March 2025", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.475, "helm_capabilities/MMLU-Pro": 0.414, "helm_capabilities/GPQA": 0.287, "helm_capabilities/IFEval": 0.78, "helm_capabilities/WildBench": 0.734, "helm_capabilities/Omni-MATH": 0.161 } }, { "id": "allenai/olmo-2-1124-13b-instruct", "name": "OLMo 2 13B Instruct November 2024", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.44, "helm_capabilities/MMLU-Pro": 0.31, "helm_capabilities/GPQA": 0.316, "helm_capabilities/IFEval": 0.73, "helm_capabilities/WildBench": 0.689, "helm_capabilities/Omni-MATH": 0.156 } }, { "id": "allenai/OLMo-2-1124-7B-Instruct", "name": "OLMo 2 7B Instruct November 2024", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.405, "helm_capabilities/MMLU-Pro": 0.292, "helm_capabilities/GPQA": 0.296, "helm_capabilities/IFEval": 0.693, "helm_capabilities/WildBench": 0.628, "helm_capabilities/Omni-MATH": 0.116, "hfopenllm_v2/IFEval": 0.7244, "hfopenllm_v2/BBH": 0.4022, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3508, "hfopenllm_v2/MMLU-PRO": 0.2672 } }, { "id": "allenai/olmo-7b", "name": "OLMo 7B", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.052, "helm_lite/NarrativeQA": 0.597, "helm_lite/NaturalQuestions (closed-book)": 0.259, "helm_lite/OpenbookQA": 0.222, "helm_lite/MMLU": 0.305, "helm_lite/MATH": 0.029, "helm_lite/GSM8K": 0.044, "helm_lite/LegalBench": 0.341, "helm_lite/MedQA": 0.229, "helm_lite/WMT 2014": 0.097, "helm_mmlu/MMLU All Subjects": 0.295, "helm_mmlu/Abstract Algebra": 0.26, "helm_mmlu/Anatomy": 0.222, "helm_mmlu/College Physics": 0.294, "helm_mmlu/Computer Security": 0.3, "helm_mmlu/Econometrics": 0.325, "helm_mmlu/Global Facts": 0.32, "helm_mmlu/Jurisprudence": 0.25, "helm_mmlu/Philosophy": 0.325, "helm_mmlu/Professional Psychology": 0.232, "helm_mmlu/Us Foreign Policy": 0.26, "helm_mmlu/Astronomy": 0.342, "helm_mmlu/Business Ethics": 0.24, "helm_mmlu/Clinical Knowledge": 0.26, "helm_mmlu/Conceptual Physics": 0.319, "helm_mmlu/Electrical Engineering": 0.29, "helm_mmlu/Elementary Mathematics": 0.254, "helm_mmlu/Formal Logic": 0.278, "helm_mmlu/High School World History": 0.253, "helm_mmlu/Human Sexuality": 0.267, "helm_mmlu/International Law": 0.306, "helm_mmlu/Logical Fallacies": 0.264, "helm_mmlu/Machine Learning": 0.286, "helm_mmlu/Management": 0.272, "helm_mmlu/Marketing": 0.269, "helm_mmlu/Medical Genetics": 0.28, "helm_mmlu/Miscellaneous": 0.292, "helm_mmlu/Moral Scenarios": 0.265, "helm_mmlu/Nutrition": 0.34, "helm_mmlu/Prehistory": 0.318, "helm_mmlu/Public Relations": 0.345, "helm_mmlu/Security Studies": 0.408, "helm_mmlu/Sociology": 0.383, "helm_mmlu/Virology": 0.416, "helm_mmlu/World Religions": 0.234, "helm_mmlu/Mean win rate": 0.68 } }, { "id": "allenai/OLMo-7B-hf", "name": "OLMo-7B-hf", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2719, "hfopenllm_v2/BBH": 0.3279, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1173 } }, { "id": "allenai/OLMo-7B-Instruct", "name": "allenai/OLMo-7B-Instruct", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6727, "reward-bench/Chat": 0.8966, "reward-bench/Chat Hard": 0.5066, "reward-bench/Safety": 0.6486, "reward-bench/Reasoning": 0.7168, "reward-bench/Prior Sets (0.5 weight)": 0.5173 } }, { "id": "allenai/OLMo-7B-Instruct-hf", "name": "OLMo-7B-Instruct-hf", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3473, "hfopenllm_v2/BBH": 0.3706, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3765, "hfopenllm_v2/MMLU-PRO": 0.1785 } }, { "id": "allenai/OLMoE-1B-7B-0125-Instruct", "name": "OLMoE 1B-7B Instruct January 2025", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.332, "helm_capabilities/MMLU-Pro": 0.169, "helm_capabilities/GPQA": 0.22, "helm_capabilities/IFEval": 0.628, "helm_capabilities/WildBench": 0.551, "helm_capabilities/Omni-MATH": 0.093, "hfopenllm_v2/IFEval": 0.6757, "hfopenllm_v2/BBH": 0.3825, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3636, "hfopenllm_v2/MMLU-PRO": 0.1915 } }, { "id": "allenai/OLMoE-1B-7B-0924", "name": "OLMoE-1B-7B-0924", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2185, "hfopenllm_v2/BBH": 0.3393, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3488, "hfopenllm_v2/MMLU-PRO": 0.174 } }, { "id": "allenai/OLMoE-1B-7B-0924-Instruct", "name": "OLMoE-1B-7B-0924-Instruct", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4667, "hfopenllm_v2/BBH": 0.3902, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3848, "hfopenllm_v2/MMLU-PRO": 0.1876 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739590997", "name": "allenai/open_instruct_dev-reward_modeling__1__1739590997", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6004, "reward-bench/Factuality": 0.7032, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.7867, "reward-bench/Focus": 0.598, "reward-bench/Ties": 0.5165 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739871066", "name": "allenai/open_instruct_dev-reward_modeling__1__1739871066", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6012, "reward-bench/Factuality": 0.6989, "reward-bench/Precise IF": 0.425, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.7978, "reward-bench/Focus": 0.604, "reward-bench/Ties": 0.4527 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739925892", "name": "allenai/open_instruct_dev-reward_modeling__1__1739925892", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6345, "reward-bench/Factuality": 0.7432, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.8111, "reward-bench/Focus": 0.7131, "reward-bench/Ties": 0.5606 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739943850", "name": "allenai/open_instruct_dev-reward_modeling__1__1739943850", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4978, "reward-bench/Factuality": 0.5726, "reward-bench/Precise IF": 0.3125, "reward-bench/Math": 0.5191, "reward-bench/Safety": 0.6489, "reward-bench/Focus": 0.6222, "reward-bench/Ties": 0.3114 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739943881", "name": "allenai/open_instruct_dev-reward_modeling__1__1739943881", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5998, "reward-bench/Factuality": 0.7032, "reward-bench/Precise IF": 0.3187, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.8222, "reward-bench/Focus": 0.6727, "reward-bench/Ties": 0.5025 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739943972", "name": "allenai/open_instruct_dev-reward_modeling__1__1739943972", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5289, "reward-bench/Factuality": 0.6168, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.5738, "reward-bench/Safety": 0.6844, "reward-bench/Focus": 0.5657, "reward-bench/Ties": 0.3577 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739957701", "name": "allenai/open_instruct_dev-reward_modeling__1__1739957701", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6194, "reward-bench/Factuality": 0.6779, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.8022, "reward-bench/Focus": 0.697, "reward-bench/Ties": 0.5822 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739971507", "name": "allenai/open_instruct_dev-reward_modeling__1__1739971507", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5717, "reward-bench/Factuality": 0.68, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.7667, "reward-bench/Focus": 0.5475, "reward-bench/Ties": 0.4545 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739971529", "name": "allenai/open_instruct_dev-reward_modeling__1__1739971529", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5564, "reward-bench/Factuality": 0.6568, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.5956, "reward-bench/Safety": 0.7533, "reward-bench/Focus": 0.5737, "reward-bench/Ties": 0.4027 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1739998765", "name": "allenai/open_instruct_dev-reward_modeling__1__1739998765", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6008, "reward-bench/Factuality": 0.7095, "reward-bench/Precise IF": 0.4125, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.8022, "reward-bench/Focus": 0.5859, "reward-bench/Ties": 0.4883 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1740005072", "name": "allenai/open_instruct_dev-reward_modeling__1__1740005072", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6097, "reward-bench/Factuality": 0.7137, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.7778, "reward-bench/Focus": 0.6343, "reward-bench/Ties": 0.5047 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1740129284", "name": "allenai/open_instruct_dev-reward_modeling__1__1740129284", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6129, "reward-bench/Factuality": 0.7116, "reward-bench/Precise IF": 0.4437, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.8022, "reward-bench/Focus": 0.6101, "reward-bench/Ties": 0.4652 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1741286813", "name": "allenai/open_instruct_dev-reward_modeling__1__1741286813", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6557, "reward-bench/Factuality": 0.6295, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.9111, "reward-bench/Focus": 0.8263, "reward-bench/Ties": 0.5365 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1741287363", "name": "allenai/open_instruct_dev-reward_modeling__1__1741287363", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6672, "reward-bench/Factuality": 0.6295, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.88, "reward-bench/Focus": 0.9374, "reward-bench/Ties": 0.5748 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1741292911", "name": "allenai/open_instruct_dev-reward_modeling__1__1741292911", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6607, "reward-bench/Factuality": 0.6589, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.9089, "reward-bench/Focus": 0.8869, "reward-bench/Ties": 0.5028 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1742338142", "name": "allenai/open_instruct_dev-reward_modeling__1__1742338142", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6344, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.7049, "reward-bench/Safety": 0.88, "reward-bench/Focus": 0.6323, "reward-bench/Ties": 0.475 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1742519610", "name": "allenai/open_instruct_dev-reward_modeling__1__1742519610", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6361, "reward-bench/Factuality": 0.7074, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.6721, "reward-bench/Safety": 0.82, "reward-bench/Focus": 0.6444, "reward-bench/Ties": 0.5915 } }, { "id": "allenai/open_instruct_dev-reward_modeling__1__1742519628", "name": "allenai/open_instruct_dev-reward_modeling__1__1742519628", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5609, "reward-bench/Factuality": 0.5179, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.8356, "reward-bench/Focus": 0.5071, "reward-bench/Ties": 0.5254 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_100pctflipped__1__1744241455", "name": "allenai/open_instruct_dev-rm_1e-6_1_100pctflipped__1__1744241455", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.0576, "reward-bench/Factuality": 0.04, "reward-bench/Precise IF": 0.1313, "reward-bench/Math": 0.0546, "reward-bench/Safety": 0.0489, "reward-bench/Focus": 0.0808, "reward-bench/Ties": -0.01 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_10pctflipped__1__1743295511", "name": "allenai/open_instruct_dev-rm_1e-6_1_10pctflipped__1__1743295511", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5499, "reward-bench/Factuality": 0.6821, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.5956, "reward-bench/Safety": 0.7356, "reward-bench/Focus": 0.5212, "reward-bench/Ties": 0.3711 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_20pctflipped__1__1743295406", "name": "allenai/open_instruct_dev-rm_1e-6_1_20pctflipped__1__1743295406", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5054, "reward-bench/Factuality": 0.6358, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.6867, "reward-bench/Focus": 0.4424, "reward-bench/Ties": 0.2922 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_30pctflipped__1__1743325136", "name": "allenai/open_instruct_dev-rm_1e-6_1_30pctflipped__1__1743325136", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.478, "reward-bench/Factuality": 0.6442, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.6356, "reward-bench/Focus": 0.2707, "reward-bench/Ties": 0.3496 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_50pctflipped__1__1744241398", "name": "allenai/open_instruct_dev-rm_1e-6_1_50pctflipped__1__1744241398", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.219, "reward-bench/Factuality": 0.2484, "reward-bench/Precise IF": 0.2812, "reward-bench/Math": 0.2623, "reward-bench/Safety": 0.3422, "reward-bench/Focus": 0.1717, "reward-bench/Ties": 0.008 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_5pctflipped__1__1743444535", "name": "allenai/open_instruct_dev-rm_1e-6_1_5pctflipped__1__1743444535", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5625, "reward-bench/Factuality": 0.6821, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.7511, "reward-bench/Focus": 0.5313, "reward-bench/Ties": 0.403 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_dpo__1__1743550054", "name": "allenai/open_instruct_dev-rm_1e-6_1_dpo__1__1743550054", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5759, "reward-bench/Factuality": 0.7074, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.7578, "reward-bench/Focus": 0.5333, "reward-bench/Ties": 0.459 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_dpo_skyworks__1__1744530271", "name": "allenai/open_instruct_dev-rm_1e-6_1_dpo_skyworks__1__1744530271", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6057, "reward-bench/Factuality": 0.5053, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.5902, "reward-bench/Safety": 0.8422, "reward-bench/Focus": 0.7798, "reward-bench/Ties": 0.5419 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_dpo_skyworkstulufull__1__1743550181", "name": "allenai/open_instruct_dev-rm_1e-6_1_dpo_skyworkstulufull__1__1743550181", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6535, "reward-bench/Factuality": 0.7137, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8244, "reward-bench/Focus": 0.7737, "reward-bench/Ties": 0.6101 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_rl__1__1743551221", "name": "allenai/open_instruct_dev-rm_1e-6_1_rl__1__1743551221", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5799, "reward-bench/Factuality": 0.7116, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.76, "reward-bench/Focus": 0.5374, "reward-bench/Ties": 0.461 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_rl_skyworks__1__1744530262", "name": "allenai/open_instruct_dev-rm_1e-6_1_rl_skyworks__1__1744530262", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5903, "reward-bench/Factuality": 0.4863, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.5738, "reward-bench/Safety": 0.8489, "reward-bench/Focus": 0.7778, "reward-bench/Ties": 0.4926 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_rl_skyworkstulufull__1__1743551523", "name": "allenai/open_instruct_dev-rm_1e-6_1_rl_skyworkstulufull__1__1743551523", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6483, "reward-bench/Factuality": 0.7074, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8222, "reward-bench/Focus": 0.7758, "reward-bench/Ties": 0.6044 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_1_skyworkstulumix__1__1743205750", "name": "allenai/open_instruct_dev-rm_1e-6_1_skyworkstulumix__1__1743205750", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5157, "reward-bench/Factuality": 0.6084, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.7089, "reward-bench/Focus": 0.4222, "reward-bench/Ties": 0.3791 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_10pctflipped__1__1743295427", "name": "allenai/open_instruct_dev-rm_1e-6_2_10pctflipped__1__1743295427", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6009, "reward-bench/Factuality": 0.7263, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.5902, "reward-bench/Safety": 0.7933, "reward-bench/Focus": 0.7273, "reward-bench/Ties": 0.3931 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_20pctflipped__1__1743295446", "name": "allenai/open_instruct_dev-rm_1e-6_2_20pctflipped__1__1743295446", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5716, "reward-bench/Factuality": 0.6779, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.5464, "reward-bench/Safety": 0.7533, "reward-bench/Focus": 0.7051, "reward-bench/Ties": 0.3534 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_30pctflipped__1__1743325094", "name": "allenai/open_instruct_dev-rm_1e-6_2_30pctflipped__1__1743325094", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5151, "reward-bench/Factuality": 0.6484, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.5574, "reward-bench/Safety": 0.7289, "reward-bench/Focus": 0.4889, "reward-bench/Ties": 0.3357 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_5pctflipped__1__1743444636", "name": "allenai/open_instruct_dev-rm_1e-6_2_5pctflipped__1__1743444636", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6119, "reward-bench/Factuality": 0.72, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8067, "reward-bench/Focus": 0.6889, "reward-bench/Ties": 0.421 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_dpo__1__1743549325", "name": "allenai/open_instruct_dev-rm_1e-6_2_dpo__1__1743549325", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6008, "reward-bench/Factuality": 0.7179, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.5956, "reward-bench/Safety": 0.8, "reward-bench/Focus": 0.6707, "reward-bench/Ties": 0.4707 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_rl__1__1743551238", "name": "allenai/open_instruct_dev-rm_1e-6_2_rl__1__1743551238", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5965, "reward-bench/Factuality": 0.7095, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.8044, "reward-bench/Focus": 0.6566, "reward-bench/Ties": 0.453 } }, { "id": "allenai/open_instruct_dev-rm_1e-6_2_skyworkstulumix__1__1743205906", "name": "allenai/open_instruct_dev-rm_1e-6_2_skyworkstulumix__1__1743205906", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5574, "reward-bench/Factuality": 0.6526, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.7711, "reward-bench/Focus": 0.5051, "reward-bench/Ties": 0.4208 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_100pctflipped__1__1744241529", "name": "allenai/open_instruct_dev-rm_2e-5_1_100pctflipped__1__1744241529", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.0719, "reward-bench/Factuality": 0.0421, "reward-bench/Precise IF": 0.2062, "reward-bench/Math": 0.0601, "reward-bench/Safety": 0.0378, "reward-bench/Focus": 0.0949, "reward-bench/Ties": -0.01 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_10pctflipped__1__1743295305", "name": "allenai/open_instruct_dev-rm_2e-5_1_10pctflipped__1__1743295305", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.553, "reward-bench/Factuality": 0.6674, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.6733, "reward-bench/Focus": 0.5697, "reward-bench/Ties": 0.4227 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_20pctflipped__1__1743324778", "name": "allenai/open_instruct_dev-rm_2e-5_1_20pctflipped__1__1743324778", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4955, "reward-bench/Factuality": 0.6189, "reward-bench/Precise IF": 0.325, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.6378, "reward-bench/Focus": 0.5657, "reward-bench/Ties": 0.2466 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_30pctflipped__1__1743326459", "name": "allenai/open_instruct_dev-rm_2e-5_1_30pctflipped__1__1743326459", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4198, "reward-bench/Factuality": 0.5747, "reward-bench/Precise IF": 0.3375, "reward-bench/Math": 0.5464, "reward-bench/Safety": 0.4933, "reward-bench/Focus": 0.3596, "reward-bench/Ties": 0.2073 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_5pctflipped__1__1743443747", "name": "allenai/open_instruct_dev-rm_2e-5_1_5pctflipped__1__1743443747", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5465, "reward-bench/Factuality": 0.6821, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.7333, "reward-bench/Focus": 0.5051, "reward-bench/Ties": 0.3713 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_1_skyworkstulumix__1__1743205935", "name": "allenai/open_instruct_dev-rm_2e-5_1_skyworkstulumix__1__1743205935", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5197, "reward-bench/Factuality": 0.6126, "reward-bench/Precise IF": 0.3375, "reward-bench/Math": 0.5847, "reward-bench/Safety": 0.7333, "reward-bench/Focus": 0.4646, "reward-bench/Ties": 0.3855 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_2_10pctflipped__1__1743295360", "name": "allenai/open_instruct_dev-rm_2e-5_2_10pctflipped__1__1743295360", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4555, "reward-bench/Factuality": 0.5495, "reward-bench/Precise IF": 0.3063, "reward-bench/Math": 0.4262, "reward-bench/Safety": 0.5711, "reward-bench/Focus": 0.6101, "reward-bench/Ties": 0.2696 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_2_20pctflipped__1__1743295366", "name": "allenai/open_instruct_dev-rm_2e-5_2_20pctflipped__1__1743295366", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4422, "reward-bench/Factuality": 0.5053, "reward-bench/Precise IF": 0.3375, "reward-bench/Math": 0.4044, "reward-bench/Safety": 0.5422, "reward-bench/Focus": 0.6646, "reward-bench/Ties": 0.1991 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_2_30pctflipped__1__1743326352", "name": "allenai/open_instruct_dev-rm_2e-5_2_30pctflipped__1__1743326352", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.341, "reward-bench/Factuality": 0.4674, "reward-bench/Precise IF": 0.2875, "reward-bench/Math": 0.3333, "reward-bench/Safety": 0.3711, "reward-bench/Focus": 0.3919, "reward-bench/Ties": 0.195 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_2_5pctflipped__1__1743444634", "name": "allenai/open_instruct_dev-rm_2e-5_2_5pctflipped__1__1743444634", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4698, "reward-bench/Factuality": 0.5853, "reward-bench/Precise IF": 0.2562, "reward-bench/Math": 0.5027, "reward-bench/Safety": 0.6489, "reward-bench/Focus": 0.5697, "reward-bench/Ties": 0.2562 } }, { "id": "allenai/open_instruct_dev-rm_2e-5_2_skyworkstulumix__1__1743205988", "name": "allenai/open_instruct_dev-rm_2e-5_2_skyworkstulumix__1__1743205988", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4791, "reward-bench/Factuality": 0.6421, "reward-bench/Precise IF": 0.3125, "reward-bench/Math": 0.541, "reward-bench/Safety": 0.6911, "reward-bench/Focus": 0.4182, "reward-bench/Ties": 0.27 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_100pctflipped__1__1744242103", "name": "allenai/open_instruct_dev-rm_3e-6_1_100pctflipped__1__1744242103", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.0607, "reward-bench/Factuality": 0.0274, "reward-bench/Precise IF": 0.1625, "reward-bench/Math": 0.0656, "reward-bench/Safety": 0.04, "reward-bench/Focus": 0.0788, "reward-bench/Ties": -0.01 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_10pctflipped__1__1743324835", "name": "allenai/open_instruct_dev-rm_3e-6_1_10pctflipped__1__1743324835", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6089, "reward-bench/Factuality": 0.7284, "reward-bench/Precise IF": 0.4375, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.7622, "reward-bench/Focus": 0.6444, "reward-bench/Ties": 0.4686 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_1pctflipped__1__1743445221", "name": "allenai/open_instruct_dev-rm_3e-6_1_1pctflipped__1__1743445221", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6032, "reward-bench/Factuality": 0.7158, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.7778, "reward-bench/Focus": 0.5859, "reward-bench/Ties": 0.5051 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_20pctflipped__1__1743324826", "name": "allenai/open_instruct_dev-rm_3e-6_1_20pctflipped__1__1743324826", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5831, "reward-bench/Factuality": 0.6947, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.74, "reward-bench/Focus": 0.5758, "reward-bench/Ties": 0.4465 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_30pctflipped__1__1743326363", "name": "allenai/open_instruct_dev-rm_3e-6_1_30pctflipped__1__1743326363", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5268, "reward-bench/Factuality": 0.68, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.7178, "reward-bench/Focus": 0.4343, "reward-bench/Ties": 0.3809 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_5pctflipped__1__1743444498", "name": "allenai/open_instruct_dev-rm_3e-6_1_5pctflipped__1__1743444498", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6093, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.4313, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.7578, "reward-bench/Focus": 0.5859, "reward-bench/Ties": 0.5143 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1__2__1743897475", "name": "allenai/open_instruct_dev-rm_3e-6_1__2__1743897475", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6122, "reward-bench/Factuality": 0.7368, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.8044, "reward-bench/Focus": 0.602, "reward-bench/Ties": 0.5071 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1__3__1744311421", "name": "allenai/open_instruct_dev-rm_3e-6_1__3__1744311421", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5995, "reward-bench/Factuality": 0.7179, "reward-bench/Precise IF": 0.3375, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.8, "reward-bench/Focus": 0.6323, "reward-bench/Ties": 0.503 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_dpo__1__1743549903", "name": "allenai/open_instruct_dev-rm_3e-6_1_dpo__1__1743549903", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6154, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.4375, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.7778, "reward-bench/Focus": 0.6061, "reward-bench/Ties": 0.5043 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_dpo_skyworks__1__1744530368", "name": "allenai/open_instruct_dev-rm_3e-6_1_dpo_skyworks__1__1744530368", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6604, "reward-bench/Factuality": 0.6316, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.9044, "reward-bench/Focus": 0.8929, "reward-bench/Ties": 0.5604 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_dpo_skyworkstulufull__1__1743550182", "name": "allenai/open_instruct_dev-rm_3e-6_1_dpo_skyworkstulufull__1__1743550182", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6783, "reward-bench/Factuality": 0.7705, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.84, "reward-bench/Focus": 0.8101, "reward-bench/Ties": 0.6427 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_no_if__2__1744316012", "name": "allenai/open_instruct_dev-rm_3e-6_1_no_if__2__1744316012", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5911, "reward-bench/Factuality": 0.7347, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.74, "reward-bench/Focus": 0.604, "reward-bench/Ties": 0.4392 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_no_if__3__1744315765", "name": "allenai/open_instruct_dev-rm_3e-6_1_no_if__3__1744315765", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5926, "reward-bench/Factuality": 0.7263, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.7889, "reward-bench/Focus": 0.5879, "reward-bench/Ties": 0.4733 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_rl__1__1743551527", "name": "allenai/open_instruct_dev-rm_3e-6_1_rl__1__1743551527", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6126, "reward-bench/Factuality": 0.7411, "reward-bench/Precise IF": 0.425, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.7822, "reward-bench/Focus": 0.5939, "reward-bench/Ties": 0.5104 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_rl_skyworks__1__1744530236", "name": "allenai/open_instruct_dev-rm_3e-6_1_rl_skyworks__1__1744530236", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6525, "reward-bench/Factuality": 0.6021, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.8933, "reward-bench/Focus": 0.8626, "reward-bench/Ties": 0.59 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_rl_skyworkstulufull__1__1743551530", "name": "allenai/open_instruct_dev-rm_3e-6_1_rl_skyworkstulufull__1__1743551530", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6849, "reward-bench/Factuality": 0.7453, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.8422, "reward-bench/Focus": 0.8404, "reward-bench/Ties": 0.6885 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulu75__1__1743534417", "name": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulu75__1__1743534417", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.586, "reward-bench/Factuality": 0.6632, "reward-bench/Precise IF": 0.425, "reward-bench/Math": 0.6557, "reward-bench/Safety": 0.7778, "reward-bench/Focus": 0.5172, "reward-bench/Ties": 0.477 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__1__1743446486", "name": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__1__1743446486", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6773, "reward-bench/Factuality": 0.7432, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.8422, "reward-bench/Focus": 0.804, "reward-bench/Ties": 0.6626 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__2__1744314745", "name": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__2__1744314745", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6793, "reward-bench/Factuality": 0.7558, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8311, "reward-bench/Focus": 0.8061, "reward-bench/Ties": 0.6485 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__3__1744311661", "name": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulufull__3__1744311661", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6611, "reward-bench/Factuality": 0.72, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.6393, "reward-bench/Safety": 0.8444, "reward-bench/Focus": 0.7636, "reward-bench/Ties": 0.6428 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulumix__1__1743204472", "name": "allenai/open_instruct_dev-rm_3e-6_1_skyworkstulumix__1__1743204472", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5778, "reward-bench/Factuality": 0.6674, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.7933, "reward-bench/Focus": 0.5172, "reward-bench/Ties": 0.5003 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_10pctflipped__1__1743295267", "name": "allenai/open_instruct_dev-rm_3e-6_2_10pctflipped__1__1743295267", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5746, "reward-bench/Factuality": 0.6505, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.5082, "reward-bench/Safety": 0.7844, "reward-bench/Focus": 0.7414, "reward-bench/Ties": 0.4128 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_1pctflipped__1__1743445759", "name": "allenai/open_instruct_dev-rm_3e-6_2_1pctflipped__1__1743445759", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6065, "reward-bench/Factuality": 0.7116, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.5792, "reward-bench/Safety": 0.8178, "reward-bench/Focus": 0.7152, "reward-bench/Ties": 0.465 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_20pctflipped__1__1743324905", "name": "allenai/open_instruct_dev-rm_3e-6_2_20pctflipped__1__1743324905", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5305, "reward-bench/Factuality": 0.5832, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.459, "reward-bench/Safety": 0.7178, "reward-bench/Focus": 0.7071, "reward-bench/Ties": 0.3849 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_30pctflipped__1__1743326363", "name": "allenai/open_instruct_dev-rm_3e-6_2_30pctflipped__1__1743326363", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4436, "reward-bench/Factuality": 0.5411, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.3115, "reward-bench/Safety": 0.6267, "reward-bench/Focus": 0.5414, "reward-bench/Ties": 0.31 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_5pctflipped__1__1743444505", "name": "allenai/open_instruct_dev-rm_3e-6_2_5pctflipped__1__1743444505", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5925, "reward-bench/Factuality": 0.68, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.5519, "reward-bench/Safety": 0.78, "reward-bench/Focus": 0.7434, "reward-bench/Ties": 0.431 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_dpo__1__1743550180", "name": "allenai/open_instruct_dev-rm_3e-6_2_dpo__1__1743550180", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6198, "reward-bench/Factuality": 0.7263, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.8133, "reward-bench/Focus": 0.7232, "reward-bench/Ties": 0.4908 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_dpo_skyworkstulufull__1__1743550187", "name": "allenai/open_instruct_dev-rm_3e-6_2_dpo_skyworkstulufull__1__1743550187", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6763, "reward-bench/Factuality": 0.7411, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.8844, "reward-bench/Focus": 0.8545, "reward-bench/Ties": 0.5908 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_rl__1__1743551509", "name": "allenai/open_instruct_dev-rm_3e-6_2_rl__1__1743551509", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6245, "reward-bench/Factuality": 0.7242, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8178, "reward-bench/Focus": 0.7253, "reward-bench/Ties": 0.5124 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_rl_skyworkstulufull__1__1743551498", "name": "allenai/open_instruct_dev-rm_3e-6_2_rl_skyworkstulufull__1__1743551498", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6673, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8622, "reward-bench/Focus": 0.8566, "reward-bench/Ties": 0.5911 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_skyworkstulu75__1__1743548926", "name": "allenai/open_instruct_dev-rm_3e-6_2_skyworkstulu75__1__1743548926", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5863, "reward-bench/Factuality": 0.6674, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8, "reward-bench/Focus": 0.5515, "reward-bench/Ties": 0.4768 } }, { "id": "allenai/open_instruct_dev-rm_3e-6_2_skyworkstulumix__1__1743205661", "name": "allenai/open_instruct_dev-rm_3e-6_2_skyworkstulumix__1__1743205661", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.589, "reward-bench/Factuality": 0.6842, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6393, "reward-bench/Safety": 0.7867, "reward-bench/Focus": 0.6081, "reward-bench/Ties": 0.447 } }, { "id": "allenai/open_instruct_dev-rm_llama70b_skyworkstulufull__4__1747266598", "name": "allenai/open_instruct_dev-rm_llama70b_skyworkstulufull__4__1747266598", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7306, "reward-bench/Factuality": 0.7474, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.694, "reward-bench/Safety": 0.8622, "reward-bench/Focus": 0.8061, "reward-bench/Ties": 0.8992 } }, { "id": "allenai/open_instruct_dev-rm_llama70b_skyworkstulufull__8__1745387923", "name": "allenai/open_instruct_dev-rm_llama70b_skyworkstulufull__8__1745387923", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7573, "reward-bench/Factuality": 0.8168, "reward-bench/Precise IF": 0.4125, "reward-bench/Math": 0.7049, "reward-bench/Safety": 0.8733, "reward-bench/Focus": 0.8545, "reward-bench/Ties": 0.8814 } }, { "id": "allenai/open_instruct_dev-rm_llama_1e-6_1__1__1743896628", "name": "allenai/open_instruct_dev-rm_llama_1e-6_1__1__1743896628", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6637, "reward-bench/Factuality": 0.6947, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8422, "reward-bench/Focus": 0.7273, "reward-bench/Ties": 0.6834 } }, { "id": "allenai/open_instruct_dev-rm_llama_1e-6_1_skyworks__1__1744062999", "name": "allenai/open_instruct_dev-rm_llama_1e-6_1_skyworks__1__1744062999", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6665, "reward-bench/Factuality": 0.5979, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.8956, "reward-bench/Focus": 0.8606, "reward-bench/Ties": 0.6422 } }, { "id": "allenai/open_instruct_dev-rm_llama_1e-6_1_skyworkstulufull__1__1743712777", "name": "allenai/open_instruct_dev-rm_llama_1e-6_1_skyworkstulufull__1__1743712777", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7038, "reward-bench/Factuality": 0.6947, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6557, "reward-bench/Safety": 0.8867, "reward-bench/Focus": 0.8586, "reward-bench/Ties": 0.7331 } }, { "id": "allenai/open_instruct_dev-rm_llama_1e-6_2__1__1743896638", "name": "allenai/open_instruct_dev-rm_llama_1e-6_2__1__1743896638", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6754, "reward-bench/Factuality": 0.6716, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.8756, "reward-bench/Focus": 0.7737, "reward-bench/Ties": 0.6976 } }, { "id": "allenai/open_instruct_dev-rm_llama_1e-6_2_skyworkstulufull__1__1743800938", "name": "allenai/open_instruct_dev-rm_llama_1e-6_2_skyworkstulufull__1__1743800938", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7241, "reward-bench/Factuality": 0.7305, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.6667, "reward-bench/Safety": 0.9422, "reward-bench/Focus": 0.9414, "reward-bench/Ties": 0.6635 } }, { "id": "allenai/open_instruct_dev-rm_llama_2e-5_1_skyworkstulufull__1__1743712885", "name": "allenai/open_instruct_dev-rm_llama_2e-5_1_skyworkstulufull__1__1743712885", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6716, "reward-bench/Factuality": 0.6632, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.82, "reward-bench/Focus": 0.8303, "reward-bench/Ties": 0.719 } }, { "id": "allenai/open_instruct_dev-rm_llama_2e-5_2_skyworkstulufull__1__1743800773", "name": "allenai/open_instruct_dev-rm_llama_2e-5_2_skyworkstulufull__1__1743800773", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6207, "reward-bench/Factuality": 0.6358, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.5902, "reward-bench/Safety": 0.8267, "reward-bench/Focus": 0.802, "reward-bench/Ties": 0.4948 } }, { "id": "allenai/open_instruct_dev-rm_llama_2e-6_1_skyworkstulufull__1__1743893867", "name": "allenai/open_instruct_dev-rm_llama_2e-6_1_skyworkstulufull__1__1743893867", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.719, "reward-bench/Factuality": 0.7263, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.6393, "reward-bench/Safety": 0.8956, "reward-bench/Focus": 0.9273, "reward-bench/Ties": 0.738 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1__1__1743929424", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1__1__1743929424", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6572, "reward-bench/Factuality": 0.7305, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8289, "reward-bench/Focus": 0.703, "reward-bench/Ties": 0.6837 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1__2__1744311395", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1__2__1744311395", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6938, "reward-bench/Factuality": 0.7537, "reward-bench/Precise IF": 0.45, "reward-bench/Math": 0.6393, "reward-bench/Safety": 0.8667, "reward-bench/Focus": 0.7616, "reward-bench/Ties": 0.6913 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1__3__1744311491", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1__3__1744311491", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6754, "reward-bench/Factuality": 0.7242, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8422, "reward-bench/Focus": 0.7535, "reward-bench/Ties": 0.6976 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworks__1__1744062787", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworks__1__1744062787", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7045, "reward-bench/Factuality": 0.6253, "reward-bench/Precise IF": 0.3812, "reward-bench/Math": 0.6667, "reward-bench/Safety": 0.92, "reward-bench/Focus": 0.9232, "reward-bench/Ties": 0.7109 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworkstulufull__2__1744311461", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworkstulufull__2__1744311461", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7189, "reward-bench/Factuality": 0.7305, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.8978, "reward-bench/Focus": 0.9374, "reward-bench/Ties": 0.7475 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworkstulufull__3__1744311780", "name": "allenai/open_instruct_dev-rm_llama_3e-6_1_skyworkstulufull__3__1744311780", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7172, "reward-bench/Factuality": 0.7242, "reward-bench/Precise IF": 0.4313, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.8778, "reward-bench/Focus": 0.897, "reward-bench/Ties": 0.7555 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_2__1__1743896489", "name": "allenai/open_instruct_dev-rm_llama_3e-6_2__1__1743896489", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6813, "reward-bench/Factuality": 0.7137, "reward-bench/Precise IF": 0.4437, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.8644, "reward-bench/Focus": 0.7596, "reward-bench/Ties": 0.6781 } }, { "id": "allenai/open_instruct_dev-rm_llama_3e-6_2_skyworkstulufull__1__1743800713", "name": "allenai/open_instruct_dev-rm_llama_3e-6_2_skyworkstulufull__1__1743800713", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7209, "reward-bench/Factuality": 0.7116, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.6612, "reward-bench/Safety": 0.9067, "reward-bench/Focus": 0.9172, "reward-bench/Ties": 0.7414 } }, { "id": "allenai/open_instruct_dev-rm_llama_4e-6_1_skyworkstulufull__1__1743893911", "name": "allenai/open_instruct_dev-rm_llama_4e-6_1_skyworkstulufull__1__1743893911", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7266, "reward-bench/Factuality": 0.7347, "reward-bench/Precise IF": 0.4313, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.8933, "reward-bench/Focus": 0.897, "reward-bench/Ties": 0.7697 } }, { "id": "allenai/open_instruct_dev-rm_llamabase_1e-6_1_skyworkstulufull__1__1745386412", "name": "allenai/open_instruct_dev-rm_llamabase_1e-6_1_skyworkstulufull__1__1745386412", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5342, "reward-bench/Factuality": 0.6042, "reward-bench/Precise IF": 0.275, "reward-bench/Math": 0.6284, "reward-bench/Safety": 0.7222, "reward-bench/Focus": 0.5818, "reward-bench/Ties": 0.3935 } }, { "id": "allenai/open_instruct_dev-rm_llamabase_1e-6_2_skyworkstulufull__1__1745441922", "name": "allenai/open_instruct_dev-rm_llamabase_1e-6_2_skyworkstulufull__1__1745441922", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6111, "reward-bench/Factuality": 0.6884, "reward-bench/Precise IF": 0.3063, "reward-bench/Math": 0.623, "reward-bench/Safety": 0.8289, "reward-bench/Focus": 0.7576, "reward-bench/Ties": 0.4628 } }, { "id": "allenai/open_instruct_dev-rm_llamabase_2e-5_1_skyworkstulufull__1__1745386495", "name": "allenai/open_instruct_dev-rm_llamabase_2e-5_1_skyworkstulufull__1__1745386495", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5825, "reward-bench/Factuality": 0.6379, "reward-bench/Precise IF": 0.325, "reward-bench/Math": 0.5355, "reward-bench/Safety": 0.8222, "reward-bench/Focus": 0.7051, "reward-bench/Ties": 0.4691 } }, { "id": "allenai/open_instruct_dev-rm_llamabase_2e-5_2_skyworkstulufull__1__1745386507", "name": "allenai/open_instruct_dev-rm_llamabase_2e-5_2_skyworkstulufull__1__1745386507", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5598, "reward-bench/Factuality": 0.5495, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.5902, "reward-bench/Safety": 0.76, "reward-bench/Focus": 0.7273, "reward-bench/Ties": 0.3754 } }, { "id": "allenai/open_instruct_dev-rm_llamabase_3e-6_1_skyworkstulufull__1__1745386507", "name": "allenai/open_instruct_dev-rm_llamabase_3e-6_1_skyworkstulufull__1__1745386507", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6101, "reward-bench/Factuality": 0.6632, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.7778, "reward-bench/Focus": 0.7111, "reward-bench/Ties": 0.5408 } }, { "id": "allenai/open_instruct_dev-rm_qwen32b_1e-6_skyworkstulufull__8__1748235917", "name": "allenai/open_instruct_dev-rm_qwen32b_1e-6_skyworkstulufull__8__1748235917", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7185, "reward-bench/Factuality": 0.7305, "reward-bench/Precise IF": 0.4125, "reward-bench/Math": 0.7158, "reward-bench/Safety": 0.7933, "reward-bench/Focus": 0.8545, "reward-bench/Ties": 0.804 } }, { "id": "allenai/open_instruct_dev-rm_qwen32b_3e-6_skyworkstulufull__8__1748288961", "name": "allenai/open_instruct_dev-rm_qwen32b_3e-6_skyworkstulufull__8__1748288961", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7325, "reward-bench/Factuality": 0.7474, "reward-bench/Precise IF": 0.4437, "reward-bench/Math": 0.7158, "reward-bench/Safety": 0.7978, "reward-bench/Focus": 0.8141, "reward-bench/Ties": 0.8763 } }, { "id": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworks__1__1744062830", "name": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworks__1__1744062830", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6022, "reward-bench/Factuality": 0.5284, "reward-bench/Precise IF": 0.325, "reward-bench/Math": 0.694, "reward-bench/Safety": 0.7556, "reward-bench/Focus": 0.7616, "reward-bench/Ties": 0.5486 } }, { "id": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworks__2__1744576024", "name": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworks__2__1744576024", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5948, "reward-bench/Factuality": 0.5579, "reward-bench/Precise IF": 0.2875, "reward-bench/Math": 0.6776, "reward-bench/Safety": 0.72, "reward-bench/Focus": 0.7394, "reward-bench/Ties": 0.5863 } }, { "id": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworkstulufull__1__1743712914", "name": "allenai/open_instruct_dev-rm_qwen_1e-6_1_skyworkstulufull__1__1743712914", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6492, "reward-bench/Factuality": 0.6084, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.6776, "reward-bench/Safety": 0.76, "reward-bench/Focus": 0.8, "reward-bench/Ties": 0.699 } }, { "id": "allenai/open_instruct_dev-rm_qwen_2e-5_1_skyworkstulufull__1__1743713091", "name": "allenai/open_instruct_dev-rm_qwen_2e-5_1_skyworkstulufull__1__1743713091", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6764, "reward-bench/Factuality": 0.7074, "reward-bench/Precise IF": 0.3, "reward-bench/Math": 0.6885, "reward-bench/Safety": 0.8622, "reward-bench/Focus": 0.802, "reward-bench/Ties": 0.6984 } }, { "id": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworks__1__1744062829", "name": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworks__1__1744062829", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6408, "reward-bench/Factuality": 0.6337, "reward-bench/Precise IF": 0.3063, "reward-bench/Math": 0.6831, "reward-bench/Safety": 0.8467, "reward-bench/Focus": 0.8222, "reward-bench/Ties": 0.5529 } }, { "id": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworks__2__1744576050", "name": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworks__2__1744576050", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6452, "reward-bench/Factuality": 0.6063, "reward-bench/Precise IF": 0.3187, "reward-bench/Math": 0.7158, "reward-bench/Safety": 0.8356, "reward-bench/Focus": 0.8343, "reward-bench/Ties": 0.5603 } }, { "id": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworkstulufull__1__1743712916", "name": "allenai/open_instruct_dev-rm_qwen_3e-6_1_skyworkstulufull__1__1743712916", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7013, "reward-bench/Factuality": 0.7263, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.6995, "reward-bench/Safety": 0.8222, "reward-bench/Focus": 0.8444, "reward-bench/Ties": 0.7714 } }, { "id": "allenai/open_instruct_dev-rm_qwen_3e-6_2__1__1743023576", "name": "allenai/open_instruct_dev-rm_qwen_3e-6_2__1__1743023576", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6369, "reward-bench/Factuality": 0.6905, "reward-bench/Precise IF": 0.3187, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.7844, "reward-bench/Focus": 0.7596, "reward-bench/Ties": 0.6236 } }, { "id": "allenai/open_instruct_dev-rm_qwen_3e-6_3__1__1743023619", "name": "allenai/open_instruct_dev-rm_qwen_3e-6_3__1__1743023619", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6221, "reward-bench/Factuality": 0.6674, "reward-bench/Precise IF": 0.325, "reward-bench/Math": 0.612, "reward-bench/Safety": 0.7978, "reward-bench/Focus": 0.7455, "reward-bench/Ties": 0.5852 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_1e-6_1_skyworkstulufull__1__1745388583", "name": "allenai/open_instruct_dev-rm_qwenbase_1e-6_1_skyworkstulufull__1__1745388583", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5735, "reward-bench/Factuality": 0.5895, "reward-bench/Precise IF": 0.2625, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.6889, "reward-bench/Focus": 0.6727, "reward-bench/Ties": 0.5823 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_1e-6_2_skyworkstulufull__1__1745388604", "name": "allenai/open_instruct_dev-rm_qwenbase_1e-6_2_skyworkstulufull__1__1745388604", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6336, "reward-bench/Factuality": 0.6337, "reward-bench/Precise IF": 0.3063, "reward-bench/Math": 0.6885, "reward-bench/Safety": 0.7244, "reward-bench/Focus": 0.802, "reward-bench/Ties": 0.6465 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_2e-5_1_skyworkstulufull__1__1745388738", "name": "allenai/open_instruct_dev-rm_qwenbase_2e-5_1_skyworkstulufull__1__1745388738", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6824, "reward-bench/Factuality": 0.6989, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.6831, "reward-bench/Safety": 0.8311, "reward-bench/Focus": 0.8081, "reward-bench/Ties": 0.7107 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_2e-5_2_skyworkstulufull__1__1745388191", "name": "allenai/open_instruct_dev-rm_qwenbase_2e-5_2_skyworkstulufull__1__1745388191", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6392, "reward-bench/Factuality": 0.6589, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.6995, "reward-bench/Safety": 0.7933, "reward-bench/Focus": 0.7717, "reward-bench/Ties": 0.5804 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_3e-6_1_skyworkstulufull__1__1745388737", "name": "allenai/open_instruct_dev-rm_qwenbase_3e-6_1_skyworkstulufull__1__1745388737", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.664, "reward-bench/Factuality": 0.6821, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.8133, "reward-bench/Focus": 0.8061, "reward-bench/Ties": 0.7066 } }, { "id": "allenai/open_instruct_dev-rm_qwenbase_3e-6_2_skyworkstulufull__1__1745388138", "name": "allenai/open_instruct_dev-rm_qwenbase_3e-6_2_skyworkstulufull__1__1745388138", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6678, "reward-bench/Factuality": 0.6505, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.6831, "reward-bench/Safety": 0.7978, "reward-bench/Focus": 0.8808, "reward-bench/Ties": 0.6632 } }, { "id": "allenai/open_instruct_dev-rm_tulu3_70b_1__8__1742924455", "name": "allenai/open_instruct_dev-rm_tulu3_70b_1__8__1742924455", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6618, "reward-bench/Factuality": 0.7958, "reward-bench/Precise IF": 0.325, "reward-bench/Math": 0.6557, "reward-bench/Safety": 0.8311, "reward-bench/Focus": 0.6323, "reward-bench/Ties": 0.7311 } }, { "id": "allenai/open_instruct_dev-rm_tulu3_70b_2__8__1742982964", "name": "allenai/open_instruct_dev-rm_tulu3_70b_2__8__1742982964", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6605, "reward-bench/Factuality": 0.7789, "reward-bench/Precise IF": 0.3688, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.8844, "reward-bench/Focus": 0.6667, "reward-bench/Ties": 0.6195 } }, { "id": "allenai/tulu-2-dpo-13b", "name": "allenai/tulu-2-dpo-13b", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7368, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.5833, "reward-bench/Safety": 0.7946, "reward-bench/Reasoning": 0.7323, "reward-bench/Prior Sets (0.5 weight)": 0.4947 } }, { "id": "allenai/tulu-2-dpo-70b", "name": "allenai/tulu-2-dpo-70b", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7621, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.6053, "reward-bench/Safety": 0.8446, "reward-bench/Reasoning": 0.7407, "reward-bench/Prior Sets (0.5 weight)": 0.5278 } }, { "id": "allenai/tulu-2-dpo-7b", "name": "allenai/tulu-2-dpo-7b", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7212, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.5614, "reward-bench/Safety": 0.7527, "reward-bench/Reasoning": 0.7176, "reward-bench/Prior Sets (0.5 weight)": 0.4774 } }, { "id": "allenai/tulu-v2.5-13b-preference-mix-rm", "name": "allenai/tulu-v2.5-13b-preference-mix-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8027, "reward-bench/Chat": 0.9358, "reward-bench/Chat Hard": 0.682, "reward-bench/Safety": 0.773, "reward-bench/Reasoning": 0.885, "reward-bench/Prior Sets (0.5 weight)": 0.6724 } }, { "id": "allenai/tulu-v2.5-13b-uf-rm", "name": "allenai/tulu-v2.5-13b-uf-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4806, "reward-bench/Chat": 0.3939, "reward-bench/Chat Hard": 0.4232, "reward-bench/Safety": 0.5554, "reward-bench/Reasoning": 0.4737, "reward-bench/Prior Sets (0.5 weight)": 0.6326 } }, { "id": "allenai/tulu-v2.5-70b-preference-mix-rm", "name": "allenai/tulu-v2.5-70b-preference-mix-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6516, "reward-bench/Chat": 0.7737, "reward-bench/Chat Hard": 0.5921, "reward-bench/Safety": 0.8486, "reward-bench/Reasoning": 0.4138, "reward-bench/Prior Sets (0.5 weight)": 0.6079 } }, { "id": "allenai/tulu-v2.5-70b-uf-rm", "name": "allenai/tulu-v2.5-70b-uf-rm", "developer": "allenai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7398, "reward-bench/Chat": 0.8659, "reward-bench/Chat Hard": 0.7171, "reward-bench/Safety": 0.7014, "reward-bench/Reasoning": 0.757, "reward-bench/Prior Sets (0.5 weight)": 0.5757 } }, { "id": "allknowingroger/Chocolatine-24B", "name": "Chocolatine-24B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1958, "hfopenllm_v2/BBH": 0.6191, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.4566 } }, { "id": "allknowingroger/Gemma2Slerp1-2.6B", "name": "Gemma2Slerp1-2.6B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5354, "hfopenllm_v2/BBH": 0.4343, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4562, "hfopenllm_v2/MMLU-PRO": 0.2689 } }, { "id": "allknowingroger/Gemma2Slerp1-27B", "name": "Gemma2Slerp1-27B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7186, "hfopenllm_v2/BBH": 0.6399, "hfopenllm_v2/MATH Level 5": 0.2583, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.4767, "hfopenllm_v2/MMLU-PRO": 0.4456 } }, { "id": "allknowingroger/Gemma2Slerp2-2.6B", "name": "Gemma2Slerp2-2.6B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5747, "hfopenllm_v2/BBH": 0.4308, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4468, "hfopenllm_v2/MMLU-PRO": 0.2696 } }, { "id": "allknowingroger/Gemma2Slerp2-27B", "name": "Gemma2Slerp2-27B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7546, "hfopenllm_v2/BBH": 0.6557, "hfopenllm_v2/MATH Level 5": 0.2787, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4621, "hfopenllm_v2/MMLU-PRO": 0.4623 } }, { "id": "allknowingroger/Gemma2Slerp3-27B", "name": "Gemma2Slerp3-27B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7426, "hfopenllm_v2/BBH": 0.65, "hfopenllm_v2/MATH Level 5": 0.2742, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.474, "hfopenllm_v2/MMLU-PRO": 0.4641 } }, { "id": "allknowingroger/Gemma2Slerp4-27B", "name": "Gemma2Slerp4-27B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7497, "hfopenllm_v2/BBH": 0.653, "hfopenllm_v2/MATH Level 5": 0.2719, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4502, "hfopenllm_v2/MMLU-PRO": 0.4649 } }, { "id": "allknowingroger/GemmaSlerp-9B", "name": "GemmaSlerp-9B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7043, "hfopenllm_v2/BBH": 0.5921, "hfopenllm_v2/MATH Level 5": 0.216, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4673, "hfopenllm_v2/MMLU-PRO": 0.4161 } }, { "id": "allknowingroger/GemmaSlerp2-9B", "name": "GemmaSlerp2-9B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7281, "hfopenllm_v2/BBH": 0.5983, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4767, "hfopenllm_v2/MMLU-PRO": 0.4239 } }, { "id": "allknowingroger/GemmaSlerp4-10B", "name": "GemmaSlerp4-10B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7326, "hfopenllm_v2/BBH": 0.6028, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.454, "hfopenllm_v2/MMLU-PRO": 0.425 } }, { "id": "allknowingroger/GemmaSlerp5-10B", "name": "GemmaSlerp5-10B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7353, "hfopenllm_v2/BBH": 0.6054, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4608, "hfopenllm_v2/MMLU-PRO": 0.4328 } }, { "id": "allknowingroger/GemmaStock1-27B", "name": "GemmaStock1-27B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.6566, "hfopenllm_v2/MATH Level 5": 0.2636, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.4527, "hfopenllm_v2/MMLU-PRO": 0.473 } }, { "id": "allknowingroger/HomerSlerp1-7B", "name": "HomerSlerp1-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4621, "hfopenllm_v2/BBH": 0.5518, "hfopenllm_v2/MATH Level 5": 0.2719, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4359, "hfopenllm_v2/MMLU-PRO": 0.4504 } }, { "id": "allknowingroger/HomerSlerp2-7B", "name": "HomerSlerp2-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4487, "hfopenllm_v2/BBH": 0.5649, "hfopenllm_v2/MATH Level 5": 0.2968, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.4515 } }, { "id": "allknowingroger/HomerSlerp3-7B", "name": "HomerSlerp3-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4363, "hfopenllm_v2/BBH": 0.5598, "hfopenllm_v2/MATH Level 5": 0.3021, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4462, "hfopenllm_v2/MMLU-PRO": 0.4535 } }, { "id": "allknowingroger/HomerSlerp4-7B", "name": "HomerSlerp4-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4374, "hfopenllm_v2/BBH": 0.5571, "hfopenllm_v2/MATH Level 5": 0.327, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4408, "hfopenllm_v2/MMLU-PRO": 0.4472 } }, { "id": "allknowingroger/limyClown-7B-slerp", "name": "limyClown-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4017, "hfopenllm_v2/BBH": 0.5148, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3038 } }, { "id": "allknowingroger/LimyQstar-7B-slerp", "name": "LimyQstar-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3491, "hfopenllm_v2/BBH": 0.5024, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3103 } }, { "id": "allknowingroger/llama3-Jallabi-40B-s", "name": "llama3-Jallabi-40B-s", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1921, "hfopenllm_v2/BBH": 0.3252, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.1088 } }, { "id": "allknowingroger/Llama3.1-60B", "name": "Llama3.1-60B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1815, "hfopenllm_v2/BBH": 0.3242, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3596, "hfopenllm_v2/MMLU-PRO": 0.331 } }, { "id": "allknowingroger/llama3AnFeng-40B", "name": "llama3AnFeng-40B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1742, "hfopenllm_v2/BBH": 0.3794, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.394, "hfopenllm_v2/MMLU-PRO": 0.198 } }, { "id": "allknowingroger/Marco-01-slerp1-7B", "name": "Marco-01-slerp1-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4681, "hfopenllm_v2/BBH": 0.5541, "hfopenllm_v2/MATH Level 5": 0.3157, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4452, "hfopenllm_v2/MMLU-PRO": 0.4483 } }, { "id": "allknowingroger/Meme-7B-slerp", "name": "Meme-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5164, "hfopenllm_v2/BBH": 0.4661, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4223, "hfopenllm_v2/MMLU-PRO": 0.281 } }, { "id": "allknowingroger/Ministral-8B-slerp", "name": "Ministral-8B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1961, "hfopenllm_v2/BBH": 0.4686, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4285, "hfopenllm_v2/MMLU-PRO": 0.3119 } }, { "id": "allknowingroger/Mistralmash1-7B-s", "name": "Mistralmash1-7B-s", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3961, "hfopenllm_v2/BBH": 0.5277, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4267, "hfopenllm_v2/MMLU-PRO": 0.3293 } }, { "id": "allknowingroger/Mistralmash2-7B-s", "name": "Mistralmash2-7B-s", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4102, "hfopenllm_v2/BBH": 0.5305, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.3345 } }, { "id": "allknowingroger/MistralPhi3-11B", "name": "MistralPhi3-11B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1943, "hfopenllm_v2/BBH": 0.6234, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4267, "hfopenllm_v2/MMLU-PRO": 0.4688 } }, { "id": "allknowingroger/MixTAO-19B-pass", "name": "MixTAO-19B-pass", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3814, "hfopenllm_v2/BBH": 0.5128, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4783, "hfopenllm_v2/MMLU-PRO": 0.3105 } }, { "id": "allknowingroger/MixTaoTruthful-13B-slerp", "name": "MixTaoTruthful-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4139, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4292, "hfopenllm_v2/MMLU-PRO": 0.31 } }, { "id": "allknowingroger/MultiCalm-7B-slerp", "name": "MultiCalm-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3927, "hfopenllm_v2/BBH": 0.5122, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4319, "hfopenllm_v2/MMLU-PRO": 0.3033 } }, { "id": "allknowingroger/MultiMash-12B-slerp", "name": "MultiMash-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3974, "hfopenllm_v2/BBH": 0.5142, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4438, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "allknowingroger/MultiMash10-13B-slerp", "name": "MultiMash10-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4163, "hfopenllm_v2/BBH": 0.5186, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4318, "hfopenllm_v2/MMLU-PRO": 0.3117 } }, { "id": "allknowingroger/MultiMash11-13B-slerp", "name": "MultiMash11-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4251, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4373, "hfopenllm_v2/MMLU-PRO": 0.3085 } }, { "id": "allknowingroger/MultiMash2-12B-slerp", "name": "MultiMash2-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4261, "hfopenllm_v2/BBH": 0.5134, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.3043 } }, { "id": "allknowingroger/Multimash3-12B-slerp", "name": "Multimash3-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4437, "hfopenllm_v2/BBH": 0.5177, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4344, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "allknowingroger/MultiMash5-12B-slerp", "name": "MultiMash5-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4142, "hfopenllm_v2/BBH": 0.5145, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3028 } }, { "id": "allknowingroger/MultiMash6-12B-slerp", "name": "MultiMash6-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.43, "hfopenllm_v2/BBH": 0.5196, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.3091 } }, { "id": "allknowingroger/MultiMash7-12B-slerp", "name": "MultiMash7-12B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4213, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.3029 } }, { "id": "allknowingroger/MultiMash8-13B-slerp", "name": "MultiMash8-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4321, "hfopenllm_v2/BBH": 0.5178, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4424, "hfopenllm_v2/MMLU-PRO": 0.3126 } }, { "id": "allknowingroger/MultiMash9-13B-slerp", "name": "MultiMash9-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4188, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4398, "hfopenllm_v2/MMLU-PRO": 0.31 } }, { "id": "allknowingroger/Multimerge-19B-pass", "name": "Multimerge-19B-pass", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1773, "hfopenllm_v2/BBH": 0.2892, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.343, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "allknowingroger/MultiMerge-7B-slerp", "name": "MultiMerge-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3948, "hfopenllm_v2/BBH": 0.514, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.3037 } }, { "id": "allknowingroger/MultiverseEx26-7B-slerp", "name": "MultiverseEx26-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3939, "hfopenllm_v2/BBH": 0.5134, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3035 } }, { "id": "allknowingroger/Neuralcoven-7B-slerp", "name": "Neuralcoven-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3859, "hfopenllm_v2/BBH": 0.5303, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.3294 } }, { "id": "allknowingroger/Neuralmultiverse-7B-slerp", "name": "Neuralmultiverse-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3769, "hfopenllm_v2/BBH": 0.5166, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.3042 } }, { "id": "allknowingroger/NeuralWestSeverus-7B-slerp", "name": "NeuralWestSeverus-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4136, "hfopenllm_v2/BBH": 0.5244, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4529, "hfopenllm_v2/MMLU-PRO": 0.3137 } }, { "id": "allknowingroger/Ph3della5-14B", "name": "Ph3della5-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4799, "hfopenllm_v2/BBH": 0.6332, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.4787 } }, { "id": "allknowingroger/Ph3merge-14B", "name": "Ph3merge-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2701, "hfopenllm_v2/BBH": 0.6381, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4334, "hfopenllm_v2/MMLU-PRO": 0.4611 } }, { "id": "allknowingroger/Ph3merge2-14B", "name": "Ph3merge2-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1706, "hfopenllm_v2/BBH": 0.3607, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.1723 } }, { "id": "allknowingroger/Ph3merge3-14B", "name": "Ph3merge3-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1645, "hfopenllm_v2/BBH": 0.3597, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4082, "hfopenllm_v2/MMLU-PRO": 0.1647 } }, { "id": "allknowingroger/Ph3task1-14B", "name": "Ph3task1-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4695, "hfopenllm_v2/BBH": 0.6318, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4508, "hfopenllm_v2/MMLU-PRO": 0.4734 } }, { "id": "allknowingroger/Ph3task2-14B", "name": "Ph3task2-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4713, "hfopenllm_v2/BBH": 0.6098, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4535, "hfopenllm_v2/MMLU-PRO": 0.446 } }, { "id": "allknowingroger/Ph3task3-14B", "name": "Ph3task3-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4962, "hfopenllm_v2/BBH": 0.6298, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4426, "hfopenllm_v2/MMLU-PRO": 0.4771 } }, { "id": "allknowingroger/Ph3unsloth-3B-slerp", "name": "Ph3unsloth-3B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1894, "hfopenllm_v2/BBH": 0.5468, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.3701 } }, { "id": "allknowingroger/Phi3mash1-17B-pass", "name": "Phi3mash1-17B-pass", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1884, "hfopenllm_v2/BBH": 0.6129, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4451, "hfopenllm_v2/MMLU-PRO": 0.4589 } }, { "id": "allknowingroger/Quen2-65B", "name": "Quen2-65B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1758, "hfopenllm_v2/BBH": 0.2757, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2357, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1114 } }, { "id": "allknowingroger/Qwen2.5-42B-AGI", "name": "Qwen2.5-42B-AGI", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1913, "hfopenllm_v2/BBH": 0.2942, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.362, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "allknowingroger/Qwen2.5-7B-task2", "name": "Qwen2.5-7B-task2", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4527, "hfopenllm_v2/BBH": 0.5626, "hfopenllm_v2/MATH Level 5": 0.355, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.437, "hfopenllm_v2/MMLU-PRO": 0.4517 } }, { "id": "allknowingroger/Qwen2.5-7B-task3", "name": "Qwen2.5-7B-task3", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5129, "hfopenllm_v2/BBH": 0.5398, "hfopenllm_v2/MATH Level 5": 0.2606, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.4501 } }, { "id": "allknowingroger/Qwen2.5-7B-task4", "name": "Qwen2.5-7B-task4", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5005, "hfopenllm_v2/BBH": 0.5583, "hfopenllm_v2/MATH Level 5": 0.3112, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4395, "hfopenllm_v2/MMLU-PRO": 0.4561 } }, { "id": "allknowingroger/Qwen2.5-7B-task7", "name": "Qwen2.5-7B-task7", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4284, "hfopenllm_v2/BBH": 0.5552, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4326, "hfopenllm_v2/MMLU-PRO": 0.4133 } }, { "id": "allknowingroger/Qwen2.5-7B-task8", "name": "Qwen2.5-7B-task8", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4645, "hfopenllm_v2/BBH": 0.5525, "hfopenllm_v2/MATH Level 5": 0.3527, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4514, "hfopenllm_v2/MMLU-PRO": 0.4433 } }, { "id": "allknowingroger/Qwen2.5-slerp-14B", "name": "Qwen2.5-slerp-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4928, "hfopenllm_v2/BBH": 0.6512, "hfopenllm_v2/MATH Level 5": 0.4622, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.5379 } }, { "id": "allknowingroger/QwenSlerp12-7B", "name": "QwenSlerp12-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5076, "hfopenllm_v2/BBH": 0.5556, "hfopenllm_v2/MATH Level 5": 0.2946, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4461 } }, { "id": "allknowingroger/Qwenslerp2-14B", "name": "Qwenslerp2-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5007, "hfopenllm_v2/BBH": 0.6555, "hfopenllm_v2/MATH Level 5": 0.4456, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4729, "hfopenllm_v2/MMLU-PRO": 0.5403 } }, { "id": "allknowingroger/Qwenslerp2-7B", "name": "Qwenslerp2-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5294, "hfopenllm_v2/BBH": 0.5609, "hfopenllm_v2/MATH Level 5": 0.3421, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.4515 } }, { "id": "allknowingroger/Qwenslerp3-14B", "name": "Qwenslerp3-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5052, "hfopenllm_v2/BBH": 0.6521, "hfopenllm_v2/MATH Level 5": 0.4464, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4676, "hfopenllm_v2/MMLU-PRO": 0.5395 } }, { "id": "allknowingroger/Qwenslerp3-7B", "name": "Qwenslerp3-7B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5018, "hfopenllm_v2/BBH": 0.558, "hfopenllm_v2/MATH Level 5": 0.3218, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4515, "hfopenllm_v2/MMLU-PRO": 0.4542 } }, { "id": "allknowingroger/QwenSlerp4-14B", "name": "QwenSlerp4-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6328, "hfopenllm_v2/BBH": 0.6483, "hfopenllm_v2/MATH Level 5": 0.3693, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.465, "hfopenllm_v2/MMLU-PRO": 0.5436 } }, { "id": "allknowingroger/QwenSlerp5-14B", "name": "QwenSlerp5-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7119, "hfopenllm_v2/BBH": 0.6357, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4675, "hfopenllm_v2/MMLU-PRO": 0.5391 } }, { "id": "allknowingroger/QwenSlerp6-14B", "name": "QwenSlerp6-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6867, "hfopenllm_v2/BBH": 0.6384, "hfopenllm_v2/MATH Level 5": 0.3724, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.469, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "allknowingroger/QwenStock1-14B", "name": "QwenStock1-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5634, "hfopenllm_v2/BBH": 0.6528, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.3767, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.5418 } }, { "id": "allknowingroger/QwenStock2-14B", "name": "QwenStock2-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5563, "hfopenllm_v2/BBH": 0.6569, "hfopenllm_v2/MATH Level 5": 0.3882, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "allknowingroger/QwenStock3-14B", "name": "QwenStock3-14B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5615, "hfopenllm_v2/BBH": 0.6565, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.5428 } }, { "id": "allknowingroger/RogerMerge-7B-slerp", "name": "RogerMerge-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3933, "hfopenllm_v2/BBH": 0.516, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.303 } }, { "id": "allknowingroger/ROGERphi-7B-slerp", "name": "ROGERphi-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3861, "hfopenllm_v2/BBH": 0.5196, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4685, "hfopenllm_v2/MMLU-PRO": 0.3053 } }, { "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b", "name": "Rombos-LLM-V2.5-Qwen-42b", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1879, "hfopenllm_v2/BBH": 0.2969, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "allknowingroger/Strangecoven-7B-slerp", "name": "Strangecoven-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3746, "hfopenllm_v2/BBH": 0.5368, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3364 } }, { "id": "allknowingroger/Weirdslerp2-25B", "name": "Weirdslerp2-25B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1754, "hfopenllm_v2/BBH": 0.2874, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3524, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "allknowingroger/WestlakeMaziyar-7B-slerp", "name": "WestlakeMaziyar-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4838, "hfopenllm_v2/BBH": 0.5245, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4474, "hfopenllm_v2/MMLU-PRO": 0.3078 } }, { "id": "allknowingroger/YamMaths-7B-slerp", "name": "YamMaths-7B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4148, "hfopenllm_v2/BBH": 0.5156, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4384, "hfopenllm_v2/MMLU-PRO": 0.3131 } }, { "id": "allknowingroger/Yi-1.5-34B", "name": "Yi-1.5-34B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1639, "hfopenllm_v2/BBH": 0.2827, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3857, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "allknowingroger/Yi-blossom-40B", "name": "Yi-blossom-40B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2009, "hfopenllm_v2/BBH": 0.3215, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.108 } }, { "id": "allknowingroger/Yibuddy-35B", "name": "Yibuddy-35B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4235, "hfopenllm_v2/BBH": 0.5916, "hfopenllm_v2/MATH Level 5": 0.1571, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4505, "hfopenllm_v2/MMLU-PRO": 0.4489 } }, { "id": "allknowingroger/Yillama-40B", "name": "Yillama-40B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1697, "hfopenllm_v2/BBH": 0.4063, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.1981 } }, { "id": "allknowingroger/Yislerp-34B", "name": "Yislerp-34B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3692, "hfopenllm_v2/BBH": 0.6159, "hfopenllm_v2/MATH Level 5": 0.216, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4566, "hfopenllm_v2/MMLU-PRO": 0.4751 } }, { "id": "allknowingroger/Yislerp2-34B", "name": "Yislerp2-34B", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3999, "hfopenllm_v2/BBH": 0.6246, "hfopenllm_v2/MATH Level 5": 0.2296, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.453, "hfopenllm_v2/MMLU-PRO": 0.4724 } }, { "id": "allknowingroger/Yunconglong-13B-slerp", "name": "Yunconglong-13B-slerp", "developer": "allknowingroger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4242, "hfopenllm_v2/BBH": 0.5166, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.3036 } }, { "id": "allura-org/L3.1-8b-RP-Ink", "name": "L3.1-8b-RP-Ink", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7811, "hfopenllm_v2/BBH": 0.4828, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3608, "hfopenllm_v2/MMLU-PRO": 0.3428 } }, { "id": "allura-org/Mistral-Small-24b-Sertraline-0304", "name": "Mistral-Small-24b-Sertraline-0304", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.68, "hfopenllm_v2/BBH": 0.6525, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4395, "hfopenllm_v2/MMLU-PRO": 0.5106 } }, { "id": "allura-org/Mistral-Small-Sisyphus-24b-2503", "name": "Mistral-Small-Sisyphus-24b-2503", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6848, "hfopenllm_v2/BBH": 0.627, "hfopenllm_v2/MATH Level 5": 0.25, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.5127 } }, { "id": "allura-org/MN-12b-RP-Ink", "name": "MN-12b-RP-Ink", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7186, "hfopenllm_v2/BBH": 0.4834, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.3514 } }, { "id": "allura-org/MoE-Girl-1BA-7BT", "name": "MoE-Girl-1BA-7BT", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2705, "hfopenllm_v2/BBH": 0.3139, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3436, "hfopenllm_v2/MMLU-PRO": 0.1218 } }, { "id": "allura-org/MS-Meadowlark-22B", "name": "MS-Meadowlark-22B", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6697, "hfopenllm_v2/BBH": 0.5163, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.3823 } }, { "id": "allura-org/Teleut-7b", "name": "Teleut-7b", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6379, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.2409, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.464, "hfopenllm_v2/MMLU-PRO": 0.4131 } }, { "id": "allura-org/TQ2.5-14B-Aletheia-v1", "name": "TQ2.5-14B-Aletheia-v1", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.753, "hfopenllm_v2/BBH": 0.6585, "hfopenllm_v2/MATH Level 5": 0.3399, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4452, "hfopenllm_v2/MMLU-PRO": 0.5241 } }, { "id": "allura-org/TQ2.5-14B-Neon-v1", "name": "TQ2.5-14B-Neon-v1", "developer": "allura-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6754, "hfopenllm_v2/BBH": 0.6553, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.461, "hfopenllm_v2/MMLU-PRO": 0.5253 } }, { "id": "aloobun/d-SmolLM2-360M", "name": "d-SmolLM2-360M", "developer": "aloobun", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2097, "hfopenllm_v2/BBH": 0.3196, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "aloobun/Meta-Llama-3-7B-28Layers", "name": "Meta-Llama-3-7B-28Layers", "developer": "aloobun", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1964, "hfopenllm_v2/BBH": 0.4437, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3589, "hfopenllm_v2/MMLU-PRO": 0.316 } }, { "id": "alpindale/magnum-72b-v1", "name": "magnum-72b-v1", "developer": "alpindale", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7606, "hfopenllm_v2/BBH": 0.6982, "hfopenllm_v2/MATH Level 5": 0.398, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.5468 } }, { "id": "alpindale/WizardLM-2-8x22B", "name": "WizardLM-2-8x22B", "developer": "alpindale", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5272, "hfopenllm_v2/BBH": 0.6377, "hfopenllm_v2/MATH Level 5": 0.25, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4387, "hfopenllm_v2/MMLU-PRO": 0.4596 } }, { "id": "Alsebay/Qwen2.5-7B-test-novelist", "name": "Qwen2.5-7B-test-novelist", "developer": "Alsebay", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5352, "hfopenllm_v2/BBH": 0.5151, "hfopenllm_v2/MATH Level 5": 0.2349, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4749, "hfopenllm_v2/MMLU-PRO": 0.3866 } }, { "id": "altomek/YiSM-34B-0rn", "name": "YiSM-34B-0rn", "developer": "altomek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4284, "hfopenllm_v2/BBH": 0.614, "hfopenllm_v2/MATH Level 5": 0.2281, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.4696 } }, { "id": "Amaorynho/BBAI2006", "name": "BBAI2006", "developer": "Amaorynho", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1467, "hfopenllm_v2/BBH": 0.2704, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3605, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "Amaorynho/BBAI270V4", "name": "BBAI270V4", "developer": "Amaorynho", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.199, "hfopenllm_v2/BBH": 0.3071, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.1114 } }, { "id": "Amaorynho/BBAI_375", "name": "BBAI_375", "developer": "Amaorynho", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1467, "hfopenllm_v2/BBH": 0.2704, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3605, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "Amaorynho/BBAIIFEV1", "name": "BBAIIFEV1", "developer": "Amaorynho", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8047, "hfopenllm_v2/BBH": 0.5292, "hfopenllm_v2/MATH Level 5": 0.1934, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3857 } }, { "id": "amazon/amazon-nova-2-lite-v1-0-fc", "name": "Amazon-Nova-2-Lite-v1:0 (FC)", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 80.0, "bfcl/bfcl.overall.overall_accuracy": 27.1, "bfcl/bfcl.overall.total_cost_usd": 78.19, "bfcl/bfcl.overall.latency_mean_s": 8.55, "bfcl/bfcl.overall.latency_std_s": 9.85, "bfcl/bfcl.overall.latency_p95_s": 27.62, "bfcl/bfcl.non_live.ast_accuracy": 86.96, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 86.0, "bfcl/bfcl.live.live_accuracy": 80.83, "bfcl/bfcl.live.live_simple_ast_accuracy": 83.33, "bfcl/bfcl.live.live_multiple_ast_accuracy": 80.15, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 2.12, "bfcl/bfcl.multi_turn.base_accuracy": 2.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.5, "bfcl/bfcl.web_search.accuracy": 5.0, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 6.0, "bfcl/bfcl.memory.accuracy": 2.37, "bfcl/bfcl.memory.kv_accuracy": 1.94, "bfcl/bfcl.memory.vector_accuracy": 3.23, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.11 } }, { "id": "amazon/amazon-nova-micro-v1-0-fc", "name": "Amazon-Nova-Micro-v1:0 (FC)", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 95.0, "bfcl/bfcl.overall.overall_accuracy": 22.29, "bfcl/bfcl.overall.total_cost_usd": 1.81, "bfcl/bfcl.overall.latency_mean_s": 1.12, "bfcl/bfcl.overall.latency_std_s": 0.45, "bfcl/bfcl.overall.latency_p95_s": 1.79, "bfcl/bfcl.non_live.ast_accuracy": 74.1, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 87.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 75.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.live.live_accuracy": 66.32, "bfcl/bfcl.live.live_simple_ast_accuracy": 72.09, "bfcl/bfcl.live.live_multiple_ast_accuracy": 64.96, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 1.38, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.0, "bfcl/bfcl.web_search.accuracy": 1.5, "bfcl/bfcl.web_search.base_accuracy": 3.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 2.37, "bfcl/bfcl.memory.kv_accuracy": 1.94, "bfcl/bfcl.memory.vector_accuracy": 2.58, "bfcl/bfcl.memory.recursive_summarization_accuracy": 2.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 70.65 } }, { "id": "amazon/amazon-nova-pro-v1-0-fc", "name": "Amazon-Nova-Pro-v1:0 (FC)", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 88.0, "bfcl/bfcl.overall.overall_accuracy": 24.97, "bfcl/bfcl.overall.total_cost_usd": 48.44, "bfcl/bfcl.overall.latency_mean_s": 2.25, "bfcl/bfcl.overall.latency_std_s": 1.91, "bfcl/bfcl.overall.latency_p95_s": 3.29, "bfcl/bfcl.non_live.ast_accuracy": 86.58, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 84.0, "bfcl/bfcl.live.live_accuracy": 78.53, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.4, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.97, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 1.88, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 3.0, "bfcl/bfcl.web_search.accuracy": 2.5, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 1.94, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 1.29, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 70.06 } }, { "id": "amazon/MegaBeam-Mistral-7B-300k", "name": "MegaBeam-Mistral-7B-300k", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5203, "hfopenllm_v2/BBH": 0.4228, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.398, "hfopenllm_v2/MMLU-PRO": 0.2549 } }, { "id": "amazon/nova-lite-v1:0", "name": "Amazon Nova Lite", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.551, "helm_capabilities/MMLU-Pro": 0.6, "helm_capabilities/GPQA": 0.397, "helm_capabilities/IFEval": 0.776, "helm_capabilities/WildBench": 0.75, "helm_capabilities/Omni-MATH": 0.233, "helm_lite/Mean win rate": 0.708, "helm_lite/NarrativeQA": 0.768, "helm_lite/NaturalQuestions (closed-book)": 0.352, "helm_lite/OpenbookQA": 0.928, "helm_lite/MMLU": 0.693, "helm_lite/MATH": 0.779, "helm_lite/GSM8K": 0.829, "helm_lite/LegalBench": 0.659, "helm_lite/MedQA": 0.696, "helm_lite/WMT 2014": 0.204, "helm_mmlu/MMLU All Subjects": 0.77, "helm_mmlu/Abstract Algebra": 0.52, "helm_mmlu/Anatomy": 0.719, "helm_mmlu/College Physics": 0.608, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.675, "helm_mmlu/Global Facts": 0.55, "helm_mmlu/Jurisprudence": 0.852, "helm_mmlu/Philosophy": 0.817, "helm_mmlu/Professional Psychology": 0.812, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.862, "helm_mmlu/Business Ethics": 0.73, "helm_mmlu/Clinical Knowledge": 0.8, "helm_mmlu/Conceptual Physics": 0.796, "helm_mmlu/Electrical Engineering": 0.779, "helm_mmlu/Elementary Mathematics": 0.757, "helm_mmlu/Formal Logic": 0.643, "helm_mmlu/High School World History": 0.886, "helm_mmlu/Human Sexuality": 0.84, "helm_mmlu/International Law": 0.843, "helm_mmlu/Logical Fallacies": 0.81, "helm_mmlu/Machine Learning": 0.509, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.889, "helm_mmlu/Medical Genetics": 0.9, "helm_mmlu/Miscellaneous": 0.872, "helm_mmlu/Moral Scenarios": 0.694, "helm_mmlu/Nutrition": 0.788, "helm_mmlu/Prehistory": 0.849, "helm_mmlu/Public Relations": 0.682, "helm_mmlu/Security Studies": 0.788, "helm_mmlu/Sociology": 0.896, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.987 } }, { "id": "amazon/nova-micro-v1:0", "name": "Amazon Nova Micro", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.522, "helm_capabilities/MMLU-Pro": 0.511, "helm_capabilities/GPQA": 0.383, "helm_capabilities/IFEval": 0.76, "helm_capabilities/WildBench": 0.743, "helm_capabilities/Omni-MATH": 0.214, "helm_lite/Mean win rate": 0.524, "helm_lite/NarrativeQA": 0.744, "helm_lite/NaturalQuestions (closed-book)": 0.285, "helm_lite/OpenbookQA": 0.888, "helm_lite/MMLU": 0.64, "helm_lite/MATH": 0.76, "helm_lite/GSM8K": 0.794, "helm_lite/LegalBench": 0.615, "helm_lite/MedQA": 0.608, "helm_lite/WMT 2014": 0.192, "helm_mmlu/MMLU All Subjects": 0.708, "helm_mmlu/Abstract Algebra": 0.42, "helm_mmlu/Anatomy": 0.726, "helm_mmlu/College Physics": 0.5, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.57, "helm_mmlu/Global Facts": 0.44, "helm_mmlu/Jurisprudence": 0.815, "helm_mmlu/Philosophy": 0.733, "helm_mmlu/Professional Psychology": 0.739, "helm_mmlu/Us Foreign Policy": 0.9, "helm_mmlu/Astronomy": 0.822, "helm_mmlu/Business Ethics": 0.71, "helm_mmlu/Clinical Knowledge": 0.751, "helm_mmlu/Conceptual Physics": 0.706, "helm_mmlu/Electrical Engineering": 0.683, "helm_mmlu/Elementary Mathematics": 0.55, "helm_mmlu/Formal Logic": 0.508, "helm_mmlu/High School World History": 0.84, "helm_mmlu/Human Sexuality": 0.824, "helm_mmlu/International Law": 0.843, "helm_mmlu/Logical Fallacies": 0.798, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.816, "helm_mmlu/Marketing": 0.91, "helm_mmlu/Medical Genetics": 0.82, "helm_mmlu/Miscellaneous": 0.83, "helm_mmlu/Moral Scenarios": 0.464, "helm_mmlu/Nutrition": 0.778, "helm_mmlu/Prehistory": 0.787, "helm_mmlu/Public Relations": 0.673, "helm_mmlu/Security Studies": 0.718, "helm_mmlu/Sociology": 0.846, "helm_mmlu/Virology": 0.524, "helm_mmlu/World Religions": 0.825, "helm_mmlu/Mean win rate": 1.0 } }, { "id": "amazon/nova-premier-v1:0", "name": "Amazon Nova Premier", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.637, "helm_capabilities/MMLU-Pro": 0.726, "helm_capabilities/GPQA": 0.518, "helm_capabilities/IFEval": 0.803, "helm_capabilities/WildBench": 0.788, "helm_capabilities/Omni-MATH": 0.35 } }, { "id": "amazon/nova-pro-v1:0", "name": "Amazon Nova Pro", "developer": "amazon", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.591, "helm_capabilities/MMLU-Pro": 0.673, "helm_capabilities/GPQA": 0.446, "helm_capabilities/IFEval": 0.815, "helm_capabilities/WildBench": 0.777, "helm_capabilities/Omni-MATH": 0.242, "helm_lite/Mean win rate": 0.885, "helm_lite/NarrativeQA": 0.791, "helm_lite/NaturalQuestions (closed-book)": 0.405, "helm_lite/OpenbookQA": 0.96, "helm_lite/MMLU": 0.758, "helm_lite/MATH": 0.821, "helm_lite/GSM8K": 0.87, "helm_lite/LegalBench": 0.736, "helm_lite/MedQA": 0.811, "helm_lite/WMT 2014": 0.229, "helm_mmlu/MMLU All Subjects": 0.82, "helm_mmlu/Abstract Algebra": 0.69, "helm_mmlu/Anatomy": 0.807, "helm_mmlu/College Physics": 0.647, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.702, "helm_mmlu/Global Facts": 0.54, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.826, "helm_mmlu/Professional Psychology": 0.864, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.895, "helm_mmlu/Business Ethics": 0.81, "helm_mmlu/Clinical Knowledge": 0.875, "helm_mmlu/Conceptual Physics": 0.851, "helm_mmlu/Electrical Engineering": 0.8, "helm_mmlu/Elementary Mathematics": 0.831, "helm_mmlu/Formal Logic": 0.714, "helm_mmlu/High School World History": 0.928, "helm_mmlu/Human Sexuality": 0.885, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.871, "helm_mmlu/Machine Learning": 0.625, "helm_mmlu/Management": 0.922, "helm_mmlu/Marketing": 0.923, "helm_mmlu/Medical Genetics": 0.87, "helm_mmlu/Miscellaneous": 0.912, "helm_mmlu/Moral Scenarios": 0.76, "helm_mmlu/Nutrition": 0.866, "helm_mmlu/Prehistory": 0.926, "helm_mmlu/Public Relations": 0.8, "helm_mmlu/Security Studies": 0.849, "helm_mmlu/Sociology": 0.905, "helm_mmlu/Virology": 0.59, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.975 } }, { "id": "amd/AMD-Llama-135m", "name": "AMD-Llama-135m", "developer": "amd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1842, "hfopenllm_v2/BBH": 0.2974, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.378, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "Amu/t1-1.5B", "name": "t1-1.5B", "developer": "Amu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3394, "hfopenllm_v2/BBH": 0.4008, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3517, "hfopenllm_v2/MMLU-PRO": 0.2566 } }, { "id": "Amu/t1-3B", "name": "t1-3B", "developer": "Amu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3328, "hfopenllm_v2/BBH": 0.3999, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3435, "hfopenllm_v2/MMLU-PRO": 0.1284 } }, { "id": "anakin87/gemma-2b-orpo", "name": "gemma-2b-orpo", "developer": "anakin87", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2478, "hfopenllm_v2/BBH": 0.3426, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3728, "hfopenllm_v2/MMLU-PRO": 0.1306 } }, { "id": "anthracite-org/magnum-v1-72b", "name": "magnum-v1-72b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7606, "hfopenllm_v2/BBH": 0.6982, "hfopenllm_v2/MATH Level 5": 0.398, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.5486 } }, { "id": "anthracite-org/magnum-v2-12b", "name": "magnum-v2-12b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3762, "hfopenllm_v2/BBH": 0.5021, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4179, "hfopenllm_v2/MMLU-PRO": 0.3167 } }, { "id": "anthracite-org/magnum-v2-72b", "name": "magnum-v2-72b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.756, "hfopenllm_v2/BBH": 0.7005, "hfopenllm_v2/MATH Level 5": 0.3542, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.5456 } }, { "id": "anthracite-org/magnum-v2.5-12b-kto", "name": "magnum-v2.5-12b-kto", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3866, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4086, "hfopenllm_v2/MMLU-PRO": 0.3215 } }, { "id": "anthracite-org/magnum-v3-27b-kto", "name": "magnum-v3-27b-kto", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5675, "hfopenllm_v2/BBH": 0.586, "hfopenllm_v2/MATH Level 5": 0.1813, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.3855, "hfopenllm_v2/MMLU-PRO": 0.4238 } }, { "id": "anthracite-org/magnum-v3-34b", "name": "magnum-v3-34b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5115, "hfopenllm_v2/BBH": 0.6088, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.4752 } }, { "id": "anthracite-org/magnum-v3-9b-chatml", "name": "magnum-v3-9b-chatml", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1275, "hfopenllm_v2/BBH": 0.5428, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4432, "hfopenllm_v2/MMLU-PRO": 0.4242 } }, { "id": "anthracite-org/magnum-v3-9b-customgemma2", "name": "magnum-v3-9b-customgemma2", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1273, "hfopenllm_v2/BBH": 0.534, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4565, "hfopenllm_v2/MMLU-PRO": 0.4205 } }, { "id": "anthracite-org/magnum-v4-12b", "name": "magnum-v4-12b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3393, "hfopenllm_v2/BBH": 0.5177, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4093, "hfopenllm_v2/MMLU-PRO": 0.3604 } }, { "id": "anthracite-org/magnum-v4-22b", "name": "magnum-v4-22b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5629, "hfopenllm_v2/BBH": 0.5486, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4408, "hfopenllm_v2/MMLU-PRO": 0.383 } }, { "id": "anthracite-org/magnum-v4-27b", "name": "magnum-v4-27b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3454, "hfopenllm_v2/BBH": 0.5867, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.438, "hfopenllm_v2/MMLU-PRO": 0.4376 } }, { "id": "anthracite-org/magnum-v4-9b", "name": "magnum-v4-9b", "developer": "anthracite-org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3503, "hfopenllm_v2/BBH": 0.5336, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4516, "hfopenllm_v2/MMLU-PRO": 0.3953 } }, { "id": "Anthropic-LM-v4-s3-52B", "name": "Anthropic-LM v4-s3 52B", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.78, "helm_classic/MMLU": 0.481, "helm_classic/BoolQ": 0.815, "helm_classic/NarrativeQA": 0.728, "helm_classic/NaturalQuestions (open-book)": 0.686, "helm_classic/QuAC": 0.431, "helm_classic/HellaSwag": 0.807, "helm_classic/OpenbookQA": 0.558, "helm_classic/TruthfulQA": 0.368, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.154, "helm_classic/XSUM": 0.134, "helm_classic/IMDB": 0.934, "helm_classic/CivilComments": 0.61, "helm_classic/RAFT": 0.699 } }, { "id": "anthropic/claude-2.0", "name": "Claude 2.0", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.489, "helm_lite/NarrativeQA": 0.718, "helm_lite/NaturalQuestions (closed-book)": 0.428, "helm_lite/OpenbookQA": 0.862, "helm_lite/MMLU": 0.639, "helm_lite/MATH": 0.603, "helm_lite/GSM8K": 0.583, "helm_lite/LegalBench": 0.643, "helm_lite/MedQA": 0.652, "helm_lite/WMT 2014": 0.219 } }, { "id": "anthropic/claude-2.1", "name": "Claude 2.1", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.437, "helm_lite/NarrativeQA": 0.677, "helm_lite/NaturalQuestions (closed-book)": 0.375, "helm_lite/OpenbookQA": 0.872, "helm_lite/MMLU": 0.643, "helm_lite/MATH": 0.632, "helm_lite/GSM8K": 0.604, "helm_lite/LegalBench": 0.643, "helm_lite/MedQA": 0.644, "helm_lite/WMT 2014": 0.204, "helm_mmlu/MMLU All Subjects": 0.735, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.726, "helm_mmlu/College Physics": 0.5, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.596, "helm_mmlu/Global Facts": 0.55, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.794, "helm_mmlu/Professional Psychology": 0.797, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.855, "helm_mmlu/Business Ethics": 0.73, "helm_mmlu/Clinical Knowledge": 0.785, "helm_mmlu/Conceptual Physics": 0.766, "helm_mmlu/Electrical Engineering": 0.724, "helm_mmlu/Elementary Mathematics": 0.521, "helm_mmlu/Formal Logic": 0.5, "helm_mmlu/High School World History": 0.903, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.834, "helm_mmlu/Machine Learning": 0.482, "helm_mmlu/Management": 0.825, "helm_mmlu/Marketing": 0.923, "helm_mmlu/Medical Genetics": 0.81, "helm_mmlu/Miscellaneous": 0.88, "helm_mmlu/Moral Scenarios": 0.52, "helm_mmlu/Nutrition": 0.781, "helm_mmlu/Prehistory": 0.821, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.812, "helm_mmlu/Sociology": 0.886, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.048 } }, { "id": "anthropic/claude-3-5-haiku-20241022", "name": "Claude 3.5 Haiku 20241022", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.6114, "global-mmlu-lite/Culturally Sensitive": 0.5834, "global-mmlu-lite/Culturally Agnostic": 0.6394, "global-mmlu-lite/Arabic": 0.695, "global-mmlu-lite/English": 0.485, "global-mmlu-lite/Bengali": 0.675, "global-mmlu-lite/German": 0.565, "global-mmlu-lite/French": 0.61, "global-mmlu-lite/Hindi": 0.6575, "global-mmlu-lite/Indonesian": 0.5475, "global-mmlu-lite/Italian": 0.48, "global-mmlu-lite/Japanese": 0.655, "global-mmlu-lite/Korean": 0.6575, "global-mmlu-lite/Portuguese": 0.5225, "global-mmlu-lite/Spanish": 0.485, "global-mmlu-lite/Swahili": 0.69, "global-mmlu-lite/Yoruba": 0.6675, "global-mmlu-lite/Chinese": 0.69, "global-mmlu-lite/Burmese": 0.7, "helm_capabilities/Mean score": 0.549, "helm_capabilities/MMLU-Pro": 0.605, "helm_capabilities/GPQA": 0.363, "helm_capabilities/IFEval": 0.792, "helm_capabilities/WildBench": 0.76, "helm_capabilities/Omni-MATH": 0.224, "helm_lite/Mean win rate": 0.531, "helm_lite/NarrativeQA": 0.763, "helm_lite/NaturalQuestions (closed-book)": 0.344, "helm_lite/OpenbookQA": 0.854, "helm_lite/MMLU": 0.671, "helm_lite/MATH": 0.872, "helm_lite/GSM8K": 0.815, "helm_lite/LegalBench": 0.631, "helm_lite/MedQA": 0.722, "helm_lite/WMT 2014": 0.135, "helm_mmlu/MMLU All Subjects": 0.743, "helm_mmlu/Abstract Algebra": 0.47, "helm_mmlu/Anatomy": 0.793, "helm_mmlu/College Physics": 0.52, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.596, "helm_mmlu/Global Facts": 0.5, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.823, "helm_mmlu/Professional Psychology": 0.825, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.829, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.823, "helm_mmlu/Conceptual Physics": 0.723, "helm_mmlu/Electrical Engineering": 0.717, "helm_mmlu/Elementary Mathematics": 0.561, "helm_mmlu/Formal Logic": 0.619, "helm_mmlu/High School World History": 0.882, "helm_mmlu/Human Sexuality": 0.885, "helm_mmlu/International Law": 0.884, "helm_mmlu/Logical Fallacies": 0.822, "helm_mmlu/Machine Learning": 0.518, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.897, "helm_mmlu/Medical Genetics": 0.83, "helm_mmlu/Miscellaneous": 0.905, "helm_mmlu/Moral Scenarios": 0.476, "helm_mmlu/Nutrition": 0.846, "helm_mmlu/Prehistory": 0.877, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.792, "helm_mmlu/Sociology": 0.905, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.128 } }, { "id": "anthropic/claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet 20240620", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.885, "helm_lite/NarrativeQA": 0.746, "helm_lite/NaturalQuestions (closed-book)": 0.502, "helm_lite/OpenbookQA": 0.972, "helm_lite/MMLU": 0.799, "helm_lite/MATH": 0.813, "helm_lite/GSM8K": 0.949, "helm_lite/LegalBench": 0.707, "helm_lite/MedQA": 0.825, "helm_lite/WMT 2014": 0.229, "helm_mmlu/MMLU All Subjects": 0.865, "helm_mmlu/Abstract Algebra": 0.75, "helm_mmlu/Anatomy": 0.844, "helm_mmlu/College Physics": 0.696, "helm_mmlu/Computer Security": 0.89, "helm_mmlu/Econometrics": 0.807, "helm_mmlu/Global Facts": 0.72, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.891, "helm_mmlu/Professional Psychology": 0.922, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.961, "helm_mmlu/Business Ethics": 0.85, "helm_mmlu/Clinical Knowledge": 0.913, "helm_mmlu/Conceptual Physics": 0.885, "helm_mmlu/Electrical Engineering": 0.828, "helm_mmlu/Elementary Mathematics": 0.892, "helm_mmlu/Formal Logic": 0.698, "helm_mmlu/High School World History": 0.954, "helm_mmlu/Human Sexuality": 0.939, "helm_mmlu/International Law": 0.959, "helm_mmlu/Logical Fallacies": 0.926, "helm_mmlu/Machine Learning": 0.786, "helm_mmlu/Management": 0.942, "helm_mmlu/Marketing": 0.949, "helm_mmlu/Medical Genetics": 0.98, "helm_mmlu/Miscellaneous": 0.962, "helm_mmlu/Moral Scenarios": 0.882, "helm_mmlu/Nutrition": 0.912, "helm_mmlu/Prehistory": 0.951, "helm_mmlu/Public Relations": 0.855, "helm_mmlu/Security Studies": 0.878, "helm_mmlu/Sociology": 0.96, "helm_mmlu/Virology": 0.602, "helm_mmlu/World Religions": 0.924, "helm_mmlu/Mean win rate": 0.17, "reward-bench/Score": 0.6466, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.7401, "reward-bench/Safety": 0.8519, "reward-bench/Reasoning": 0.8469, "reward-bench/Factuality": 0.5284, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.5683, "reward-bench/Focus": 0.8697, "reward-bench/Ties": 0.674 } }, { "id": "anthropic/claude-3-5-sonnet-20241022", "name": "Claude 3.5 Sonnet 20241022", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.653, "helm_capabilities/MMLU-Pro": 0.777, "helm_capabilities/GPQA": 0.565, "helm_capabilities/IFEval": 0.856, "helm_capabilities/WildBench": 0.792, "helm_capabilities/Omni-MATH": 0.276, "helm_lite/Mean win rate": 0.846, "helm_lite/NarrativeQA": 0.77, "helm_lite/NaturalQuestions (closed-book)": 0.467, "helm_lite/OpenbookQA": 0.966, "helm_lite/MMLU": 0.809, "helm_lite/MATH": 0.904, "helm_lite/GSM8K": 0.956, "helm_lite/LegalBench": 0.647, "helm_lite/MedQA": 0.859, "helm_lite/WMT 2014": 0.226, "helm_mmlu/MMLU All Subjects": 0.873, "helm_mmlu/Abstract Algebra": 0.78, "helm_mmlu/Anatomy": 0.859, "helm_mmlu/College Physics": 0.775, "helm_mmlu/Computer Security": 0.87, "helm_mmlu/Econometrics": 0.807, "helm_mmlu/Global Facts": 0.8, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.891, "helm_mmlu/Professional Psychology": 0.922, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.974, "helm_mmlu/Business Ethics": 0.83, "helm_mmlu/Clinical Knowledge": 0.928, "helm_mmlu/Conceptual Physics": 0.906, "helm_mmlu/Electrical Engineering": 0.848, "helm_mmlu/Elementary Mathematics": 0.918, "helm_mmlu/Formal Logic": 0.786, "helm_mmlu/High School World History": 0.958, "helm_mmlu/Human Sexuality": 0.939, "helm_mmlu/International Law": 0.959, "helm_mmlu/Logical Fallacies": 0.914, "helm_mmlu/Machine Learning": 0.839, "helm_mmlu/Management": 0.932, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.96, "helm_mmlu/Miscellaneous": 0.964, "helm_mmlu/Moral Scenarios": 0.888, "helm_mmlu/Nutrition": 0.922, "helm_mmlu/Prehistory": 0.941, "helm_mmlu/Public Relations": 0.8, "helm_mmlu/Security Studies": 0.882, "helm_mmlu/Sociology": 0.955, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.901, "helm_mmlu/Mean win rate": 0.311 } }, { "id": "anthropic/claude-3-7-sonnet-20250219", "name": "claude-3-7-sonnet-20250219", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8078, "global-mmlu-lite/Culturally Sensitive": 0.7794, "global-mmlu-lite/Culturally Agnostic": 0.8362, "global-mmlu-lite/Arabic": 0.7925, "global-mmlu-lite/English": 0.7625, "global-mmlu-lite/Bengali": 0.825, "global-mmlu-lite/German": 0.8125, "global-mmlu-lite/French": 0.7675, "global-mmlu-lite/Hindi": 0.805, "global-mmlu-lite/Indonesian": 0.8175, "global-mmlu-lite/Italian": 0.8225, "global-mmlu-lite/Japanese": 0.8425, "global-mmlu-lite/Korean": 0.83, "global-mmlu-lite/Portuguese": 0.77, "global-mmlu-lite/Spanish": 0.8075, "global-mmlu-lite/Swahili": 0.8125, "global-mmlu-lite/Yoruba": 0.81, "global-mmlu-lite/Chinese": 0.835, "global-mmlu-lite/Burmese": 0.8125, "helm_capabilities/Mean score": 0.674, "helm_capabilities/MMLU-Pro": 0.784, "helm_capabilities/GPQA": 0.608, "helm_capabilities/IFEval": 0.834, "helm_capabilities/WildBench": 0.814, "helm_capabilities/Omni-MATH": 0.33, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.28169014084507044, "reward-bench/Score": 0.7539, "reward-bench/Factuality": 0.7326, "reward-bench/Precise IF": 0.5437, "reward-bench/Math": 0.75, "reward-bench/Safety": 0.9033, "reward-bench/Focus": 0.9212, "reward-bench/Ties": 0.6723 } }, { "id": "anthropic/claude-3-haiku-20240307", "name": "Claude 3 Haiku 20240307", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.263, "helm_lite/NarrativeQA": 0.244, "helm_lite/NaturalQuestions (closed-book)": 0.144, "helm_lite/OpenbookQA": 0.838, "helm_lite/MMLU": 0.662, "helm_lite/MATH": 0.131, "helm_lite/GSM8K": 0.699, "helm_lite/LegalBench": 0.46, "helm_lite/MedQA": 0.702, "helm_lite/WMT 2014": 0.148, "helm_mmlu/MMLU All Subjects": 0.738, "helm_mmlu/Abstract Algebra": 0.42, "helm_mmlu/Anatomy": 0.711, "helm_mmlu/College Physics": 0.48, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.632, "helm_mmlu/Global Facts": 0.47, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.814, "helm_mmlu/Professional Psychology": 0.802, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.901, "helm_mmlu/Business Ethics": 0.78, "helm_mmlu/Clinical Knowledge": 0.789, "helm_mmlu/Conceptual Physics": 0.715, "helm_mmlu/Electrical Engineering": 0.69, "helm_mmlu/Elementary Mathematics": 0.558, "helm_mmlu/Formal Logic": 0.579, "helm_mmlu/High School World History": 0.878, "helm_mmlu/Human Sexuality": 0.824, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.791, "helm_mmlu/Machine Learning": 0.589, "helm_mmlu/Management": 0.874, "helm_mmlu/Marketing": 0.91, "helm_mmlu/Medical Genetics": 0.8, "helm_mmlu/Miscellaneous": 0.893, "helm_mmlu/Moral Scenarios": 0.502, "helm_mmlu/Nutrition": 0.83, "helm_mmlu/Prehistory": 0.824, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.808, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.28, "reward-bench/Score": 0.3711, "reward-bench/Chat": 0.9274, "reward-bench/Chat Hard": 0.5197, "reward-bench/Safety": 0.595, "reward-bench/Reasoning": 0.706, "reward-bench/Prior Sets (0.5 weight)": 0.6635, "reward-bench/Factuality": 0.4042, "reward-bench/Precise IF": 0.2812, "reward-bench/Math": 0.3552, "reward-bench/Focus": 0.501, "reward-bench/Ties": 0.0899 } }, { "id": "anthropic/claude-3-opus-20240229", "name": "Claude 3 Opus 20240229", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.683, "helm_lite/NarrativeQA": 0.351, "helm_lite/NaturalQuestions (closed-book)": 0.441, "helm_lite/OpenbookQA": 0.956, "helm_lite/MMLU": 0.768, "helm_lite/MATH": 0.76, "helm_lite/GSM8K": 0.924, "helm_lite/LegalBench": 0.662, "helm_lite/MedQA": 0.775, "helm_lite/WMT 2014": 0.24, "helm_mmlu/MMLU All Subjects": 0.846, "helm_mmlu/Abstract Algebra": 0.64, "helm_mmlu/Anatomy": 0.8, "helm_mmlu/College Physics": 0.716, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.789, "helm_mmlu/Global Facts": 0.66, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.9, "helm_mmlu/Professional Psychology": 0.904, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.967, "helm_mmlu/Business Ethics": 0.86, "helm_mmlu/Clinical Knowledge": 0.879, "helm_mmlu/Conceptual Physics": 0.881, "helm_mmlu/Electrical Engineering": 0.814, "helm_mmlu/Elementary Mathematics": 0.862, "helm_mmlu/Formal Logic": 0.698, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.908, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.896, "helm_mmlu/Machine Learning": 0.741, "helm_mmlu/Management": 0.942, "helm_mmlu/Marketing": 0.944, "helm_mmlu/Medical Genetics": 0.93, "helm_mmlu/Miscellaneous": 0.951, "helm_mmlu/Moral Scenarios": 0.826, "helm_mmlu/Nutrition": 0.925, "helm_mmlu/Prehistory": 0.941, "helm_mmlu/Public Relations": 0.827, "helm_mmlu/Security Studies": 0.886, "helm_mmlu/Sociology": 0.94, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.901, "helm_mmlu/Mean win rate": 0.014, "reward-bench/Score": 0.5744, "reward-bench/Chat": 0.9469, "reward-bench/Chat Hard": 0.6031, "reward-bench/Safety": 0.8378, "reward-bench/Reasoning": 0.7868, "reward-bench/Factuality": 0.5389, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.5137, "reward-bench/Focus": 0.6646, "reward-bench/Ties": 0.5601 } }, { "id": "anthropic/claude-3-sonnet-20240229", "name": "Claude 3 Sonnet 20240229", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.377, "helm_lite/NarrativeQA": 0.111, "helm_lite/NaturalQuestions (closed-book)": 0.028, "helm_lite/OpenbookQA": 0.918, "helm_lite/MMLU": 0.652, "helm_lite/MATH": 0.084, "helm_lite/GSM8K": 0.907, "helm_lite/LegalBench": 0.49, "helm_lite/MedQA": 0.684, "helm_lite/WMT 2014": 0.218, "helm_mmlu/MMLU All Subjects": 0.759, "helm_mmlu/Abstract Algebra": 0.39, "helm_mmlu/Anatomy": 0.711, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.64, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.852, "helm_mmlu/Professional Psychology": 0.814, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.855, "helm_mmlu/Business Ethics": 0.82, "helm_mmlu/Clinical Knowledge": 0.804, "helm_mmlu/Conceptual Physics": 0.774, "helm_mmlu/Electrical Engineering": 0.703, "helm_mmlu/Elementary Mathematics": 0.635, "helm_mmlu/Formal Logic": 0.579, "helm_mmlu/High School World History": 0.895, "helm_mmlu/Human Sexuality": 0.809, "helm_mmlu/International Law": 0.909, "helm_mmlu/Logical Fallacies": 0.853, "helm_mmlu/Machine Learning": 0.643, "helm_mmlu/Management": 0.922, "helm_mmlu/Marketing": 0.85, "helm_mmlu/Medical Genetics": 0.79, "helm_mmlu/Miscellaneous": 0.872, "helm_mmlu/Moral Scenarios": 0.626, "helm_mmlu/Nutrition": 0.82, "helm_mmlu/Prehistory": 0.864, "helm_mmlu/Public Relations": 0.782, "helm_mmlu/Security Studies": 0.865, "helm_mmlu/Sociology": 0.905, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.082, "reward-bench/Score": 0.7458, "reward-bench/Chat": 0.9344, "reward-bench/Chat Hard": 0.5658, "reward-bench/Safety": 0.8169, "reward-bench/Reasoning": 0.6907, "reward-bench/Prior Sets (0.5 weight)": 0.6963 } }, { "id": "anthropic/claude-3.7-sonnet", "name": "anthropic/claude-3.7-sonnet", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.014084507042253521, "livecodebenchpro/Easy Problems": 0.15492957746478872 } }, { "id": "anthropic/claude-haiku-4-5-20251001-fc", "name": "Claude-Haiku-4-5-20251001 (FC)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 6.0, "bfcl/bfcl.overall.overall_accuracy": 68.7, "bfcl/bfcl.overall.total_cost_usd": 14.23, "bfcl/bfcl.overall.latency_mean_s": 1.68, "bfcl/bfcl.overall.latency_std_s": 3.92, "bfcl/bfcl.overall.latency_p95_s": 3.15, "bfcl/bfcl.non_live.ast_accuracy": 86.5, "bfcl/bfcl.non_live.simple_ast_accuracy": 71.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 78.68, "bfcl/bfcl.live.live_simple_ast_accuracy": 83.72, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.59, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 53.62, "bfcl/bfcl.multi_turn.base_accuracy": 63.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 42.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 52.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 56.0, "bfcl/bfcl.web_search.accuracy": 83.5, "bfcl/bfcl.web_search.base_accuracy": 86.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 81.0, "bfcl/bfcl.memory.accuracy": 54.41, "bfcl/bfcl.memory.kv_accuracy": 51.61, "bfcl/bfcl.memory.vector_accuracy": 55.48, "bfcl/bfcl.memory.recursive_summarization_accuracy": 56.13, "bfcl/bfcl.relevance.relevance_detection_accuracy": 62.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 85.11 } }, { "id": "anthropic/claude-haiku-4-5-20251001-prompt", "name": "Claude-Haiku-4-5-20251001 (Prompt)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 87.0, "bfcl/bfcl.overall.overall_accuracy": 25.26, "bfcl/bfcl.overall.total_cost_usd": 45.13, "bfcl/bfcl.overall.latency_mean_s": 3.75, "bfcl/bfcl.overall.latency_std_s": 19.96, "bfcl/bfcl.overall.latency_p95_s": 3.77, "bfcl/bfcl.non_live.ast_accuracy": 55.42, "bfcl/bfcl.non_live.simple_ast_accuracy": 55.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 38.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 44.0, "bfcl/bfcl.live.live_accuracy": 52.48, "bfcl/bfcl.live.live_simple_ast_accuracy": 66.67, "bfcl/bfcl.live.live_multiple_ast_accuracy": 49.76, "bfcl/bfcl.live.live_parallel_ast_accuracy": 56.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 16.67, "bfcl/bfcl.multi_turn.accuracy": 1.75, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 4.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.5, "bfcl/bfcl.web_search.accuracy": 19.5, "bfcl/bfcl.web_search.base_accuracy": 20.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 19.0, "bfcl/bfcl.memory.accuracy": 2.58, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 1.94, "bfcl/bfcl.memory.recursive_summarization_accuracy": 3.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 31.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 95.29, "bfcl/bfcl.format_sensitivity.max_delta": 67.5, "bfcl/bfcl.format_sensitivity.stddev": 20.07 } }, { "id": "anthropic/claude-haiku-4.5", "name": "Claude Haiku 4.5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 35.5 } }, { "id": "anthropic/claude-instant-1.2", "name": "Claude Instant 1.2", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.399, "helm_lite/NarrativeQA": 0.616, "helm_lite/NaturalQuestions (closed-book)": 0.343, "helm_lite/OpenbookQA": 0.844, "helm_lite/MMLU": 0.631, "helm_lite/MATH": 0.499, "helm_lite/GSM8K": 0.721, "helm_lite/LegalBench": 0.586, "helm_lite/MedQA": 0.559, "helm_lite/WMT 2014": 0.194, "helm_mmlu/MMLU All Subjects": 0.688, "helm_mmlu/Abstract Algebra": 0.37, "helm_mmlu/Anatomy": 0.637, "helm_mmlu/College Physics": 0.49, "helm_mmlu/Computer Security": 0.76, "helm_mmlu/Econometrics": 0.614, "helm_mmlu/Global Facts": 0.38, "helm_mmlu/Jurisprudence": 0.833, "helm_mmlu/Philosophy": 0.756, "helm_mmlu/Professional Psychology": 0.724, "helm_mmlu/Us Foreign Policy": 0.9, "helm_mmlu/Astronomy": 0.743, "helm_mmlu/Business Ethics": 0.7, "helm_mmlu/Clinical Knowledge": 0.709, "helm_mmlu/Conceptual Physics": 0.613, "helm_mmlu/Electrical Engineering": 0.641, "helm_mmlu/Elementary Mathematics": 0.45, "helm_mmlu/Formal Logic": 0.444, "helm_mmlu/High School World History": 0.878, "helm_mmlu/Human Sexuality": 0.794, "helm_mmlu/International Law": 0.851, "helm_mmlu/Logical Fallacies": 0.81, "helm_mmlu/Machine Learning": 0.67, "helm_mmlu/Management": 0.835, "helm_mmlu/Marketing": 0.885, "helm_mmlu/Medical Genetics": 0.71, "helm_mmlu/Miscellaneous": 0.828, "helm_mmlu/Moral Scenarios": 0.488, "helm_mmlu/Nutrition": 0.735, "helm_mmlu/Prehistory": 0.762, "helm_mmlu/Public Relations": 0.627, "helm_mmlu/Security Studies": 0.784, "helm_mmlu/Sociology": 0.841, "helm_mmlu/Virology": 0.548, "helm_mmlu/World Religions": 0.784, "helm_mmlu/Mean win rate": 0.186 } }, { "id": "anthropic/claude-opus-4-1-20250805", "name": "claude-opus-4-1-20250805", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.943, "global-mmlu-lite/Culturally Sensitive": 0.9331, "global-mmlu-lite/Culturally Agnostic": 0.9528, "global-mmlu-lite/Arabic": 0.945, "global-mmlu-lite/English": 0.9475, "global-mmlu-lite/Bengali": 0.9425, "global-mmlu-lite/German": 0.94, "global-mmlu-lite/French": 0.945, "global-mmlu-lite/Hindi": 0.9475, "global-mmlu-lite/Indonesian": 0.9425, "global-mmlu-lite/Italian": 0.94, "global-mmlu-lite/Japanese": 0.94, "global-mmlu-lite/Korean": 0.95, "global-mmlu-lite/Portuguese": 0.945, "global-mmlu-lite/Spanish": 0.945, "global-mmlu-lite/Swahili": 0.93, "global-mmlu-lite/Yoruba": 0.9375, "global-mmlu-lite/Chinese": 0.945, "global-mmlu-lite/Burmese": 0.945 } }, { "id": "anthropic/claude-opus-4-20250514", "name": "Claude 4 Opus 20250514", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.757, "helm_capabilities/MMLU-Pro": 0.859, "helm_capabilities/GPQA": 0.666, "helm_capabilities/IFEval": 0.918, "helm_capabilities/WildBench": 0.833, "helm_capabilities/Omni-MATH": 0.511, "reward-bench/Score": 0.7648, "reward-bench/Factuality": 0.8267, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.7491, "reward-bench/Safety": 0.8954, "reward-bench/Focus": 0.8616, "reward-bench/Ties": 0.8375 } }, { "id": "anthropic/claude-opus-4-20250514-thinking-10k", "name": "Claude 4 Opus 20250514, extended thinking", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.78, "helm_capabilities/MMLU-Pro": 0.875, "helm_capabilities/GPQA": 0.709, "helm_capabilities/IFEval": 0.849, "helm_capabilities/WildBench": 0.852, "helm_capabilities/Omni-MATH": 0.616 } }, { "id": "anthropic/claude-opus-4-5", "name": "claude-opus-4-5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "appworld_test_normal/appworld/test_normal": 0.68, "browsecompplus/browsecompplus": 0.61, "swe-bench/swe-bench": 0.65, "tau-bench-2_airline/tau-bench-2/airline": 0.66, "tau-bench-2_retail/tau-bench-2/retail": 0.78, "tau-bench-2_telecom/tau-bench-2/telecom": 0.84 } }, { "id": "anthropic/claude-opus-4-5-20251101-fc", "name": "Claude-Opus-4-5-20251101 (FC)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 1.0, "bfcl/bfcl.overall.overall_accuracy": 77.47, "bfcl/bfcl.overall.total_cost_usd": 86.55, "bfcl/bfcl.overall.latency_mean_s": 4.38, "bfcl/bfcl.overall.latency_std_s": 3.13, "bfcl/bfcl.overall.latency_p95_s": 7.56, "bfcl/bfcl.non_live.ast_accuracy": 88.58, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 79.79, "bfcl/bfcl.live.live_simple_ast_accuracy": 86.43, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.16, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 68.38, "bfcl/bfcl.multi_turn.base_accuracy": 81.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 64.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 58.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 70.5, "bfcl/bfcl.web_search.accuracy": 84.5, "bfcl/bfcl.web_search.base_accuracy": 84.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 85.0, "bfcl/bfcl.memory.accuracy": 73.76, "bfcl/bfcl.memory.kv_accuracy": 70.97, "bfcl/bfcl.memory.vector_accuracy": 72.9, "bfcl/bfcl.memory.recursive_summarization_accuracy": 77.42, "bfcl/bfcl.relevance.relevance_detection_accuracy": 62.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 84.72 } }, { "id": "anthropic/claude-opus-4-5-20251101-prompt", "name": "Claude-Opus-4-5-20251101 (Prompt)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 57.0, "bfcl/bfcl.overall.overall_accuracy": 33.47, "bfcl/bfcl.overall.total_cost_usd": 88.33, "bfcl/bfcl.overall.latency_mean_s": 3.76, "bfcl/bfcl.overall.latency_std_s": 13.19, "bfcl/bfcl.overall.latency_p95_s": 5.52, "bfcl/bfcl.non_live.ast_accuracy": 89.65, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.5, "bfcl/bfcl.live.live_accuracy": 76.02, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 74.17, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 16.12, "bfcl/bfcl.multi_turn.base_accuracy": 20.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 9.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 21.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 13.5, "bfcl/bfcl.web_search.accuracy": 13.0, "bfcl/bfcl.web_search.base_accuracy": 13.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 13.0, "bfcl/bfcl.memory.accuracy": 1.94, "bfcl/bfcl.memory.kv_accuracy": 1.29, "bfcl/bfcl.memory.vector_accuracy": 1.94, "bfcl/bfcl.memory.recursive_summarization_accuracy": 2.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 90.75, "bfcl/bfcl.format_sensitivity.max_delta": 13.0, "bfcl/bfcl.format_sensitivity.stddev": 3.65 } }, { "id": "anthropic/claude-opus-4.1", "name": "Claude Opus 4.1", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 35.1 } }, { "id": "anthropic/claude-opus-4.5", "name": "Claude Opus 4.5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 63.1 } }, { "id": "anthropic/claude-opus-4.6", "name": "Claude Opus 4.6", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 74.7 } }, { "id": "anthropic/claude-sonnet-4-20250514", "name": "claude-sonnet-4-20250514", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.9058, "global-mmlu-lite/Culturally Sensitive": 0.8913, "global-mmlu-lite/Culturally Agnostic": 0.9203, "global-mmlu-lite/Arabic": 0.9125, "global-mmlu-lite/English": 0.905, "global-mmlu-lite/Bengali": 0.9075, "global-mmlu-lite/German": 0.9125, "global-mmlu-lite/French": 0.91, "global-mmlu-lite/Hindi": 0.9, "global-mmlu-lite/Indonesian": 0.9025, "global-mmlu-lite/Italian": 0.9075, "global-mmlu-lite/Japanese": 0.9, "global-mmlu-lite/Korean": 0.9125, "global-mmlu-lite/Portuguese": 0.91, "global-mmlu-lite/Spanish": 0.9075, "global-mmlu-lite/Swahili": 0.8975, "global-mmlu-lite/Yoruba": 0.8975, "global-mmlu-lite/Chinese": 0.9175, "global-mmlu-lite/Burmese": 0.8925, "helm_capabilities/Mean score": 0.733, "helm_capabilities/MMLU-Pro": 0.843, "helm_capabilities/GPQA": 0.643, "helm_capabilities/IFEval": 0.839, "helm_capabilities/WildBench": 0.825, "helm_capabilities/Omni-MATH": 0.512, "reward-bench/Score": 0.7117, "reward-bench/Factuality": 0.7612, "reward-bench/Precise IF": 0.3594, "reward-bench/Math": 0.7049, "reward-bench/Safety": 0.8909, "reward-bench/Focus": 0.7596, "reward-bench/Ties": 0.7939 } }, { "id": "anthropic/claude-sonnet-4-20250514-thinking-10k", "name": "Claude 4 Sonnet 20250514, extended thinking", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.766, "helm_capabilities/MMLU-Pro": 0.843, "helm_capabilities/GPQA": 0.706, "helm_capabilities/IFEval": 0.84, "helm_capabilities/WildBench": 0.838, "helm_capabilities/Omni-MATH": 0.602 } }, { "id": "anthropic/claude-sonnet-4-5-20250929", "name": "claude-sonnet-4-5-20250929", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.5352 } }, { "id": "anthropic/claude-sonnet-4-5-20250929-fc", "name": "Claude-Sonnet-4-5-20250929 (FC)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 2.0, "bfcl/bfcl.overall.overall_accuracy": 73.24, "bfcl/bfcl.overall.total_cost_usd": 43.73, "bfcl/bfcl.overall.latency_mean_s": 4.31, "bfcl/bfcl.overall.latency_std_s": 4.43, "bfcl/bfcl.overall.latency_p95_s": 7.27, "bfcl/bfcl.non_live.ast_accuracy": 88.65, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.0, "bfcl/bfcl.live.live_accuracy": 81.13, "bfcl/bfcl.live.live_simple_ast_accuracy": 89.53, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.92, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 83.33, "bfcl/bfcl.multi_turn.accuracy": 61.37, "bfcl/bfcl.multi_turn.base_accuracy": 69.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 65.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 52.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 59.0, "bfcl/bfcl.web_search.accuracy": 81.0, "bfcl/bfcl.web_search.base_accuracy": 82.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 80.0, "bfcl/bfcl.memory.accuracy": 64.95, "bfcl/bfcl.memory.kv_accuracy": 54.19, "bfcl/bfcl.memory.vector_accuracy": 57.42, "bfcl/bfcl.memory.recursive_summarization_accuracy": 83.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.61 } }, { "id": "anthropic/claude-sonnet-4-5-20250929-prompt", "name": "Claude-Sonnet-4-5-20250929 (Prompt)", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 89.0, "bfcl/bfcl.overall.overall_accuracy": 24.9, "bfcl/bfcl.overall.total_cost_usd": 47.82, "bfcl/bfcl.overall.latency_mean_s": 3.84, "bfcl/bfcl.overall.latency_std_s": 1.53, "bfcl/bfcl.overall.latency_p95_s": 6.66, "bfcl/bfcl.non_live.ast_accuracy": 59.81, "bfcl/bfcl.non_live.simple_ast_accuracy": 47.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 79.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 53.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 59.0, "bfcl/bfcl.live.live_accuracy": 46.56, "bfcl/bfcl.live.live_simple_ast_accuracy": 73.26, "bfcl/bfcl.live.live_multiple_ast_accuracy": 40.17, "bfcl/bfcl.live.live_parallel_ast_accuracy": 56.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 33.33, "bfcl/bfcl.multi_turn.accuracy": 1.62, "bfcl/bfcl.multi_turn.base_accuracy": 2.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 3.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.5, "bfcl/bfcl.web_search.accuracy": 16.0, "bfcl/bfcl.web_search.base_accuracy": 16.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 16.0, "bfcl/bfcl.memory.accuracy": 5.38, "bfcl/bfcl.memory.kv_accuracy": 4.52, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 37.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 95.03, "bfcl/bfcl.format_sensitivity.max_delta": 37.5, "bfcl/bfcl.format_sensitivity.stddev": 10.07 } }, { "id": "anthropic/claude-sonnet-4.5", "name": "Claude Sonnet 4.5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 42.5 } }, { "id": "anthropic/claude-v1.3", "name": "Anthropic Claude v1.3", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "helm_instruct/Mean win rate": 0.611, "helm_instruct/Anthropic RLHF dataset": 4.965, "helm_instruct/Best ChatGPT Prompts": 4.995, "helm_instruct/Koala test dataset": 4.981, "helm_instruct/Open Assistant": 4.975, "helm_instruct/Self Instruct": 4.992, "helm_instruct/Vicuna": 4.989, "helm_lite/Mean win rate": 0.518, "helm_lite/NarrativeQA": 0.723, "helm_lite/NaturalQuestions (closed-book)": 0.409, "helm_lite/OpenbookQA": 0.908, "helm_lite/MMLU": 0.631, "helm_lite/MATH": 0.54, "helm_lite/GSM8K": 0.784, "helm_lite/LegalBench": 0.629, "helm_lite/MedQA": 0.618, "helm_lite/WMT 2014": 0.219 } }, { "id": "anthropic/Opus 4.1", "name": "Opus 4.1", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.4, "ace/Gaming Score": 0.318 } }, { "id": "anthropic/Opus 4.5", "name": "Opus 4.5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.478, "ace/Gaming Score": 0.391, "apex-agents/Overall Pass@1": 0.184, "apex-agents/Overall Pass@8": 0.34, "apex-agents/Overall Mean Score": 0.348, "apex-agents/Investment Banking Pass@1": 0.216, "apex-agents/Management Consulting Pass@1": 0.132, "apex-agents/Corporate Law Pass@1": 0.202, "apex-agents/Corporate Lawyer Mean Score": 0.471, "apex-v1/Medicine (MD) Score": 0.65 } }, { "id": "anthropic/Opus 4.6", "name": "Opus 4.6", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.298, "apex-agents/Corporate Lawyer Mean Score": 0.502 } }, { "id": "anthropic/Sonnet 4.5", "name": "Sonnet 4.5", "developer": "Anthropic", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.44, "ace/Gaming Score": 0.373 } }, { "id": "apple/DCLM-7B", "name": "DCLM-7B", "developer": "apple", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2173, "hfopenllm_v2/BBH": 0.4232, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.3921, "hfopenllm_v2/MMLU-PRO": 0.3111 } }, { "id": "applied-compute/Applied Compute: Small", "name": "Applied Compute: Small", "developer": "applied-compute", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.23, "apex-agents/Overall Mean Score": 0.401, "apex-agents/Corporate Law Pass@1": 0.266, "apex-agents/Corporate Lawyer Mean Score": 0.548 } }, { "id": "appvoid/arco-2", "name": "arco-2", "developer": "appvoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1991, "hfopenllm_v2/BBH": 0.3146, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3536, "hfopenllm_v2/MMLU-PRO": 0.1116 } }, { "id": "appvoid/arco-2-instruct", "name": "arco-2-instruct", "developer": "appvoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2164, "hfopenllm_v2/BBH": 0.3133, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3496, "hfopenllm_v2/MMLU-PRO": 0.1113 } }, { "id": "arcee-ai/Arcee-Blitz", "name": "Arcee-Blitz", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5543, "hfopenllm_v2/BBH": 0.6607, "hfopenllm_v2/MATH Level 5": 0.3482, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.5047, "hfopenllm_v2/MMLU-PRO": 0.6154 } }, { "id": "arcee-ai/Arcee-Maestro-7B-Preview", "name": "Arcee-Maestro-7B-Preview", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.275, "hfopenllm_v2/BBH": 0.4648, "hfopenllm_v2/MATH Level 5": 0.4992, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.3885, "hfopenllm_v2/MMLU-PRO": 0.3039 } }, { "id": "arcee-ai/Arcee-Nova", "name": "Arcee-Nova", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7907, "hfopenllm_v2/BBH": 0.6942, "hfopenllm_v2/MATH Level 5": 0.4381, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4562, "hfopenllm_v2/MMLU-PRO": 0.5452 } }, { "id": "arcee-ai/Arcee-Spark", "name": "Arcee-Spark", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5718, "hfopenllm_v2/BBH": 0.5481, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4008, "hfopenllm_v2/MMLU-PRO": 0.3813 } }, { "id": "arcee-ai/Llama-3.1-SuperNova-Lite", "name": "Llama-3.1-SuperNova-Lite", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8017, "hfopenllm_v2/BBH": 0.5152, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4163, "hfopenllm_v2/MMLU-PRO": 0.3877 } }, { "id": "arcee-ai/Llama-Spark", "name": "Llama-Spark", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7911, "hfopenllm_v2/BBH": 0.5054, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.3721 } }, { "id": "arcee-ai/raspberry-3B", "name": "raspberry-3B", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3154, "hfopenllm_v2/BBH": 0.4269, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.2854 } }, { "id": "arcee-ai/SuperNova-Medius", "name": "SuperNova-Medius", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7184, "hfopenllm_v2/BBH": 0.6377, "hfopenllm_v2/MATH Level 5": 0.469, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4233, "hfopenllm_v2/MMLU-PRO": 0.5035 } }, { "id": "arcee-ai/Virtuoso-Lite", "name": "Virtuoso-Lite", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.81, "hfopenllm_v2/BBH": 0.6099, "hfopenllm_v2/MATH Level 5": 0.253, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4441 } }, { "id": "arcee-ai/Virtuoso-Small", "name": "Virtuoso-Small", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7935, "hfopenllm_v2/BBH": 0.6518, "hfopenllm_v2/MATH Level 5": 0.4094, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.5191 } }, { "id": "arcee-ai/Virtuoso-Small-v2", "name": "Virtuoso-Small-v2", "developer": "arcee-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8273, "hfopenllm_v2/BBH": 0.6554, "hfopenllm_v2/MATH Level 5": 0.466, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4313, "hfopenllm_v2/MMLU-PRO": 0.5188 } }, { "id": "argilla-warehouse/Llama-3.1-8B-MagPie-Ultra", "name": "Llama-3.1-8B-MagPie-Ultra", "developer": "argilla-warehouse", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5757, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3543, "hfopenllm_v2/MMLU-PRO": 0.3144 } }, { "id": "argilla/notus-7b-v1", "name": "notus-7b-v1", "developer": "argilla", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5082, "hfopenllm_v2/BBH": 0.4512, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3364, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "argilla/notux-8x7b-v1", "name": "notux-8x7b-v1", "developer": "argilla", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5422, "hfopenllm_v2/BBH": 0.5363, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4176, "hfopenllm_v2/MMLU-PRO": 0.366 } }, { "id": "arisin/orca-platypus-13B-slerp", "name": "orca-platypus-13B-slerp", "developer": "arisin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2672, "hfopenllm_v2/BBH": 0.4631, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.2592 } }, { "id": "ark/ep-20250603132404-cgpjm", "name": "ep-20250603132404-cgpjm", "developer": "ark", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0141, "livecodebenchpro/Easy Problems": 0.507 } }, { "id": "ArliAI/ArliAI-RPMax-12B-v1.1", "name": "ArliAI-RPMax-12B-v1.1", "developer": "ArliAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5349, "hfopenllm_v2/BBH": 0.4752, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3618, "hfopenllm_v2/MMLU-PRO": 0.3384 } }, { "id": "ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1", "name": "Llama-3.1-8B-ArliAI-RPMax-v1.1", "developer": "ArliAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6359, "hfopenllm_v2/BBH": 0.5016, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3577, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "arshiaafshani/Arsh-V1", "name": "Arsh-V1", "developer": "arshiaafshani", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6043, "hfopenllm_v2/BBH": 0.674, "hfopenllm_v2/MATH Level 5": 0.2621, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4899, "hfopenllm_v2/MMLU-PRO": 0.5257 } }, { "id": "Arthur-LAGACHERIE/Precis-1B-Instruct", "name": "Precis-1B-Instruct", "developer": "Arthur-LAGACHERIE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3671, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3436, "hfopenllm_v2/MMLU-PRO": 0.1426 } }, { "id": "Artples/L-MChat-7b", "name": "L-MChat-7b", "developer": "Artples", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5297, "hfopenllm_v2/BBH": 0.46, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4029, "hfopenllm_v2/MMLU-PRO": 0.3299 } }, { "id": "Artples/L-MChat-Small", "name": "L-MChat-Small", "developer": "Artples", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3287, "hfopenllm_v2/BBH": 0.4823, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3696, "hfopenllm_v2/MMLU-PRO": 0.2464 } }, { "id": "Aryanne/QwentileSwap", "name": "QwentileSwap", "developer": "Aryanne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.7008, "hfopenllm_v2/MATH Level 5": 0.4222, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.464, "hfopenllm_v2/MMLU-PRO": 0.5946 } }, { "id": "Aryanne/SHBA", "name": "SHBA", "developer": "Aryanne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7817, "hfopenllm_v2/BBH": 0.5233, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.3892 } }, { "id": "Aryanne/SuperHeart", "name": "SuperHeart", "developer": "Aryanne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5192, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4436, "hfopenllm_v2/MMLU-PRO": 0.3912 } }, { "id": "asharsha30/LLAMA_Harsha_8_B_ORDP_10k", "name": "LLAMA_Harsha_8_B_ORDP_10k", "developer": "asharsha30", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3464, "hfopenllm_v2/BBH": 0.4669, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3697, "hfopenllm_v2/MMLU-PRO": 0.281 } }, { "id": "ashercn97/a1-v0.0.1", "name": "a1-v0.0.1", "developer": "ashercn97", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2198, "hfopenllm_v2/BBH": 0.5188, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.412, "hfopenllm_v2/MMLU-PRO": 0.4165 } }, { "id": "ashercn97/a1-v002", "name": "a1-v002", "developer": "ashercn97", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2585, "hfopenllm_v2/BBH": 0.5261, "hfopenllm_v2/MATH Level 5": 0.2341, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4159, "hfopenllm_v2/MMLU-PRO": 0.4175 } }, { "id": "assskelad/smollm2-360M-sft_SmallThoughts", "name": "smollm2-360M-sft_SmallThoughts", "developer": "assskelad", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2007, "hfopenllm_v2/BBH": 0.315, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1182 } }, { "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt", "name": "Qwen2.5-1.5B-continuous-learnt", "developer": "AtAndDev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4511, "hfopenllm_v2/BBH": 0.4275, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3623, "hfopenllm_v2/MMLU-PRO": 0.2806 } }, { "id": "Ateron/Glowing-Forest-12B", "name": "Glowing-Forest-12B", "developer": "Ateron", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3592, "hfopenllm_v2/BBH": 0.5492, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4449, "hfopenllm_v2/MMLU-PRO": 0.3718 } }, { "id": "Ateron/Lotus-Magpic", "name": "Lotus-Magpic", "developer": "Ateron", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6286, "hfopenllm_v2/BBH": 0.5254, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.3491 } }, { "id": "Ateron/Way_of_MagPicaro", "name": "Way_of_MagPicaro", "developer": "Ateron", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2637, "hfopenllm_v2/BBH": 0.5427, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4649, "hfopenllm_v2/MMLU-PRO": 0.3536 } }, { "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", "name": "Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit", "developer": "athirdpath", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4521, "hfopenllm_v2/BBH": 0.4939, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3864, "hfopenllm_v2/MMLU-PRO": 0.3565 } }, { "id": "AtlaAI/Selene-1", "name": "AtlaAI/Selene-1", "developer": "AtlaAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9241, "reward-bench/Chat": 0.9777, "reward-bench/Chat Hard": 0.8399, "reward-bench/Safety": 0.9216, "reward-bench/Reasoning": 0.9572 } }, { "id": "AtlaAI/Selene-1-Mini-Llama-3.1-8B", "name": "AtlaAI/Selene-1-Mini-Llama-3.1-8B", "developer": "AtlaAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8913, "reward-bench/Chat": 0.9358, "reward-bench/Chat Hard": 0.7939, "reward-bench/Safety": 0.8926, "reward-bench/Reasoning": 0.9429 } }, { "id": "AuraIndustries/Aura-4B", "name": "Aura-4B", "developer": "AuraIndustries", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3816, "hfopenllm_v2/BBH": 0.449, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3938, "hfopenllm_v2/MMLU-PRO": 0.2706 } }, { "id": "AuraIndustries/Aura-8B", "name": "Aura-8B", "developer": "AuraIndustries", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7205, "hfopenllm_v2/BBH": 0.5131, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.3874 } }, { "id": "AuraIndustries/Aura-MoE-2x4B", "name": "Aura-MoE-2x4B", "developer": "AuraIndustries", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4601, "hfopenllm_v2/BBH": 0.4339, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4085, "hfopenllm_v2/MMLU-PRO": 0.265 } }, { "id": "AuraIndustries/Aura-MoE-2x4B-v2", "name": "Aura-MoE-2x4B-v2", "developer": "AuraIndustries", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4778, "hfopenllm_v2/BBH": 0.4315, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.261 } }, { "id": "Aurel9/testmerge-7b", "name": "testmerge-7b", "developer": "Aurel9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.398, "hfopenllm_v2/BBH": 0.519, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4659, "hfopenllm_v2/MMLU-PRO": 0.3053 } }, { "id": "automerger/YamshadowExperiment28-7B", "name": "YamshadowExperiment28-7B", "developer": "automerger", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.407, "hfopenllm_v2/BBH": 0.515, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.306 } }, { "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI", "name": "GRAG-NEMO-12B-ORPO-HESSIAN-AI", "developer": "avemio", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.2607, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.1061 } }, { "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-2", "name": "Mistral-7B-v0.1-signtensors-1-over-2", "developer": "awnr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2179, "hfopenllm_v2/BBH": 0.4423, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4006, "hfopenllm_v2/MMLU-PRO": 0.3 } }, { "id": "awnr/Mistral-7B-v0.1-signtensors-1-over-4", "name": "Mistral-7B-v0.1-signtensors-1-over-4", "developer": "awnr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2133, "hfopenllm_v2/BBH": 0.3507, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.2311 } }, { "id": "awnr/Mistral-7B-v0.1-signtensors-3-over-8", "name": "Mistral-7B-v0.1-signtensors-3-over-8", "developer": "awnr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2394, "hfopenllm_v2/BBH": 0.43, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.3001 } }, { "id": "awnr/Mistral-7B-v0.1-signtensors-5-over-16", "name": "Mistral-7B-v0.1-signtensors-5-over-16", "developer": "awnr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2118, "hfopenllm_v2/BBH": 0.4124, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3686, "hfopenllm_v2/MMLU-PRO": 0.2958 } }, { "id": "awnr/Mistral-7B-v0.1-signtensors-7-over-16", "name": "Mistral-7B-v0.1-signtensors-7-over-16", "developer": "awnr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2294, "hfopenllm_v2/BBH": 0.4316, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3952, "hfopenllm_v2/MMLU-PRO": 0.303 } }, { "id": "aws-prototyping/MegaBeam-Mistral-7B-512k", "name": "MegaBeam-Mistral-7B-512k", "developer": "aws-prototyping", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5973, "hfopenllm_v2/BBH": 0.3662, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.2589 } }, { "id": "axolotl-ai-co/romulus-mistral-nemo-12b-simpo", "name": "romulus-mistral-nemo-12b-simpo", "developer": "axolotl-ai-co", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6079, "hfopenllm_v2/BBH": 0.5395, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4233, "hfopenllm_v2/MMLU-PRO": 0.3469 } }, { "id": "Ayush-Singh/Llama1B-sft-2", "name": "Llama1B-sft-2", "developer": "Ayush-Singh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1374, "hfopenllm_v2/BBH": 0.2834, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3552, "hfopenllm_v2/MMLU-PRO": 0.1117 } }, { "id": "Azure99/blossom-v5-32b", "name": "blossom-v5-32b", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5235, "hfopenllm_v2/BBH": 0.5955, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.4235 } }, { "id": "Azure99/blossom-v5-llama3-8b", "name": "blossom-v5-llama3-8b", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4343, "hfopenllm_v2/BBH": 0.4185, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.367, "hfopenllm_v2/MMLU-PRO": 0.2206 } }, { "id": "Azure99/blossom-v5.1-34b", "name": "blossom-v5.1-34b", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5697, "hfopenllm_v2/BBH": 0.6109, "hfopenllm_v2/MATH Level 5": 0.2591, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.4558 } }, { "id": "Azure99/blossom-v5.1-9b", "name": "blossom-v5.1-9b", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5086, "hfopenllm_v2/BBH": 0.5343, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.3979 } }, { "id": "Azure99/Blossom-V6-14B", "name": "Blossom-V6-14B", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6395, "hfopenllm_v2/BBH": 0.5069, "hfopenllm_v2/MATH Level 5": 0.5257, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4035, "hfopenllm_v2/MMLU-PRO": 0.4544 } }, { "id": "Azure99/Blossom-V6-7B", "name": "Blossom-V6-7B", "developer": "Azure99", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5538, "hfopenllm_v2/BBH": 0.4974, "hfopenllm_v2/MATH Level 5": 0.4585, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4301, "hfopenllm_v2/MMLU-PRO": 0.4144 } }, { "id": "Ba2han/Llama-Phi-3_DoRA", "name": "Llama-Phi-3_DoRA", "developer": "Ba2han", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5131, "hfopenllm_v2/BBH": 0.5515, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.3915 } }, { "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference", "name": "Gemma2-9B-IT-Simpo-Infinity-Preference", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3176, "hfopenllm_v2/BBH": 0.5979, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.3869 } }, { "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B", "name": "Infinity-Instruct-3M-0613-Llama3-70B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6821, "hfopenllm_v2/BBH": 0.6642, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4523, "hfopenllm_v2/MMLU-PRO": 0.473 } }, { "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B", "name": "Infinity-Instruct-3M-0613-Mistral-7B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.532, "hfopenllm_v2/BBH": 0.4958, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.3161 } }, { "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B", "name": "Infinity-Instruct-3M-0625-Llama3-70B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7442, "hfopenllm_v2/BBH": 0.667, "hfopenllm_v2/MATH Level 5": 0.2251, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.4617, "hfopenllm_v2/MMLU-PRO": 0.4586 } }, { "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B", "name": "Infinity-Instruct-3M-0625-Llama3-8B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.605, "hfopenllm_v2/BBH": 0.4955, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3712, "hfopenllm_v2/MMLU-PRO": 0.3252 } }, { "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B", "name": "Infinity-Instruct-3M-0625-Mistral-7B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5867, "hfopenllm_v2/BBH": 0.494, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4272, "hfopenllm_v2/MMLU-PRO": 0.323 } }, { "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B", "name": "Infinity-Instruct-3M-0625-Qwen2-7B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5554, "hfopenllm_v2/BBH": 0.5346, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.3888, "hfopenllm_v2/MMLU-PRO": 0.396 } }, { "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B", "name": "Infinity-Instruct-3M-0625-Yi-1.5-9B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5186, "hfopenllm_v2/BBH": 0.5509, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4575, "hfopenllm_v2/MMLU-PRO": 0.4118 } }, { "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B", "name": "Infinity-Instruct-7M-0729-Llama3_1-8B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6132, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.3224 } }, { "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B", "name": "Infinity-Instruct-7M-0729-mistral-7B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6162, "hfopenllm_v2/BBH": 0.4964, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4062, "hfopenllm_v2/MMLU-PRO": 0.3274 } }, { "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B", "name": "Infinity-Instruct-7M-Gen-Llama3_1-70B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7335, "hfopenllm_v2/BBH": 0.6695, "hfopenllm_v2/MATH Level 5": 0.2523, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4539, "hfopenllm_v2/MMLU-PRO": 0.4607 } }, { "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B", "name": "Infinity-Instruct-7M-Gen-Llama3_1-8B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6132, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.3224 } }, { "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B", "name": "Infinity-Instruct-7M-Gen-mistral-7B", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6147, "hfopenllm_v2/BBH": 0.4964, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4062, "hfopenllm_v2/MMLU-PRO": 0.3274 } }, { "id": "BAAI/OPI-Llama-3.1-8B-Instruct", "name": "OPI-Llama-3.1-8B-Instruct", "developer": "BAAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2075, "hfopenllm_v2/BBH": 0.3551, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3233, "hfopenllm_v2/MMLU-PRO": 0.2124 } }, { "id": "baconnier/Napoleon_24B_V0.0", "name": "Napoleon_24B_V0.0", "developer": "baconnier", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1801, "hfopenllm_v2/BBH": 0.6367, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.442, "hfopenllm_v2/MMLU-PRO": 0.504 } }, { "id": "baconnier/Napoleon_24B_V0.2", "name": "Napoleon_24B_V0.2", "developer": "baconnier", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2527, "hfopenllm_v2/BBH": 0.5911, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.446, "hfopenllm_v2/MMLU-PRO": 0.4357 } }, { "id": "baebee/7B-Cetacea", "name": "7B-Cetacea", "developer": "baebee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5279, "hfopenllm_v2/BBH": 0.4757, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.2955 } }, { "id": "baebee/mergekit-model_stock-nzjnheg", "name": "mergekit-model_stock-nzjnheg", "developer": "baebee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4844, "hfopenllm_v2/BBH": 0.5287, "hfopenllm_v2/MATH Level 5": 0.1677, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.3699 } }, { "id": "baebee/mergekit-ties-fnjenli", "name": "mergekit-ties-fnjenli", "developer": "baebee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1988, "hfopenllm_v2/BBH": 0.3024, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "bamec66557/mergekit-model_stock-zdaysvi", "name": "mergekit-model_stock-zdaysvi", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6426, "hfopenllm_v2/BBH": 0.5063, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.3688 } }, { "id": "bamec66557/mergekit-ties-sinbkow", "name": "mergekit-ties-sinbkow", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6432, "hfopenllm_v2/BBH": 0.5092, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.3603 } }, { "id": "bamec66557/MISCHIEVOUS-12B", "name": "MISCHIEVOUS-12B", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3852, "hfopenllm_v2/BBH": 0.5405, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.3672 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v", "name": "MISCHIEVOUS-12B-Mix_0.1v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3636, "hfopenllm_v2/BBH": 0.5436, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4132, "hfopenllm_v2/MMLU-PRO": 0.3674 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v", "name": "MISCHIEVOUS-12B-Mix_0.2v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3624, "hfopenllm_v2/BBH": 0.5434, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3663 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v", "name": "MISCHIEVOUS-12B-Mix_0.3v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.387, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4131, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v", "name": "MISCHIEVOUS-12B-Mix_0.4v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6508, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4176, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v", "name": "MISCHIEVOUS-12B-Mix_0.5v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3746, "hfopenllm_v2/BBH": 0.5422, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4132, "hfopenllm_v2/MMLU-PRO": 0.3661 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v", "name": "MISCHIEVOUS-12B-Mix_0.6v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4366, "hfopenllm_v2/BBH": 0.5449, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3662 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V", "name": "MISCHIEVOUS-12B-Mix_III_ex_V", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4316, "hfopenllm_v2/BBH": 0.5449, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3649 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V", "name": "MISCHIEVOUS-12B-Mix_III_IV_V", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4031, "hfopenllm_v2/BBH": 0.5465, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo", "name": "MISCHIEVOUS-12B-Mix_Neo", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.625, "hfopenllm_v2/BBH": 0.5078, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.415, "hfopenllm_v2/MMLU-PRO": 0.3685 } }, { "id": "bamec66557/Mistral-Nemo-VICIOUS_MESH-12B-2407", "name": "Mistral-Nemo-VICIOUS_MESH-12B-2407", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6706, "hfopenllm_v2/BBH": 0.5156, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.3677 } }, { "id": "bamec66557/NameLess-12B-prob", "name": "NameLess-12B-prob", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6602, "hfopenllm_v2/BBH": 0.5158, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4336, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "bamec66557/VICIOUS_MESH-12B", "name": "VICIOUS_MESH-12B", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3716, "hfopenllm_v2/BBH": 0.5436, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "bamec66557/VICIOUS_MESH-12B-0.1v", "name": "VICIOUS_MESH-12B-0.1v", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3657, "hfopenllm_v2/BBH": 0.5412, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver", "name": "VICIOUS_MESH-12B-0.X.ver", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3776, "hfopenllm_v2/BBH": 0.5416, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3671 } }, { "id": "bamec66557/VICIOUS_MESH-12B-ALPHA", "name": "VICIOUS_MESH-12B-ALPHA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6365, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3697 } }, { "id": "bamec66557/VICIOUS_MESH-12B-BETA", "name": "VICIOUS_MESH-12B-BETA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6721, "hfopenllm_v2/BBH": 0.5156, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "bamec66557/VICIOUS_MESH-12B-DELTA", "name": "VICIOUS_MESH-12B-DELTA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6469, "hfopenllm_v2/BBH": 0.5055, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4057, "hfopenllm_v2/MMLU-PRO": 0.3651 } }, { "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA", "name": "VICIOUS_MESH-12B-DIGAMMA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6429, "hfopenllm_v2/BBH": 0.5061, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4097, "hfopenllm_v2/MMLU-PRO": 0.3659 } }, { "id": "bamec66557/VICIOUS_MESH-12B-EPSILON", "name": "VICIOUS_MESH-12B-EPSILON", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6305, "hfopenllm_v2/BBH": 0.5038, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.407, "hfopenllm_v2/MMLU-PRO": 0.3648 } }, { "id": "bamec66557/VICIOUS_MESH-12B-GAMMA", "name": "VICIOUS_MESH-12B-GAMMA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6362, "hfopenllm_v2/BBH": 0.5182, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4363, "hfopenllm_v2/MMLU-PRO": 0.3666 } }, { "id": "bamec66557/VICIOUS_MESH-12B-NEMO", "name": "VICIOUS_MESH-12B-NEMO", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4022, "hfopenllm_v2/BBH": 0.5442, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3716 } }, { "id": "bamec66557/VICIOUS_MESH-12B-OMEGA", "name": "VICIOUS_MESH-12B-OMEGA", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.67, "hfopenllm_v2/BBH": 0.5166, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.3677 } }, { "id": "bamec66557/VICIOUS_MESH-12B-UNION", "name": "VICIOUS_MESH-12B-UNION", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6429, "hfopenllm_v2/BBH": 0.5107, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4257, "hfopenllm_v2/MMLU-PRO": 0.3672 } }, { "id": "bamec66557/VICIOUS_MESH-12B_Razor", "name": "VICIOUS_MESH-12B_Razor", "developer": "bamec66557", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3736, "hfopenllm_v2/BBH": 0.5447, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4092, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB", "name": "LeTriomphant2.2_ECE_iLAB", "developer": "Baptiste-HUVELLE-10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5076, "hfopenllm_v2/BBH": 0.7256, "hfopenllm_v2/MATH Level 5": 0.4449, "hfopenllm_v2/GPQA": 0.3993, "hfopenllm_v2/MUSR": 0.4626, "hfopenllm_v2/MMLU-PRO": 0.5851 } }, { "id": "BEE-spoke-data/Meta-Llama-3-8Bee", "name": "Meta-Llama-3-8Bee", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1951, "hfopenllm_v2/BBH": 0.4626, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.3654, "hfopenllm_v2/MMLU-PRO": 0.322 } }, { "id": "BEE-spoke-data/smol_llama-101M-GQA", "name": "smol_llama-101M-GQA", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1384, "hfopenllm_v2/BBH": 0.3018, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.1107 } }, { "id": "BEE-spoke-data/smol_llama-220M-GQA", "name": "smol_llama-220M-GQA", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2386, "hfopenllm_v2/BBH": 0.3032, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.4059, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "BEE-spoke-data/smol_llama-220M-GQA-fineweb_edu", "name": "smol_llama-220M-GQA-fineweb_edu", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1988, "hfopenllm_v2/BBH": 0.2929, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4368, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "BEE-spoke-data/smol_llama-220M-openhermes", "name": "smol_llama-220M-openhermes", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1555, "hfopenllm_v2/BBH": 0.3028, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan", "name": "tFINE-900m-e16-d32-flan", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1506, "hfopenllm_v2/BBH": 0.3028, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2332, "hfopenllm_v2/MUSR": 0.3724, "hfopenllm_v2/MMLU-PRO": 0.1307 } }, { "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", "name": "tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1321, "hfopenllm_v2/BBH": 0.3138, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.4393, "hfopenllm_v2/MMLU-PRO": 0.1237 } }, { "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e", "name": "tFINE-900m-e16-d32-instruct_2e", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1403, "hfopenllm_v2/BBH": 0.3135, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.1237 } }, { "id": "BEE-spoke-data/tFINE-900m-instruct-orpo", "name": "tFINE-900m-instruct-orpo", "developer": "BEE-spoke-data", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.133, "hfopenllm_v2/BBH": 0.3022, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3409, "hfopenllm_v2/MMLU-PRO": 0.1152 } }, { "id": "belztjti/dffghgjh", "name": "dffghgjh", "developer": "belztjti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5784, "hfopenllm_v2/BBH": 0.3582, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.3422 } }, { "id": "belztjti/dtfgv", "name": "dtfgv", "developer": "belztjti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3345, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.1504 } }, { "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0", "name": "Qwen2.5-72B-2x-Instruct-TIES-v1.0", "developer": "BenevolenceMessiah", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5473, "hfopenllm_v2/BBH": 0.7273, "hfopenllm_v2/MATH Level 5": 0.5785, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.5628 } }, { "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", "name": "Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0", "developer": "BenevolenceMessiah", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3012, "hfopenllm_v2/BBH": 0.4909, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.268 } }, { "id": "benhaotang/phi4-qwq-sky-t1", "name": "phi4-qwq-sky-t1", "developer": "benhaotang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.046, "hfopenllm_v2/BBH": 0.6711, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.49, "hfopenllm_v2/MMLU-PRO": 0.5244 } }, { "id": "beomi/gemma-mling-7b", "name": "gemma-mling-7b", "developer": "beomi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2029, "hfopenllm_v2/BBH": 0.4068, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3759, "hfopenllm_v2/MMLU-PRO": 0.2633 } }, { "id": "beowolx/CodeNinja-1.0-OpenChat-7B", "name": "CodeNinja-1.0-OpenChat-7B", "developer": "beowolx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5447, "hfopenllm_v2/BBH": 0.4441, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4243, "hfopenllm_v2/MMLU-PRO": 0.3015 } }, { "id": "berkeley-nest/Starling-LM-7B-alpha", "name": "Starling-LM-7B-alpha", "developer": "berkeley-nest", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.548, "hfopenllm_v2/BBH": 0.444, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.412, "hfopenllm_v2/MMLU-PRO": 0.3172 } }, { "id": "berkeley-nest/Starling-RM-7B-alpha", "name": "berkeley-nest/Starling-RM-7B-alpha", "developer": "berkeley-nest", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7113, "reward-bench/Chat": 0.9804, "reward-bench/Chat Hard": 0.4561, "reward-bench/Safety": 0.8446, "reward-bench/Reasoning": 0.58, "reward-bench/Prior Sets (0.5 weight)": 0.6794 } }, { "id": "bfuzzy1/acheron", "name": "acheron", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1983, "hfopenllm_v2/BBH": 0.3108, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3511, "hfopenllm_v2/MMLU-PRO": 0.1096 } }, { "id": "bfuzzy1/acheron-c", "name": "acheron-c", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1929, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1172 } }, { "id": "bfuzzy1/acheron-d", "name": "acheron-d", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1925, "hfopenllm_v2/BBH": 0.314, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3497, "hfopenllm_v2/MMLU-PRO": 0.1134 } }, { "id": "bfuzzy1/acheron-m", "name": "acheron-m", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1758, "hfopenllm_v2/BBH": 0.2928, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1113 } }, { "id": "bfuzzy1/acheron-m1a-llama", "name": "acheron-m1a-llama", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1125, "hfopenllm_v2/BBH": 0.2956, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.1146 } }, { "id": "bfuzzy1/Gunny", "name": "Gunny", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7129, "hfopenllm_v2/BBH": 0.4546, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3583, "hfopenllm_v2/MMLU-PRO": 0.3039 } }, { "id": "bfuzzy1/llambses-1", "name": "llambses-1", "developer": "bfuzzy1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3554, "hfopenllm_v2/BBH": 0.5047, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4529, "hfopenllm_v2/MMLU-PRO": 0.314 } }, { "id": "bhuvneshsaini/merged_model", "name": "merged_model", "developer": "bhuvneshsaini", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1813, "hfopenllm_v2/BBH": 0.336, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3497, "hfopenllm_v2/MMLU-PRO": 0.1445 } }, { "id": "bigcode/starcoder2-15b", "name": "starcoder2-15b", "developer": "bigcode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.278, "hfopenllm_v2/BBH": 0.4448, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.2353 } }, { "id": "bigcode/starcoder2-3b", "name": "starcoder2-3b", "developer": "bigcode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2037, "hfopenllm_v2/BBH": 0.3509, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3435, "hfopenllm_v2/MMLU-PRO": 0.1636 } }, { "id": "bigcode/starcoder2-7b", "name": "starcoder2-7b", "developer": "bigcode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2209, "hfopenllm_v2/BBH": 0.3661, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.1642 } }, { "id": "bigscience/BLOOM-176B", "name": "BLOOM 176B", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.446, "helm_classic/MMLU": 0.299, "helm_classic/BoolQ": 0.704, "helm_classic/NarrativeQA": 0.662, "helm_classic/NaturalQuestions (open-book)": 0.621, "helm_classic/QuAC": 0.361, "helm_classic/HellaSwag": 0.744, "helm_classic/OpenbookQA": 0.534, "helm_classic/TruthfulQA": 0.205, "helm_classic/MS MARCO (TREC)": 0.386, "helm_classic/CNN/DailyMail": 0.08, "helm_classic/XSUM": 0.03, "helm_classic/IMDB": 0.945, "helm_classic/CivilComments": 0.62, "helm_classic/RAFT": 0.592 } }, { "id": "bigscience/bloom-1b1", "name": "bloom-1b1", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1373, "hfopenllm_v2/BBH": 0.3107, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.1108 } }, { "id": "bigscience/bloom-1b7", "name": "bloom-1b7", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1044, "hfopenllm_v2/BBH": 0.3141, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3886, "hfopenllm_v2/MMLU-PRO": 0.1086 } }, { "id": "bigscience/bloom-3b", "name": "bloom-3b", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1271, "hfopenllm_v2/BBH": 0.3063, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.1133 } }, { "id": "bigscience/bloom-560m", "name": "bloom-560m", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.062, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "bigscience/bloom-7b1", "name": "bloom-7b1", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1322, "hfopenllm_v2/BBH": 0.3114, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1105 } }, { "id": "bigscience/T0pp-11B", "name": "T0pp 11B", "developer": "bigscience", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.197, "helm_classic/MMLU": 0.407, "helm_classic/BoolQ": 0.0, "helm_classic/NarrativeQA": 0.151, "helm_classic/NaturalQuestions (open-book)": 0.19, "helm_classic/QuAC": 0.121, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.377, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.122, "helm_classic/XSUM": 0.09, "helm_classic/IMDB": 0.207, "helm_classic/CivilComments": 0.234, "helm_classic/RAFT": 0.118 } }, { "id": "bittensor/bitagent-bounty-8b", "name": "BitAgent-Bounty-8B", "developer": "bittensor", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 36.0, "bfcl/bfcl.overall.overall_accuracy": 46.23, "bfcl/bfcl.overall.total_cost_usd": 18.02, "bfcl/bfcl.overall.latency_mean_s": 16.52, "bfcl/bfcl.overall.latency_std_s": 30.73, "bfcl/bfcl.overall.latency_p95_s": 77.12, "bfcl/bfcl.non_live.ast_accuracy": 81.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.42, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 83.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 78.0, "bfcl/bfcl.live.live_accuracy": 93.12, "bfcl/bfcl.live.live_simple_ast_accuracy": 90.31, "bfcl/bfcl.live.live_multiple_ast_accuracy": 94.02, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 95.83, "bfcl/bfcl.multi_turn.accuracy": 62.38, "bfcl/bfcl.multi_turn.base_accuracy": 75.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 49.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 68.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 57.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 1.51, "bfcl/bfcl.memory.kv_accuracy": 1.29, "bfcl/bfcl.memory.vector_accuracy": 1.29, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 97.48 } }, { "id": "BlackBeenie/Bloslain-8B-v0.2", "name": "Bloslain-8B-v0.2", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5023, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4076, "hfopenllm_v2/MMLU-PRO": 0.3654 } }, { "id": "BlackBeenie/llama-3-luminous-merged", "name": "llama-3-luminous-merged", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4323, "hfopenllm_v2/BBH": 0.5154, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4149, "hfopenllm_v2/MMLU-PRO": 0.3773 } }, { "id": "BlackBeenie/llama-3.1-8B-Galore-openassistant-guanaco", "name": "llama-3.1-8B-Galore-openassistant-guanaco", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2635, "hfopenllm_v2/BBH": 0.5213, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4406, "hfopenllm_v2/MMLU-PRO": 0.3206 } }, { "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1", "name": "Llama-3.1-8B-OpenO1-SFT-v0.1", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5124, "hfopenllm_v2/BBH": 0.4787, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3618, "hfopenllm_v2/MMLU-PRO": 0.3492 } }, { "id": "BlackBeenie/Llama-3.1-8B-pythonic-passthrough-merge", "name": "Llama-3.1-8B-pythonic-passthrough-merge", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2316, "hfopenllm_v2/BBH": 0.3454, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.1332 } }, { "id": "BlackBeenie/Neos-Gemma-2-9b", "name": "Neos-Gemma-2-9b", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5876, "hfopenllm_v2/BBH": 0.5503, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.3618, "hfopenllm_v2/MMLU-PRO": 0.3981 } }, { "id": "BlackBeenie/Neos-Llama-3.1-8B", "name": "Neos-Llama-3.1-8B", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4944, "hfopenllm_v2/BBH": 0.4425, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.3262 } }, { "id": "BlackBeenie/Neos-Llama-3.1-base", "name": "Neos-Llama-3.1-base", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1751, "hfopenllm_v2/BBH": 0.293, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.3499, "hfopenllm_v2/MMLU-PRO": 0.1112 } }, { "id": "BlackBeenie/Neos-Phi-3-14B-v0.1", "name": "Neos-Phi-3-14B-v0.1", "developer": "BlackBeenie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4022, "hfopenllm_v2/BBH": 0.6212, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.4564 } }, { "id": "Bllossom/llama-3.2-Korean-Bllossom-AICA-5B", "name": "llama-3.2-Korean-Bllossom-AICA-5B", "developer": "Bllossom", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5172, "hfopenllm_v2/BBH": 0.4293, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3834, "hfopenllm_v2/MMLU-PRO": 0.271 } }, { "id": "bluuwhale/L3-SthenoMaid-8B-V1", "name": "L3-SthenoMaid-8B-V1", "developer": "bluuwhale", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7345, "hfopenllm_v2/BBH": 0.5219, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.3656 } }, { "id": "BoltMonkey/DreadMix", "name": "DreadMix", "developer": "BoltMonkey", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7095, "hfopenllm_v2/BBH": 0.5435, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.379 } }, { "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", "name": "NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated", "developer": "BoltMonkey", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7999, "hfopenllm_v2/BBH": 0.5152, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.3733 } }, { "id": "BoltMonkey/SuperNeuralDreadDevil-8b", "name": "SuperNeuralDreadDevil-8b", "developer": "BoltMonkey", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.771, "hfopenllm_v2/BBH": 0.5286, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "bond005/meno-tiny-0.1", "name": "meno-tiny-0.1", "developer": "bond005", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.455, "hfopenllm_v2/BBH": 0.4263, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.2786 } }, { "id": "bosonai/Higgs-Llama-3-70B", "name": "Higgs-Llama-3-70B", "developer": "bosonai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5561, "hfopenllm_v2/BBH": 0.6258, "hfopenllm_v2/MATH Level 5": 0.2523, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4471, "hfopenllm_v2/MMLU-PRO": 0.4902 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt", "name": "DeepSeek-R1-Distill-Qwen-1.5B-Blunt", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2611, "hfopenllm_v2/BBH": 0.2774, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3595, "hfopenllm_v2/MMLU-PRO": 0.1184 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective", "name": "DeepSeek-R1-Distill-Qwen-1.5B-Reflective", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3033, "hfopenllm_v2/BBH": 0.2908, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.113 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B", "name": "DeepSeek-R1-Distill-Qwen-14B", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4172, "hfopenllm_v2/BBH": 0.3033, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4488, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", "name": "DeepSeek-R1-Distill-Qwen-14B-ABUB-ST", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3752, "hfopenllm_v2/BBH": 0.4927, "hfopenllm_v2/MATH Level 5": 0.5015, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4221, "hfopenllm_v2/MMLU-PRO": 0.4243 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt", "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5612, "hfopenllm_v2/BBH": 0.3283, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4554, "hfopenllm_v2/MMLU-PRO": 0.1447 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5422, "hfopenllm_v2/BBH": 0.317, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4487, "hfopenllm_v2/MMLU-PRO": 0.1431 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5221, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.2508, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4527, "hfopenllm_v2/MMLU-PRO": 0.1484 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.554, "hfopenllm_v2/BBH": 0.3371, "hfopenllm_v2/MATH Level 5": 0.2372, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4248, "hfopenllm_v2/MMLU-PRO": 0.1504 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", "name": "DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5139, "hfopenllm_v2/BBH": 0.3013, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4433, "hfopenllm_v2/MMLU-PRO": 0.1289 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective", "name": "DeepSeek-R1-Distill-Qwen-14B-Reflective", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.429, "hfopenllm_v2/BBH": 0.3012, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4554, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-7B", "name": "DeepSeek-R1-Distill-Qwen-7B", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3968, "hfopenllm_v2/BBH": 0.2887, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3767, "hfopenllm_v2/MMLU-PRO": 0.1141 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt", "name": "DeepSeek-R1-Distill-Qwen-7B-Blunt", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4266, "hfopenllm_v2/BBH": 0.2902, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3885, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", "name": "DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3655, "hfopenllm_v2/BBH": 0.2958, "hfopenllm_v2/MATH Level 5": 0.1737, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3846, "hfopenllm_v2/MMLU-PRO": 0.1133 } }, { "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective", "name": "DeepSeek-R1-Distill-Qwen-7B-Reflective", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3922, "hfopenllm_v2/BBH": 0.2907, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.39, "hfopenllm_v2/MMLU-PRO": 0.1155 } }, { "id": "braindao/iq-code-evmind-0.5b", "name": "iq-code-evmind-0.5b", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3216, "hfopenllm_v2/BBH": 0.3164, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3304, "hfopenllm_v2/MMLU-PRO": 0.1189 } }, { "id": "braindao/Qwen2.5-14B", "name": "Qwen2.5-14B", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5409, "hfopenllm_v2/BBH": 0.5853, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.4884 } }, { "id": "braindao/Qwen2.5-14B-Instruct", "name": "Qwen2.5-14B-Instruct", "developer": "braindao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8143, "hfopenllm_v2/BBH": 0.6404, "hfopenllm_v2/MATH Level 5": 0.5529, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.4889 } }, { "id": "BrainWave-ML/llama3.2-3B-maths-orpo", "name": "llama3.2-3B-maths-orpo", "developer": "BrainWave-ML", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2049, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "BramVanroy/fietje-2", "name": "fietje-2", "developer": "BramVanroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2098, "hfopenllm_v2/BBH": 0.4036, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3696, "hfopenllm_v2/MMLU-PRO": 0.1986 } }, { "id": "BramVanroy/fietje-2-chat", "name": "fietje-2-chat", "developer": "BramVanroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2917, "hfopenllm_v2/BBH": 0.415, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3528, "hfopenllm_v2/MMLU-PRO": 0.2055 } }, { "id": "BramVanroy/fietje-2-instruct", "name": "fietje-2-instruct", "developer": "BramVanroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.279, "hfopenllm_v2/BBH": 0.4136, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2332, "hfopenllm_v2/MUSR": 0.3369, "hfopenllm_v2/MMLU-PRO": 0.2104 } }, { "id": "BramVanroy/GEITje-7B-ultra", "name": "GEITje-7B-ultra", "developer": "BramVanroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3723, "hfopenllm_v2/BBH": 0.3776, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.329, "hfopenllm_v2/MMLU-PRO": 0.2011 } }, { "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial", "name": "3Bgeneral-ECE-PRYMMAL-Martial", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3289, "hfopenllm_v2/BBH": 0.5458, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4373, "hfopenllm_v2/MMLU-PRO": 0.3934 } }, { "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial", "name": "3Bgeneralv2-ECE-PRYMMAL-Martial", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5677, "hfopenllm_v2/BBH": 0.5607, "hfopenllm_v2/MATH Level 5": 0.3497, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.4505 } }, { "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial", "name": "3Blareneg-ECE-PRYMMAL-Martial", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2876, "hfopenllm_v2/BBH": 0.5358, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4429, "hfopenllm_v2/MMLU-PRO": 0.4016 } }, { "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial", "name": "3Blarenegv2-ECE-PRYMMAL-Martial", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5662, "hfopenllm_v2/BBH": 0.5607, "hfopenllm_v2/MATH Level 5": 0.3497, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.4505 } }, { "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3", "name": "Barracuda-PRYMMAL-ECE-TW3", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.164, "hfopenllm_v2/BBH": 0.3002, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3609, "hfopenllm_v2/MMLU-PRO": 0.1093 } }, { "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3", "name": "LaConfiance-PRYMMAL-ECE-TW3", "developer": "brgx53", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1579, "hfopenllm_v2/BBH": 0.2962, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3846, "hfopenllm_v2/MMLU-PRO": 0.1146 } }, { "id": "BSC-LT/salamandra-7b", "name": "salamandra-7b", "developer": "BSC-LT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1367, "hfopenllm_v2/BBH": 0.3517, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.1493 } }, { "id": "BSC-LT/salamandra-7b-instruct", "name": "salamandra-7b-instruct", "developer": "BSC-LT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2451, "hfopenllm_v2/BBH": 0.3851, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4134, "hfopenllm_v2/MMLU-PRO": 0.1805 } }, { "id": "bunnycore/Best-Mix-Llama-3.1-8B", "name": "Best-Mix-Llama-3.1-8B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2067, "hfopenllm_v2/BBH": 0.3432, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.2929, "hfopenllm_v2/MMLU-PRO": 0.1565 } }, { "id": "bunnycore/Blabbertron-1.0", "name": "Blabbertron-1.0", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7433, "hfopenllm_v2/BBH": 0.5497, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4337, "hfopenllm_v2/MMLU-PRO": 0.4354 } }, { "id": "bunnycore/Blabbertron-1.1", "name": "Blabbertron-1.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7265, "hfopenllm_v2/BBH": 0.5534, "hfopenllm_v2/MATH Level 5": 0.4804, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4416, "hfopenllm_v2/MMLU-PRO": 0.4431 } }, { "id": "bunnycore/CyberCore-Qwen-2.1-7B", "name": "CyberCore-Qwen-2.1-7B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5766, "hfopenllm_v2/BBH": 0.5572, "hfopenllm_v2/MATH Level 5": 0.3588, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.4445 } }, { "id": "bunnycore/DeepQwen-3B-LCoT-SCE", "name": "DeepQwen-3B-LCoT-SCE", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.449, "hfopenllm_v2/BBH": 0.4512, "hfopenllm_v2/MATH Level 5": 0.247, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", "name": "DeepSeek-R1-Distill-Qwen-7B-RRP-Ex", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3901, "hfopenllm_v2/BBH": 0.3494, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3663, "hfopenllm_v2/MMLU-PRO": 0.2508 } }, { "id": "bunnycore/DeepThinker-7B-Sce-v1", "name": "DeepThinker-7B-Sce-v1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1218, "hfopenllm_v2/BBH": 0.3018, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.4194, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "bunnycore/DeepThinker-7B-Sce-v2", "name": "DeepThinker-7B-Sce-v2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1631, "hfopenllm_v2/BBH": 0.3057, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.1146 } }, { "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct", "name": "FuseCyberMix-Qwen-2.5-7B-Instruct", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7019, "hfopenllm_v2/BBH": 0.5518, "hfopenllm_v2/MATH Level 5": 0.4841, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.4337 } }, { "id": "bunnycore/FuseQwQen-7B", "name": "FuseQwQen-7B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7275, "hfopenllm_v2/BBH": 0.5504, "hfopenllm_v2/MATH Level 5": 0.4366, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.4407 } }, { "id": "bunnycore/FwF-Qwen-7B-0.1", "name": "FwF-Qwen-7B-0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3005, "hfopenllm_v2/BBH": 0.5019, "hfopenllm_v2/MATH Level 5": 0.2764, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3952, "hfopenllm_v2/MMLU-PRO": 0.4061 } }, { "id": "bunnycore/FwF-Qwen-7B-0.2", "name": "FwF-Qwen-7B-0.2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4479, "hfopenllm_v2/BBH": 0.5596, "hfopenllm_v2/MATH Level 5": 0.426, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4218, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "bunnycore/Gemma-2-2B-Smart", "name": "Gemma-2-2B-Smart", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1321, "hfopenllm_v2/BBH": 0.3974, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4249, "hfopenllm_v2/MMLU-PRO": 0.2426 } }, { "id": "bunnycore/Gemma2-9B-TitanFusion", "name": "Gemma2-9B-TitanFusion", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1618, "hfopenllm_v2/BBH": 0.5712, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.396 } }, { "id": "bunnycore/HyperLlama-3.1-8B", "name": "HyperLlama-3.1-8B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7883, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3829, "hfopenllm_v2/MMLU-PRO": 0.3783 } }, { "id": "bunnycore/Llama-3.1-8B-TitanFusion-Mix", "name": "Llama-3.1-8B-TitanFusion-Mix", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4925, "hfopenllm_v2/BBH": 0.5756, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3695 } }, { "id": "bunnycore/Llama-3.1-8B-TitanFusion-v3", "name": "Llama-3.1-8B-TitanFusion-v3", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.481, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.142, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4302, "hfopenllm_v2/MMLU-PRO": 0.3806 } }, { "id": "bunnycore/Llama-3.2-3B-All-Mix", "name": "Llama-3.2-3B-All-Mix", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7226, "hfopenllm_v2/BBH": 0.4508, "hfopenllm_v2/MATH Level 5": 0.1503, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3287, "hfopenllm_v2/MMLU-PRO": 0.316 } }, { "id": "bunnycore/Llama-3.2-3B-Bespoke-Thought", "name": "Llama-3.2-3B-Bespoke-Thought", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4113, "hfopenllm_v2/BBH": 0.4522, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.311 } }, { "id": "bunnycore/Llama-3.2-3B-Booval", "name": "Llama-3.2-3B-Booval", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6669, "hfopenllm_v2/BBH": 0.4514, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.3058 } }, { "id": "bunnycore/Llama-3.2-3B-Deep-Test", "name": "Llama-3.2-3B-Deep-Test", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4652, "hfopenllm_v2/BBH": 0.4531, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.3152 } }, { "id": "bunnycore/Llama-3.2-3B-Della", "name": "Llama-3.2-3B-Della", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3561, "hfopenllm_v2/BBH": 0.3683, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3902, "hfopenllm_v2/MMLU-PRO": 0.2128 } }, { "id": "bunnycore/Llama-3.2-3B-Long-Think", "name": "Llama-3.2-3B-Long-Think", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5473, "hfopenllm_v2/BBH": 0.461, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.3048 } }, { "id": "bunnycore/Llama-3.2-3B-Mix-Skill", "name": "Llama-3.2-3B-Mix-Skill", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6404, "hfopenllm_v2/BBH": 0.4582, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.3121 } }, { "id": "bunnycore/Llama-3.2-3B-ProdigyPlus", "name": "Llama-3.2-3B-ProdigyPlus", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4015, "hfopenllm_v2/BBH": 0.4392, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.2817 } }, { "id": "bunnycore/Llama-3.2-3B-ProdigyPlusPlus", "name": "Llama-3.2-3B-ProdigyPlusPlus", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1645, "hfopenllm_v2/BBH": 0.369, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.15 } }, { "id": "bunnycore/Llama-3.2-3B-RP-DeepThink", "name": "Llama-3.2-3B-RP-DeepThink", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7144, "hfopenllm_v2/BBH": 0.4563, "hfopenllm_v2/MATH Level 5": 0.1609, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.3242 } }, { "id": "bunnycore/Llama-3.2-3b-RP-Toxic-Fuse", "name": "Llama-3.2-3b-RP-Toxic-Fuse", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6834, "hfopenllm_v2/BBH": 0.465, "hfopenllm_v2/MATH Level 5": 0.2402, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3954, "hfopenllm_v2/MMLU-PRO": 0.3106 } }, { "id": "bunnycore/Llama-3.2-3B-RRStock", "name": "Llama-3.2-3B-RRStock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6657, "hfopenllm_v2/BBH": 0.4568, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.3236 } }, { "id": "bunnycore/Llama-3.2-3B-ToxicKod", "name": "Llama-3.2-3B-ToxicKod", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6319, "hfopenllm_v2/BBH": 0.4525, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.288 } }, { "id": "bunnycore/Maestro-S1k-7B-Sce", "name": "Maestro-S1k-7B-Sce", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2523, "hfopenllm_v2/BBH": 0.3104, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3768, "hfopenllm_v2/MMLU-PRO": 0.117 } }, { "id": "bunnycore/Phi-3.5-mini-TitanFusion-0.1", "name": "Phi-3.5-mini-TitanFusion-0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5228, "hfopenllm_v2/BBH": 0.5374, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4453, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "bunnycore/Phi-4-Model-Stock", "name": "Phi-4-Model-Stock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6879, "hfopenllm_v2/BBH": 0.689, "hfopenllm_v2/MATH Level 5": 0.4298, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4441, "hfopenllm_v2/MMLU-PRO": 0.5368 } }, { "id": "bunnycore/Phi-4-Model-Stock-v2", "name": "Phi-4-Model-Stock-v2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6375, "hfopenllm_v2/BBH": 0.6825, "hfopenllm_v2/MATH Level 5": 0.3754, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4662, "hfopenllm_v2/MMLU-PRO": 0.5331 } }, { "id": "bunnycore/Phi-4-Model-Stock-v3", "name": "Phi-4-Model-Stock-v3", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5912, "hfopenllm_v2/BBH": 0.6726, "hfopenllm_v2/MATH Level 5": 0.4902, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "bunnycore/Phi-4-Model-Stock-v4", "name": "Phi-4-Model-Stock-v4", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.711, "hfopenllm_v2/BBH": 0.6924, "hfopenllm_v2/MATH Level 5": 0.3829, "hfopenllm_v2/GPQA": 0.3691, "hfopenllm_v2/MUSR": 0.4611, "hfopenllm_v2/MMLU-PRO": 0.5394 } }, { "id": "bunnycore/Phi-4-ReasoningRP", "name": "Phi-4-ReasoningRP", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6736, "hfopenllm_v2/BBH": 0.6922, "hfopenllm_v2/MATH Level 5": 0.4569, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4491, "hfopenllm_v2/MMLU-PRO": 0.5421 } }, { "id": "bunnycore/Phi-4-RP-v0", "name": "Phi-4-RP-v0", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6827, "hfopenllm_v2/BBH": 0.6856, "hfopenllm_v2/MATH Level 5": 0.3316, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.5364 } }, { "id": "bunnycore/Phi-4-RR-Shoup", "name": "Phi-4-RR-Shoup", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6587, "hfopenllm_v2/BBH": 0.6947, "hfopenllm_v2/MATH Level 5": 0.4992, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.444, "hfopenllm_v2/MMLU-PRO": 0.5429 } }, { "id": "bunnycore/Phi-4-RStock-v0.1", "name": "Phi-4-RStock-v0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7019, "hfopenllm_v2/BBH": 0.6928, "hfopenllm_v2/MATH Level 5": 0.395, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4584, "hfopenllm_v2/MMLU-PRO": 0.5401 } }, { "id": "bunnycore/Phi-4-Sce-exp-v0.1", "name": "Phi-4-Sce-exp-v0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6595, "hfopenllm_v2/BBH": 0.6943, "hfopenllm_v2/MATH Level 5": 0.503, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4441, "hfopenllm_v2/MMLU-PRO": 0.5423 } }, { "id": "bunnycore/Phi-4-Stock-Ex", "name": "Phi-4-Stock-Ex", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6575, "hfopenllm_v2/BBH": 0.6864, "hfopenllm_v2/MATH Level 5": 0.4086, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4624, "hfopenllm_v2/MMLU-PRO": 0.5375 } }, { "id": "bunnycore/Phi-4-Stock-RP", "name": "Phi-4-Stock-RP", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6399, "hfopenllm_v2/BBH": 0.686, "hfopenllm_v2/MATH Level 5": 0.3414, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4715, "hfopenllm_v2/MMLU-PRO": 0.5317 } }, { "id": "bunnycore/Phi-4-Trim-Exp1", "name": "Phi-4-Trim-Exp1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1219, "hfopenllm_v2/BBH": 0.2852, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.4177, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "bunnycore/Phi-Seek-4-Sce-V1", "name": "Phi-Seek-4-Sce-V1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2935, "hfopenllm_v2/BBH": 0.6459, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3982, "hfopenllm_v2/MMLU-PRO": 0.5123 } }, { "id": "bunnycore/Qandora-2.5-7B-Creative", "name": "Qandora-2.5-7B-Creative", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6803, "hfopenllm_v2/BBH": 0.5542, "hfopenllm_v2/MATH Level 5": 0.3059, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.448 } }, { "id": "bunnycore/QandoraExp-7B", "name": "QandoraExp-7B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.5478, "hfopenllm_v2/MATH Level 5": 0.4743, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4312, "hfopenllm_v2/MMLU-PRO": 0.441 } }, { "id": "bunnycore/QandoraExp-7B-Persona", "name": "QandoraExp-7B-Persona", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6247, "hfopenllm_v2/BBH": 0.5558, "hfopenllm_v2/MATH Level 5": 0.3104, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.4407 } }, { "id": "bunnycore/QandoraExp-7B-v2", "name": "QandoraExp-7B-v2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5607, "hfopenllm_v2/BBH": 0.5445, "hfopenllm_v2/MATH Level 5": 0.4713, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.3909 } }, { "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1", "name": "Qwen-2.5-7B-Deep-Sky-T1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4208, "hfopenllm_v2/BBH": 0.414, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4018, "hfopenllm_v2/MMLU-PRO": 0.2104 } }, { "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1", "name": "Qwen-2.5-7B-Deep-Stock-v1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5695, "hfopenllm_v2/BBH": 0.5361, "hfopenllm_v2/MATH Level 5": 0.2644, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4109, "hfopenllm_v2/MMLU-PRO": 0.4066 } }, { "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4", "name": "Qwen-2.5-7B-Deep-Stock-v4", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7753, "hfopenllm_v2/BBH": 0.5453, "hfopenllm_v2/MATH Level 5": 0.4894, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4127, "hfopenllm_v2/MMLU-PRO": 0.4342 } }, { "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5", "name": "Qwen-2.5-7B-Deep-Stock-v5", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4509, "hfopenllm_v2/BBH": 0.4672, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3648, "hfopenllm_v2/MMLU-PRO": 0.2832 } }, { "id": "bunnycore/Qwen-2.5-7B-Exp-Sce", "name": "Qwen-2.5-7B-Exp-Sce", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7652, "hfopenllm_v2/BBH": 0.5506, "hfopenllm_v2/MATH Level 5": 0.3255, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.4259 } }, { "id": "bunnycore/Qwen-2.5-7B-R1-Stock", "name": "Qwen-2.5-7B-R1-Stock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7573, "hfopenllm_v2/BBH": 0.5393, "hfopenllm_v2/MATH Level 5": 0.5008, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.4294 } }, { "id": "bunnycore/Qwen-2.5-7b-S1k", "name": "Qwen-2.5-7b-S1k", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7162, "hfopenllm_v2/BBH": 0.5563, "hfopenllm_v2/MATH Level 5": 0.4781, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke", "name": "Qwen-2.5-7B-Stock-Deep-Bespoke", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5206, "hfopenllm_v2/BBH": 0.492, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.358 } }, { "id": "bunnycore/Qwen2.5-1.5B-Model-Stock", "name": "Qwen2.5-1.5B-Model-Stock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1829, "hfopenllm_v2/BBH": 0.2874, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3674, "hfopenllm_v2/MMLU-PRO": 0.11 } }, { "id": "bunnycore/Qwen2.5-3B-Model-Stock", "name": "Qwen2.5-3B-Model-Stock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6381, "hfopenllm_v2/BBH": 0.4712, "hfopenllm_v2/MATH Level 5": 0.3799, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3942, "hfopenllm_v2/MMLU-PRO": 0.325 } }, { "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2", "name": "Qwen2.5-3B-Model-Stock-v2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.649, "hfopenllm_v2/BBH": 0.4677, "hfopenllm_v2/MATH Level 5": 0.3867, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3915, "hfopenllm_v2/MMLU-PRO": 0.327 } }, { "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1", "name": "Qwen2.5-3B-Model-Stock-v3.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6481, "hfopenllm_v2/BBH": 0.4737, "hfopenllm_v2/MATH Level 5": 0.3897, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3968, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2", "name": "Qwen2.5-3B-Model-Stock-v3.2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6353, "hfopenllm_v2/BBH": 0.4727, "hfopenllm_v2/MATH Level 5": 0.3754, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.3294 } }, { "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1", "name": "Qwen2.5-3B-Model-Stock-v4.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6381, "hfopenllm_v2/BBH": 0.482, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3941, "hfopenllm_v2/MMLU-PRO": 0.3387 } }, { "id": "bunnycore/Qwen2.5-3B-RP-Mix", "name": "Qwen2.5-3B-RP-Mix", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5721, "hfopenllm_v2/BBH": 0.4894, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4284, "hfopenllm_v2/MMLU-PRO": 0.3728 } }, { "id": "bunnycore/Qwen2.5-3B-RP-Thinker", "name": "Qwen2.5-3B-RP-Thinker", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5894, "hfopenllm_v2/BBH": 0.4164, "hfopenllm_v2/MATH Level 5": 0.3353, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3287, "hfopenllm_v2/MMLU-PRO": 0.315 } }, { "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2", "name": "Qwen2.5-3B-RP-Thinker-V2", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.642, "hfopenllm_v2/BBH": 0.4678, "hfopenllm_v2/MATH Level 5": 0.3829, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.3271 } }, { "id": "bunnycore/Qwen2.5-7B-CyberRombos", "name": "Qwen2.5-7B-CyberRombos", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7518, "hfopenllm_v2/BBH": 0.5465, "hfopenllm_v2/MATH Level 5": 0.4962, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.4391 } }, { "id": "bunnycore/Qwen2.5-7B-Fuse-Exp", "name": "Qwen2.5-7B-Fuse-Exp", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5469, "hfopenllm_v2/BBH": 0.5109, "hfopenllm_v2/MATH Level 5": 0.3142, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4573, "hfopenllm_v2/MMLU-PRO": 0.3309 } }, { "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion", "name": "Qwen2.5-7B-Instruct-Fusion", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6962, "hfopenllm_v2/BBH": 0.5492, "hfopenllm_v2/MATH Level 5": 0.3406, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4297, "hfopenllm_v2/MMLU-PRO": 0.4467 } }, { "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1", "name": "Qwen2.5-7B-Instruct-Merge-Stock-v0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.5529, "hfopenllm_v2/MATH Level 5": 0.4894, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4383 } }, { "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3", "name": "Qwen2.5-7B-MixStock-Sce-V0.3", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.212, "hfopenllm_v2/BBH": 0.3479, "hfopenllm_v2/MATH Level 5": 0.2576, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3714, "hfopenllm_v2/MMLU-PRO": 0.1779 } }, { "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1", "name": "Qwen2.5-7B-MixStock-V0.1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7673, "hfopenllm_v2/BBH": 0.5479, "hfopenllm_v2/MATH Level 5": 0.3172, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4416, "hfopenllm_v2/MMLU-PRO": 0.4256 } }, { "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock", "name": "Qwen2.5-7B-R1-Bespoke-Stock", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3726, "hfopenllm_v2/BBH": 0.4822, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3926, "hfopenllm_v2/MMLU-PRO": 0.3472 } }, { "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task", "name": "Qwen2.5-7B-R1-Bespoke-Task", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3787, "hfopenllm_v2/BBH": 0.415, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3569, "hfopenllm_v2/MMLU-PRO": 0.2688 } }, { "id": "bunnycore/Qwen2.5-7B-RRP-1M", "name": "Qwen2.5-7B-RRP-1M", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7481, "hfopenllm_v2/BBH": 0.5452, "hfopenllm_v2/MATH Level 5": 0.3248, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4483, "hfopenllm_v2/MMLU-PRO": 0.4266 } }, { "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker", "name": "Qwen2.5-7B-RRP-1M-Thinker", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2308, "hfopenllm_v2/BBH": 0.3482, "hfopenllm_v2/MATH Level 5": 0.2719, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3767, "hfopenllm_v2/MMLU-PRO": 0.1769 } }, { "id": "bunnycore/Qwen2.5-7B-RRP-ID", "name": "Qwen2.5-7B-RRP-ID", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7473, "hfopenllm_v2/BBH": 0.548, "hfopenllm_v2/MATH Level 5": 0.4864, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini", "name": "Qwen2.5-7B-Sky-R1-Mini", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2305, "hfopenllm_v2/BBH": 0.3503, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3448, "hfopenllm_v2/MMLU-PRO": 0.1253 } }, { "id": "bunnycore/QwenMosaic-7B", "name": "QwenMosaic-7B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5819, "hfopenllm_v2/BBH": 0.5564, "hfopenllm_v2/MATH Level 5": 0.4441, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.4164, "hfopenllm_v2/MMLU-PRO": 0.431 } }, { "id": "bunnycore/QwQen-3B-LCoT", "name": "QwQen-3B-LCoT", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6025, "hfopenllm_v2/BBH": 0.4899, "hfopenllm_v2/MATH Level 5": 0.3618, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.3699 } }, { "id": "bunnycore/QwQen-3B-LCoT-R1", "name": "QwQen-3B-LCoT-R1", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5342, "hfopenllm_v2/BBH": 0.4799, "hfopenllm_v2/MATH Level 5": 0.3353, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.3723 } }, { "id": "bunnycore/Smol-Llama-3.2-3B", "name": "Smol-Llama-3.2-3B", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6679, "hfopenllm_v2/BBH": 0.4539, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.3228 } }, { "id": "bunnycore/SmolLM2-1.7-Persona", "name": "SmolLM2-1.7-Persona", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5465, "hfopenllm_v2/BBH": 0.3623, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1974 } }, { "id": "bunnycore/SmolLM2-1.7B-roleplay-lora", "name": "SmolLM2-1.7B-roleplay-lora", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5382, "hfopenllm_v2/BBH": 0.361, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1966 } }, { "id": "bunnycore/Tulu-3.1-8B-SuperNova", "name": "Tulu-3.1-8B-SuperNova", "developer": "bunnycore", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8194, "hfopenllm_v2/BBH": 0.5254, "hfopenllm_v2/MATH Level 5": 0.2462, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3935, "hfopenllm_v2/MMLU-PRO": 0.3814 } }, { "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated", "name": "Mistral-Small-Instruct-2409-abliterated", "developer": "byroneverson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6971, "hfopenllm_v2/BBH": 0.5238, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.3697, "hfopenllm_v2/MMLU-PRO": 0.3923 } }, { "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated", "name": "Yi-1.5-9B-Chat-16K-abliterated", "developer": "byroneverson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5528, "hfopenllm_v2/BBH": 0.5282, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4734, "hfopenllm_v2/MMLU-PRO": 0.3823 } }, { "id": "byroneverson/Yi-1.5-9B-Chat-abliterated", "name": "Yi-1.5-9B-Chat-abliterated", "developer": "byroneverson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5723, "hfopenllm_v2/BBH": 0.5401, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4389, "hfopenllm_v2/MMLU-PRO": 0.3715 } }, { "id": "bytedance/doubao-seed-1-6-thinking-250615", "name": "doubao-seed-1-6-thinking-250615", "developer": "ByteDance", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.07042253521126761, "livecodebenchpro/Easy Problems": 0.5774647887323944 } }, { "id": "c10x/longthinker", "name": "longthinker", "developer": "c10x", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3609, "hfopenllm_v2/BBH": 0.4927, "hfopenllm_v2/MATH Level 5": 0.2319, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.391, "hfopenllm_v2/MMLU-PRO": 0.3527 } }, { "id": "c10x/Q-Pluse", "name": "Q-Pluse", "developer": "c10x", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1123, "hfopenllm_v2/BBH": 0.2875, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3938, "hfopenllm_v2/MMLU-PRO": 0.1135 } }, { "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct", "name": "Llama-3.2-Rabbit-Ko-3B-Instruct", "developer": "CarrotAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7199, "hfopenllm_v2/BBH": 0.4427, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3649, "hfopenllm_v2/MMLU-PRO": 0.2822 } }, { "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412", "name": "Llama-3.2-Rabbit-Ko-3B-Instruct-2412", "developer": "CarrotAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4782, "hfopenllm_v2/BBH": 0.4358, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.3134 } }, { "id": "carsenk/flippa-v6", "name": "flippa-v6", "developer": "carsenk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3439, "hfopenllm_v2/BBH": 0.5047, "hfopenllm_v2/MATH Level 5": 0.1405, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4089, "hfopenllm_v2/MMLU-PRO": 0.3668 } }, { "id": "carsenk/phi3.5_mini_exp_825_uncensored", "name": "phi3.5_mini_exp_825_uncensored", "developer": "carsenk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1364, "hfopenllm_v2/BBH": 0.2965, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.1175 } }, { "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B", "name": "L3-Umbral-Mind-RP-v2.0-8B", "developer": "Casual-Autopsy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7123, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.3723 } }, { "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1", "name": "gemma-2-9b-it-sppo-iter-1", "developer": "cat-searcher", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3015, "hfopenllm_v2/BBH": 0.5972, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.3927, "hfopenllm_v2/MMLU-PRO": 0.3854 } }, { "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1", "name": "gemma-2-9b-it-sppo-iter-1-evol-1", "developer": "cat-searcher", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2942, "hfopenllm_v2/BBH": 0.5939, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.3926, "hfopenllm_v2/MMLU-PRO": 0.38 } }, { "id": "CausalLM/14B", "name": "14B", "developer": "CausalLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2788, "hfopenllm_v2/BBH": 0.47, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.3221 } }, { "id": "CausalLM/34b-beta", "name": "34b-beta", "developer": "CausalLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3043, "hfopenllm_v2/BBH": 0.5591, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.3749, "hfopenllm_v2/MMLU-PRO": 0.5325 } }, { "id": "CausalLM/preview-1-hf", "name": "preview-1-hf", "developer": "CausalLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5559, "hfopenllm_v2/BBH": 0.3615, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.3597 } }, { "id": "cckm/tinymistral_950m", "name": "tinymistral_950m", "developer": "cckm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2395, "hfopenllm_v2/BBH": 0.2969, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3554, "hfopenllm_v2/MMLU-PRO": 0.1096 } }, { "id": "cgato/TheSalt-L3-8b-v0.3.2", "name": "TheSalt-L3-8b-v0.3.2", "developer": "cgato", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2705, "hfopenllm_v2/BBH": 0.2968, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3896, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "Changgil/K2S3-14b-v0.2", "name": "K2S3-14b-v0.2", "developer": "Changgil", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3243, "hfopenllm_v2/BBH": 0.4613, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3923, "hfopenllm_v2/MMLU-PRO": 0.2644 } }, { "id": "Changgil/K2S3-v0.1", "name": "K2S3-v0.1", "developer": "Changgil", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3277, "hfopenllm_v2/BBH": 0.4655, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4014, "hfopenllm_v2/MMLU-PRO": 0.2562 } }, { "id": "chargoddard/prometheus-2-llama-3-8b", "name": "prometheus-2-llama-3-8b", "developer": "chargoddard", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5289, "hfopenllm_v2/BBH": 0.4931, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.3087 } }, { "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO", "name": "Llama-3-Instruct-8B-SimPO-ExPO", "developer": "chujiezheng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6434, "hfopenllm_v2/BBH": 0.4765, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.392, "hfopenllm_v2/MMLU-PRO": 0.3401 } }, { "id": "chujiezheng/Mistral7B-PairRM-SPPO-ExPO", "name": "Mistral7B-PairRM-SPPO-ExPO", "developer": "chujiezheng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3673, "hfopenllm_v2/BBH": 0.3882, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4055, "hfopenllm_v2/MMLU-PRO": 0.2552 } }, { "id": "CIR-AMS/BTRM_Qwen2_7b_0613", "name": "CIR-AMS/BTRM_Qwen2_7b_0613", "developer": "CIR-AMS", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8172, "reward-bench/Factuality": 0.5347, "reward-bench/Precise IF": 0.3563, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.9014, "reward-bench/Focus": 0.5737, "reward-bench/Ties": 0.6527, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.5724, "reward-bench/Reasoning": 0.8775, "reward-bench/Prior Sets (0.5 weight)": 0.7029 } }, { "id": "cjvt/GaMS-1B", "name": "GaMS-1B", "developer": "cjvt", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1635, "hfopenllm_v2/BBH": 0.3075, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3684, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "ClaudioItaly/Albacus", "name": "Albacus", "developer": "ClaudioItaly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4667, "hfopenllm_v2/BBH": 0.5113, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3165 } }, { "id": "ClaudioItaly/Book-Gut12B", "name": "Book-Gut12B", "developer": "ClaudioItaly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3998, "hfopenllm_v2/BBH": 0.5417, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4635, "hfopenllm_v2/MMLU-PRO": 0.367 } }, { "id": "ClaudioItaly/Evolutionstory-7B-v2.2", "name": "Evolutionstory-7B-v2.2", "developer": "ClaudioItaly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4814, "hfopenllm_v2/BBH": 0.5108, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3159 } }, { "id": "ClaudioItaly/intelligence-cod-rag-7b-v3", "name": "intelligence-cod-rag-7b-v3", "developer": "ClaudioItaly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6898, "hfopenllm_v2/BBH": 0.5366, "hfopenllm_v2/MATH Level 5": 0.3807, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4153, "hfopenllm_v2/MMLU-PRO": 0.4195 } }, { "id": "cloudyu/Llama-3-70Bx2-MOE", "name": "Llama-3-70Bx2-MOE", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5482, "hfopenllm_v2/BBH": 0.6636, "hfopenllm_v2/MATH Level 5": 0.2175, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4812, "hfopenllm_v2/MMLU-PRO": 0.5142 } }, { "id": "cloudyu/Llama-3.2-3Bx4", "name": "Llama-3.2-3Bx4", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5069, "hfopenllm_v2/BBH": 0.4332, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3496, "hfopenllm_v2/MMLU-PRO": 0.2985 } }, { "id": "cloudyu/Mixtral_11Bx2_MoE_19B", "name": "Mixtral_11Bx2_MoE_19B", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3851, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4297, "hfopenllm_v2/MMLU-PRO": 0.3311 } }, { "id": "cloudyu/Mixtral_34Bx2_MoE_60B", "name": "Mixtral_34Bx2_MoE_60B", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4538, "hfopenllm_v2/BBH": 0.587, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4625, "hfopenllm_v2/MMLU-PRO": 0.4766 } }, { "id": "cloudyu/Mixtral_7Bx2_MoE", "name": "Mixtral_7Bx2_MoE", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.448, "hfopenllm_v2/BBH": 0.516, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4473, "hfopenllm_v2/MMLU-PRO": 0.3044 } }, { "id": "cloudyu/S1-Llama-3.2-3Bx4-MoE", "name": "S1-Llama-3.2-3Bx4-MoE", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5302, "hfopenllm_v2/BBH": 0.4358, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3456, "hfopenllm_v2/MMLU-PRO": 0.3044 } }, { "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO", "name": "Yi-34Bx2-MoE-60B-DPO", "developer": "cloudyu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5319, "hfopenllm_v2/BBH": 0.5168, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.4677 } }, { "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-ipo", "name": "Llama-3.1-8B-paraphrase-type-generation-apty-ipo", "developer": "cluebbers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1327, "hfopenllm_v2/BBH": 0.38, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.2591 } }, { "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", "name": "Llama-3.1-8B-paraphrase-type-generation-apty-sigmoid", "developer": "cluebbers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1318, "hfopenllm_v2/BBH": 0.3789, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.2562 } }, { "id": "cluebbers/Llama-3.1-8B-paraphrase-type-generation-etpc", "name": "Llama-3.1-8B-paraphrase-type-generation-etpc", "developer": "cluebbers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1209, "hfopenllm_v2/BBH": 0.3781, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4319, "hfopenllm_v2/MMLU-PRO": 0.2556 } }, { "id": "cognitivecomputations/dolphin-2.9-llama3-8b", "name": "dolphin-2.9-llama3-8b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.385, "hfopenllm_v2/BBH": 0.495, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.2771 } }, { "id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "name": "dolphin-2.9.1-llama-3-70b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.376, "hfopenllm_v2/BBH": 0.5205, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4976, "hfopenllm_v2/MMLU-PRO": 0.413 } }, { "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-34b", "name": "dolphin-2.9.1-yi-1.5-34b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3853, "hfopenllm_v2/BBH": 0.6076, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4598, "hfopenllm_v2/MMLU-PRO": 0.4519 } }, { "id": "cognitivecomputations/dolphin-2.9.1-yi-1.5-9b", "name": "dolphin-2.9.1-yi-1.5-9b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4465, "hfopenllm_v2/BBH": 0.5484, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4348, "hfopenllm_v2/MMLU-PRO": 0.3967 } }, { "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium", "name": "dolphin-2.9.2-Phi-3-Medium", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4248, "hfopenllm_v2/BBH": 0.6457, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4555 } }, { "id": "cognitivecomputations/dolphin-2.9.2-Phi-3-Medium-abliterated", "name": "dolphin-2.9.2-Phi-3-Medium-abliterated", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4124, "hfopenllm_v2/BBH": 0.6383, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4349, "hfopenllm_v2/MMLU-PRO": 0.4525 } }, { "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b", "name": "dolphin-2.9.2-qwen2-72b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6344, "hfopenllm_v2/BBH": 0.6296, "hfopenllm_v2/MATH Level 5": 0.2802, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4521, "hfopenllm_v2/MMLU-PRO": 0.5471 } }, { "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b", "name": "dolphin-2.9.2-qwen2-7b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3535, "hfopenllm_v2/BBH": 0.4894, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4051 } }, { "id": "cognitivecomputations/dolphin-2.9.3-mistral-7B-32k", "name": "dolphin-2.9.3-mistral-7B-32k", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4126, "hfopenllm_v2/BBH": 0.4813, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4643, "hfopenllm_v2/MMLU-PRO": 0.2821 } }, { "id": "cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b", "name": "dolphin-2.9.3-mistral-nemo-12b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5601, "hfopenllm_v2/BBH": 0.548, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.3377 } }, { "id": "cognitivecomputations/dolphin-2.9.3-Yi-1.5-34B-32k", "name": "dolphin-2.9.3-Yi-1.5-34B-32k", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3639, "hfopenllm_v2/BBH": 0.6047, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.463 } }, { "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b", "name": "dolphin-2.9.4-gemma2-2b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0896, "hfopenllm_v2/BBH": 0.4081, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.2105 } }, { "id": "cognitivecomputations/dolphin-2.9.4-llama3.1-8b", "name": "dolphin-2.9.4-llama3.1-8b", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2757, "hfopenllm_v2/BBH": 0.3524, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3236, "hfopenllm_v2/MMLU-PRO": 0.1237 } }, { "id": "cognitivecomputations/Dolphin3.0-Llama3.1-8B", "name": "Dolphin3.0-Llama3.1-8B", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7621, "hfopenllm_v2/BBH": 0.4916, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3653, "hfopenllm_v2/MMLU-PRO": 0.2992 } }, { "id": "cognitivecomputations/Dolphin3.0-Llama3.2-1B", "name": "Dolphin3.0-Llama3.2-1B", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5428, "hfopenllm_v2/BBH": 0.3122, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2299, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1375 } }, { "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B", "name": "Dolphin3.0-Qwen2.5-0.5B", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4697, "hfopenllm_v2/BBH": 0.3114, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2349, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.1413 } }, { "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", "name": "Dolphin3.0-R1-Mistral-24B", "developer": "cognitivecomputations", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4068, "hfopenllm_v2/BBH": 0.536, "hfopenllm_v2/MATH Level 5": 0.3119, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3952, "hfopenllm_v2/MMLU-PRO": 0.3005 } }, { "id": "Cohere March 2024", "name": "Cohere March 2024", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8511, "reward-bench/Chat": 0.9469, "reward-bench/Chat Hard": 0.6513, "reward-bench/Safety": 0.877, "reward-bench/Reasoning": 0.9817, "reward-bench/Prior Sets (0.5 weight)": 0.7458 } }, { "id": "Cohere May 2024", "name": "Cohere May 2024", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8816, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.7127, "reward-bench/Safety": 0.923, "reward-bench/Reasoning": 0.9768, "reward-bench/Prior Sets (0.5 weight)": 0.782 } }, { "id": "cohere/aya-expanse-32b", "name": "aya-expanse-32b", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.7353, "global-mmlu-lite/Culturally Sensitive": 0.6891, "global-mmlu-lite/Culturally Agnostic": 0.7815, "global-mmlu-lite/Arabic": 0.7425, "global-mmlu-lite/English": 0.7544, "global-mmlu-lite/Bengali": 0.7343, "global-mmlu-lite/German": 0.7425, "global-mmlu-lite/French": 0.7325, "global-mmlu-lite/Hindi": 0.7375, "global-mmlu-lite/Indonesian": 0.7594, "global-mmlu-lite/Italian": 0.7305, "global-mmlu-lite/Japanese": 0.7419, "global-mmlu-lite/Korean": 0.7525, "global-mmlu-lite/Portuguese": 0.7544, "global-mmlu-lite/Spanish": 0.7362, "global-mmlu-lite/Swahili": 0.7071, "global-mmlu-lite/Yoruba": 0.6942, "global-mmlu-lite/Chinese": 0.743, "global-mmlu-lite/Burmese": 0.7025 } }, { "id": "cohere/Cohere-Command-beta-52.4B", "name": "Cohere Command beta 52.4B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.874, "helm_classic/MMLU": 0.452, "helm_classic/BoolQ": 0.856, "helm_classic/NarrativeQA": 0.752, "helm_classic/NaturalQuestions (open-book)": 0.76, "helm_classic/QuAC": 0.432, "helm_classic/HellaSwag": 0.811, "helm_classic/OpenbookQA": 0.582, "helm_classic/TruthfulQA": 0.269, "helm_classic/MS MARCO (TREC)": 0.762, "helm_classic/CNN/DailyMail": 0.161, "helm_classic/XSUM": 0.152, "helm_classic/IMDB": 0.96, "helm_classic/CivilComments": 0.601, "helm_classic/RAFT": 0.667 } }, { "id": "cohere/Cohere-Command-beta-6.1B", "name": "Cohere Command beta 6.1B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.675, "helm_classic/MMLU": 0.406, "helm_classic/BoolQ": 0.798, "helm_classic/NarrativeQA": 0.709, "helm_classic/NaturalQuestions (open-book)": 0.717, "helm_classic/QuAC": 0.375, "helm_classic/HellaSwag": 0.752, "helm_classic/OpenbookQA": 0.55, "helm_classic/TruthfulQA": 0.203, "helm_classic/MS MARCO (TREC)": 0.709, "helm_classic/CNN/DailyMail": 0.153, "helm_classic/XSUM": 0.122, "helm_classic/IMDB": 0.961, "helm_classic/CivilComments": 0.54, "helm_classic/RAFT": 0.634 } }, { "id": "cohere/Cohere-large-v20220720-13.1B", "name": "Cohere large v20220720 13.1B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.372, "helm_classic/MMLU": 0.324, "helm_classic/BoolQ": 0.725, "helm_classic/NarrativeQA": 0.625, "helm_classic/NaturalQuestions (open-book)": 0.573, "helm_classic/QuAC": 0.338, "helm_classic/HellaSwag": 0.736, "helm_classic/OpenbookQA": 0.542, "helm_classic/TruthfulQA": 0.181, "helm_classic/MS MARCO (TREC)": 0.33, "helm_classic/CNN/DailyMail": 0.126, "helm_classic/XSUM": 0.108, "helm_classic/IMDB": 0.933, "helm_classic/CivilComments": 0.507, "helm_classic/RAFT": 0.596 } }, { "id": "cohere/Cohere-medium-v20220720-6.1B", "name": "Cohere medium v20220720 6.1B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.23, "helm_classic/MMLU": 0.279, "helm_classic/BoolQ": 0.659, "helm_classic/NarrativeQA": 0.559, "helm_classic/NaturalQuestions (open-book)": 0.504, "helm_classic/QuAC": 0.279, "helm_classic/HellaSwag": 0.706, "helm_classic/OpenbookQA": 0.496, "helm_classic/TruthfulQA": 0.19, "helm_classic/MS MARCO (TREC)": 0.374, "helm_classic/CNN/DailyMail": 0.077, "helm_classic/XSUM": 0.087, "helm_classic/IMDB": 0.935, "helm_classic/CivilComments": 0.504, "helm_classic/RAFT": 0.52 } }, { "id": "cohere/Cohere-medium-v20221108-6.1B", "name": "Cohere medium v20221108 6.1B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.312, "helm_classic/MMLU": 0.254, "helm_classic/BoolQ": 0.7, "helm_classic/NarrativeQA": 0.61, "helm_classic/NaturalQuestions (open-book)": 0.517, "helm_classic/QuAC": 0.314, "helm_classic/HellaSwag": 0.726, "helm_classic/OpenbookQA": 0.538, "helm_classic/TruthfulQA": 0.215, "helm_classic/MS MARCO (TREC)": 0.373, "helm_classic/CNN/DailyMail": 0.121, "helm_classic/XSUM": 0.099, "helm_classic/IMDB": 0.935, "helm_classic/CivilComments": 0.5, "helm_classic/RAFT": 0.591 } }, { "id": "cohere/Cohere-small-v20220720-410M", "name": "Cohere small v20220720 410M", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.109, "helm_classic/MMLU": 0.264, "helm_classic/BoolQ": 0.457, "helm_classic/NarrativeQA": 0.294, "helm_classic/NaturalQuestions (open-book)": 0.309, "helm_classic/QuAC": 0.219, "helm_classic/HellaSwag": 0.483, "helm_classic/OpenbookQA": 0.348, "helm_classic/TruthfulQA": 0.217, "helm_classic/MS MARCO (TREC)": 0.304, "helm_classic/CNN/DailyMail": 0.063, "helm_classic/XSUM": 0.033, "helm_classic/IMDB": 0.578, "helm_classic/CivilComments": 0.501, "helm_classic/RAFT": 0.492 } }, { "id": "cohere/Cohere-xlarge-v20220609-52.4B", "name": "Cohere xlarge v20220609 52.4B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.56, "helm_classic/MMLU": 0.353, "helm_classic/BoolQ": 0.718, "helm_classic/NarrativeQA": 0.65, "helm_classic/NaturalQuestions (open-book)": 0.595, "helm_classic/QuAC": 0.361, "helm_classic/HellaSwag": 0.811, "helm_classic/OpenbookQA": 0.55, "helm_classic/TruthfulQA": 0.198, "helm_classic/MS MARCO (TREC)": 0.459, "helm_classic/CNN/DailyMail": 0.144, "helm_classic/XSUM": 0.129, "helm_classic/IMDB": 0.956, "helm_classic/CivilComments": 0.532, "helm_classic/RAFT": 0.633 } }, { "id": "cohere/Cohere-xlarge-v20221108-52.4B", "name": "Cohere xlarge v20221108 52.4B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.664, "helm_classic/MMLU": 0.382, "helm_classic/BoolQ": 0.762, "helm_classic/NarrativeQA": 0.672, "helm_classic/NaturalQuestions (open-book)": 0.628, "helm_classic/QuAC": 0.374, "helm_classic/HellaSwag": 0.81, "helm_classic/OpenbookQA": 0.588, "helm_classic/TruthfulQA": 0.169, "helm_classic/MS MARCO (TREC)": 0.55, "helm_classic/CNN/DailyMail": 0.153, "helm_classic/XSUM": 0.153, "helm_classic/IMDB": 0.956, "helm_classic/CivilComments": 0.524, "helm_classic/RAFT": 0.624 } }, { "id": "cohere/command", "name": "Command", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.327, "helm_lite/NarrativeQA": 0.749, "helm_lite/NaturalQuestions (closed-book)": 0.391, "helm_lite/OpenbookQA": 0.774, "helm_lite/MMLU": 0.525, "helm_lite/MATH": 0.236, "helm_lite/GSM8K": 0.452, "helm_lite/LegalBench": 0.578, "helm_lite/MedQA": 0.445, "helm_lite/WMT 2014": 0.088 } }, { "id": "cohere/command-a-03-2025", "name": "command-a-03-2025", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8385, "global-mmlu-lite/Culturally Sensitive": 0.7993, "global-mmlu-lite/Culturally Agnostic": 0.8778, "global-mmlu-lite/Arabic": 0.8425, "global-mmlu-lite/English": 0.855, "global-mmlu-lite/Bengali": 0.8225, "global-mmlu-lite/German": 0.8425, "global-mmlu-lite/French": 0.8375, "global-mmlu-lite/Hindi": 0.8421, "global-mmlu-lite/Indonesian": 0.8546, "global-mmlu-lite/Italian": 0.8375, "global-mmlu-lite/Japanese": 0.845, "global-mmlu-lite/Korean": 0.85, "global-mmlu-lite/Portuguese": 0.84, "global-mmlu-lite/Spanish": 0.8525, "global-mmlu-lite/Swahili": 0.8275, "global-mmlu-lite/Yoruba": 0.815, "global-mmlu-lite/Chinese": 0.835, "global-mmlu-lite/Burmese": 0.8175 } }, { "id": "cohere/command-a-fc", "name": "Command A (FC)", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 35.0, "bfcl/bfcl.overall.overall_accuracy": 46.49, "bfcl/bfcl.overall.total_cost_usd": 91.37, "bfcl/bfcl.overall.latency_mean_s": 2.09, "bfcl/bfcl.overall.latency_std_s": 7.36, "bfcl/bfcl.overall.latency_p95_s": 4.94, "bfcl/bfcl.non_live.ast_accuracy": 87.56, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.0, "bfcl/bfcl.live.live_accuracy": 78.53, "bfcl/bfcl.live.live_simple_ast_accuracy": 85.66, "bfcl/bfcl.live.live_multiple_ast_accuracy": 76.92, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 29.5, "bfcl/bfcl.multi_turn.base_accuracy": 38.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 23.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 32.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 25.0, "bfcl/bfcl.web_search.accuracy": 46.5, "bfcl/bfcl.web_search.base_accuracy": 60.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 33.0, "bfcl/bfcl.memory.accuracy": 16.56, "bfcl/bfcl.memory.kv_accuracy": 4.52, "bfcl/bfcl.memory.vector_accuracy": 5.16, "bfcl/bfcl.memory.recursive_summarization_accuracy": 40.0, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 84.19 } }, { "id": "cohere/command-a-reasoning-fc", "name": "Command A Reasoning (FC)", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 13.0, "bfcl/bfcl.overall.overall_accuracy": 57.06, "bfcl/bfcl.overall.total_cost_usd": 3.04, "bfcl/bfcl.overall.latency_mean_s": 3.44, "bfcl/bfcl.overall.latency_std_s": 4.91, "bfcl/bfcl.overall.latency_p95_s": 8.39, "bfcl/bfcl.non_live.ast_accuracy": 86.27, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 78.61, "bfcl/bfcl.live.live_simple_ast_accuracy": 80.23, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.35, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 50.12, "bfcl/bfcl.multi_turn.base_accuracy": 61.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 41.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 49.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 48.5, "bfcl/bfcl.web_search.accuracy": 55.5, "bfcl/bfcl.web_search.base_accuracy": 65.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 46.0, "bfcl/bfcl.memory.accuracy": 28.82, "bfcl/bfcl.memory.kv_accuracy": 16.13, "bfcl/bfcl.memory.vector_accuracy": 23.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 46.45, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.75 } }, { "id": "cohere/command-light", "name": "Command Light", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.105, "helm_lite/NarrativeQA": 0.629, "helm_lite/NaturalQuestions (closed-book)": 0.195, "helm_lite/OpenbookQA": 0.398, "helm_lite/MMLU": 0.386, "helm_lite/MATH": 0.098, "helm_lite/GSM8K": 0.149, "helm_lite/LegalBench": 0.397, "helm_lite/MedQA": 0.312, "helm_lite/WMT 2014": 0.023 } }, { "id": "cohere/command-r", "name": "Command R", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.299, "helm_lite/NarrativeQA": 0.742, "helm_lite/NaturalQuestions (closed-book)": 0.352, "helm_lite/OpenbookQA": 0.782, "helm_lite/MMLU": 0.567, "helm_lite/MATH": 0.266, "helm_lite/GSM8K": 0.551, "helm_lite/LegalBench": 0.507, "helm_lite/MedQA": 0.555, "helm_lite/WMT 2014": 0.149, "helm_mmlu/MMLU All Subjects": 0.652, "helm_mmlu/Abstract Algebra": 0.33, "helm_mmlu/Anatomy": 0.615, "helm_mmlu/College Physics": 0.382, "helm_mmlu/Computer Security": 0.78, "helm_mmlu/Econometrics": 0.456, "helm_mmlu/Global Facts": 0.42, "helm_mmlu/Jurisprudence": 0.796, "helm_mmlu/Philosophy": 0.685, "helm_mmlu/Professional Psychology": 0.681, "helm_mmlu/Us Foreign Policy": 0.82, "helm_mmlu/Astronomy": 0.743, "helm_mmlu/Business Ethics": 0.63, "helm_mmlu/Clinical Knowledge": 0.751, "helm_mmlu/Conceptual Physics": 0.528, "helm_mmlu/Electrical Engineering": 0.593, "helm_mmlu/Elementary Mathematics": 0.437, "helm_mmlu/Formal Logic": 0.405, "helm_mmlu/High School World History": 0.84, "helm_mmlu/Human Sexuality": 0.763, "helm_mmlu/International Law": 0.802, "helm_mmlu/Logical Fallacies": 0.798, "helm_mmlu/Machine Learning": 0.446, "helm_mmlu/Management": 0.796, "helm_mmlu/Marketing": 0.872, "helm_mmlu/Medical Genetics": 0.81, "helm_mmlu/Miscellaneous": 0.848, "helm_mmlu/Moral Scenarios": 0.451, "helm_mmlu/Nutrition": 0.703, "helm_mmlu/Prehistory": 0.728, "helm_mmlu/Public Relations": 0.7, "helm_mmlu/Security Studies": 0.714, "helm_mmlu/Sociology": 0.866, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.813, "helm_mmlu/Mean win rate": 0.959 } }, { "id": "cohere/command-r-plus", "name": "Command R Plus", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.441, "helm_lite/NarrativeQA": 0.735, "helm_lite/NaturalQuestions (closed-book)": 0.343, "helm_lite/OpenbookQA": 0.828, "helm_lite/MMLU": 0.59, "helm_lite/MATH": 0.403, "helm_lite/GSM8K": 0.738, "helm_lite/LegalBench": 0.672, "helm_lite/MedQA": 0.567, "helm_lite/WMT 2014": 0.203, "helm_mmlu/MMLU All Subjects": 0.694, "helm_mmlu/Abstract Algebra": 0.21, "helm_mmlu/Anatomy": 0.644, "helm_mmlu/College Physics": 0.52, "helm_mmlu/Computer Security": 0.74, "helm_mmlu/Econometrics": 0.561, "helm_mmlu/Global Facts": 0.5, "helm_mmlu/Jurisprudence": 0.806, "helm_mmlu/Philosophy": 0.695, "helm_mmlu/Professional Psychology": 0.735, "helm_mmlu/Us Foreign Policy": 0.89, "helm_mmlu/Astronomy": 0.783, "helm_mmlu/Business Ethics": 0.77, "helm_mmlu/Clinical Knowledge": 0.743, "helm_mmlu/Conceptual Physics": 0.591, "helm_mmlu/Electrical Engineering": 0.71, "helm_mmlu/Elementary Mathematics": 0.474, "helm_mmlu/Formal Logic": 0.484, "helm_mmlu/High School World History": 0.827, "helm_mmlu/Human Sexuality": 0.786, "helm_mmlu/International Law": 0.835, "helm_mmlu/Logical Fallacies": 0.791, "helm_mmlu/Machine Learning": 0.518, "helm_mmlu/Management": 0.835, "helm_mmlu/Marketing": 0.927, "helm_mmlu/Medical Genetics": 0.77, "helm_mmlu/Miscellaneous": 0.844, "helm_mmlu/Moral Scenarios": 0.585, "helm_mmlu/Nutrition": 0.742, "helm_mmlu/Prehistory": 0.821, "helm_mmlu/Public Relations": 0.709, "helm_mmlu/Security Studies": 0.751, "helm_mmlu/Sociology": 0.876, "helm_mmlu/Virology": 0.56, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.825 } }, { "id": "cohere/command-r7b-fc", "name": "Command R7B (FC)", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 61.0, "bfcl/bfcl.overall.overall_accuracy": 32.07, "bfcl/bfcl.overall.total_cost_usd": 1.5, "bfcl/bfcl.overall.latency_mean_s": 1.38, "bfcl/bfcl.overall.latency_std_s": 2.87, "bfcl/bfcl.overall.latency_p95_s": 2.69, "bfcl/bfcl.non_live.ast_accuracy": 80.96, "bfcl/bfcl.non_live.simple_ast_accuracy": 67.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 85.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 81.5, "bfcl/bfcl.live.live_accuracy": 69.06, "bfcl/bfcl.live.live_simple_ast_accuracy": 62.79, "bfcl/bfcl.live.live_multiple_ast_accuracy": 70.94, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 8.25, "bfcl/bfcl.multi_turn.base_accuracy": 12.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 10.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 10.0, "bfcl/bfcl.web_search.accuracy": 27.0, "bfcl/bfcl.web_search.base_accuracy": 43.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 11.0, "bfcl/bfcl.memory.accuracy": 5.16, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 3.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 81.65 } }, { "id": "cohere/command-xlarge-beta", "name": "Cohere Command beta 52.4B", "developer": "cohere", "evaluator_relationship": null, "benchmark_scores": { "helm_instruct/Mean win rate": 0.089, "helm_instruct/Anthropic RLHF dataset": 4.214, "helm_instruct/Best ChatGPT Prompts": 4.988, "helm_instruct/Koala test dataset": 4.969, "helm_instruct/Open Assistant": 4.967, "helm_instruct/Self Instruct": 4.971, "helm_instruct/Vicuna": 4.995 } }, { "id": "CohereForAI/aya-23-35B", "name": "aya-23-35B", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6462, "hfopenllm_v2/BBH": 0.54, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.3356 } }, { "id": "CohereForAI/aya-23-8B", "name": "aya-23-8B", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4699, "hfopenllm_v2/BBH": 0.4296, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3941, "hfopenllm_v2/MMLU-PRO": 0.2278 } }, { "id": "CohereForAI/aya-expanse-32b", "name": "aya-expanse-32b", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7302, "hfopenllm_v2/BBH": 0.5649, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.413 } }, { "id": "CohereForAI/aya-expanse-8b", "name": "aya-expanse-8b", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6359, "hfopenllm_v2/BBH": 0.4977, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3729, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "CohereForAI/c4ai-command-r-plus", "name": "CohereForAI/c4ai-command-r-plus", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7664, "hfopenllm_v2/BBH": 0.5815, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4807, "hfopenllm_v2/MMLU-PRO": 0.3992, "reward-bench/Score": 0.7057, "reward-bench/Chat": 0.9511, "reward-bench/Chat Hard": 0.5757, "reward-bench/Safety": 0.5986, "reward-bench/Reasoning": 0.704, "reward-bench/Prior Sets (0.5 weight)": 0.6924 } }, { "id": "CohereForAI/c4ai-command-r-plus-08-2024", "name": "c4ai-command-r-plus-08-2024", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.754, "hfopenllm_v2/BBH": 0.5996, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4829, "hfopenllm_v2/MMLU-PRO": 0.4421 } }, { "id": "CohereForAI/c4ai-command-r-v01", "name": "c4ai-command-r-v01", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6748, "hfopenllm_v2/BBH": 0.5406, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4517, "hfopenllm_v2/MMLU-PRO": 0.3369 } }, { "id": "CohereForAI/c4ai-command-r7b-12-2024", "name": "c4ai-command-r7b-12-2024", "developer": "CohereForAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7713, "hfopenllm_v2/BBH": 0.5503, "hfopenllm_v2/MATH Level 5": 0.2991, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.3572 } }, { "id": "collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2", "name": "Collaiborator-MEDLLM-Llama-3-8B-v2", "developer": "collaiborateorg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3809, "hfopenllm_v2/BBH": 0.4648, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.3481 } }, { "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0", "name": "LION-Gemma-2b-dpo-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3278, "hfopenllm_v2/BBH": 0.392, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.412, "hfopenllm_v2/MMLU-PRO": 0.1666 } }, { "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0", "name": "LION-Gemma-2b-odpo-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3066, "hfopenllm_v2/BBH": 0.3896, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.1692 } }, { "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0", "name": "LION-Gemma-2b-sft-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3692, "hfopenllm_v2/BBH": 0.3879, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.4027, "hfopenllm_v2/MMLU-PRO": 0.1782 } }, { "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0", "name": "LION-LLaMA-3-8b-dpo-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4957, "hfopenllm_v2/BBH": 0.5028, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4097, "hfopenllm_v2/MMLU-PRO": 0.3219 } }, { "id": "Columbia-NLP/LION-LLaMA-3-8b-odpo-v1.0", "name": "LION-LLaMA-3-8b-odpo-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3968, "hfopenllm_v2/BBH": 0.5024, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4057, "hfopenllm_v2/MMLU-PRO": 0.3152 } }, { "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0", "name": "LION-LLaMA-3-8b-sft-v1.0", "developer": "Columbia-NLP", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3817, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4503, "hfopenllm_v2/MMLU-PRO": 0.3237 } }, { "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", "name": "huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8206, "hfopenllm_v2/BBH": 0.6929, "hfopenllm_v2/MATH Level 5": 0.5944, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.5721 } }, { "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", "name": "huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8176, "hfopenllm_v2/BBH": 0.6336, "hfopenllm_v2/MATH Level 5": 0.5476, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.491 } }, { "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", "name": "Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.824, "hfopenllm_v2/BBH": 0.637, "hfopenllm_v2/MATH Level 5": 0.5317, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.4979 } }, { "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", "name": "Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7564, "hfopenllm_v2/BBH": 0.5402, "hfopenllm_v2/MATH Level 5": 0.4932, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4033, "hfopenllm_v2/MMLU-PRO": 0.4342 } }, { "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP", "name": "YiSM-blossom5.1-34B-SLERP", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5033, "hfopenllm_v2/BBH": 0.6208, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4413, "hfopenllm_v2/MMLU-PRO": 0.4741 } }, { "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", "name": "zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES", "developer": "CombinHorizon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8328, "hfopenllm_v2/BBH": 0.6955, "hfopenllm_v2/MATH Level 5": 0.5853, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4314, "hfopenllm_v2/MMLU-PRO": 0.5685 } }, { "id": "ContactDoctor/Bio-Medical-3B-CoT-012025", "name": "Bio-Medical-3B-CoT-012025", "developer": "ContactDoctor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3604, "hfopenllm_v2/BBH": 0.4383, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.2934 } }, { "id": "ContactDoctor/Bio-Medical-Llama-3-8B", "name": "Bio-Medical-Llama-3-8B", "developer": "ContactDoctor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4422, "hfopenllm_v2/BBH": 0.4863, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.3648 } }, { "id": "ContextualAI/archangel_sft-dpo_llama13b", "name": "ContextualAI/archangel_sft-dpo_llama13b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.54, "reward-bench/Chat": 0.7123, "reward-bench/Chat Hard": 0.4298, "reward-bench/Safety": 0.5649, "reward-bench/Reasoning": 0.4401, "reward-bench/Prior Sets (0.5 weight)": 0.5656 } }, { "id": "ContextualAI/archangel_sft-dpo_llama30b", "name": "ContextualAI/archangel_sft-dpo_llama30b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5618, "reward-bench/Chat": 0.6927, "reward-bench/Chat Hard": 0.4474, "reward-bench/Safety": 0.6284, "reward-bench/Reasoning": 0.4745, "reward-bench/Prior Sets (0.5 weight)": 0.5705 } }, { "id": "ContextualAI/archangel_sft-dpo_llama7b", "name": "ContextualAI/archangel_sft-dpo_llama7b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5304, "reward-bench/Chat": 0.5782, "reward-bench/Chat Hard": 0.4452, "reward-bench/Safety": 0.5203, "reward-bench/Reasoning": 0.5658, "reward-bench/Prior Sets (0.5 weight)": 0.5544 } }, { "id": "ContextualAI/archangel_sft-dpo_pythia1-4b", "name": "ContextualAI/archangel_sft-dpo_pythia1-4b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5233, "reward-bench/Chat": 0.6397, "reward-bench/Chat Hard": 0.3728, "reward-bench/Safety": 0.5041, "reward-bench/Reasoning": 0.5672, "reward-bench/Prior Sets (0.5 weight)": 0.5427 } }, { "id": "ContextualAI/archangel_sft-dpo_pythia12-0b", "name": "ContextualAI/archangel_sft-dpo_pythia12-0b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5009, "reward-bench/Chat": 0.6676, "reward-bench/Chat Hard": 0.364, "reward-bench/Safety": 0.5432, "reward-bench/Reasoning": 0.4139, "reward-bench/Prior Sets (0.5 weight)": 0.5303 } }, { "id": "ContextualAI/archangel_sft-dpo_pythia2-8b", "name": "ContextualAI/archangel_sft-dpo_pythia2-8b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5286, "reward-bench/Chat": 0.8073, "reward-bench/Chat Hard": 0.3355, "reward-bench/Safety": 0.4473, "reward-bench/Reasoning": 0.5135, "reward-bench/Prior Sets (0.5 weight)": 0.5501 } }, { "id": "ContextualAI/archangel_sft-dpo_pythia6-9b", "name": "ContextualAI/archangel_sft-dpo_pythia6-9b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5263, "reward-bench/Chat": 0.7486, "reward-bench/Chat Hard": 0.3421, "reward-bench/Safety": 0.5176, "reward-bench/Reasoning": 0.4847, "reward-bench/Prior Sets (0.5 weight)": 0.551 } }, { "id": "ContextualAI/archangel_sft-kto_llama13b", "name": "ContextualAI/archangel_sft-kto_llama13b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5952, "reward-bench/Chat": 0.8408, "reward-bench/Chat Hard": 0.3772, "reward-bench/Safety": 0.4649, "reward-bench/Reasoning": 0.7077, "reward-bench/Prior Sets (0.5 weight)": 0.576 } }, { "id": "ContextualAI/archangel_sft-kto_llama30b", "name": "ContextualAI/archangel_sft-kto_llama30b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5901, "reward-bench/Chat": 0.8436, "reward-bench/Chat Hard": 0.4057, "reward-bench/Safety": 0.6054, "reward-bench/Reasoning": 0.5075, "reward-bench/Prior Sets (0.5 weight)": 0.5862 } }, { "id": "ContextualAI/archangel_sft-kto_llama7b", "name": "ContextualAI/archangel_sft-kto_llama7b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5388, "reward-bench/Chat": 0.5587, "reward-bench/Chat Hard": 0.4364, "reward-bench/Safety": 0.4568, "reward-bench/Reasoning": 0.6941, "reward-bench/Prior Sets (0.5 weight)": 0.5575 } }, { "id": "ContextualAI/archangel_sft-kto_pythia1-4b", "name": "ContextualAI/archangel_sft-kto_pythia1-4b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5581, "reward-bench/Chat": 0.6844, "reward-bench/Chat Hard": 0.3794, "reward-bench/Safety": 0.5257, "reward-bench/Reasoning": 0.6447, "reward-bench/Prior Sets (0.5 weight)": 0.5546 } }, { "id": "ContextualAI/archangel_sft-kto_pythia12-0b", "name": "ContextualAI/archangel_sft-kto_pythia12-0b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5053, "reward-bench/Chat": 0.7486, "reward-bench/Chat Hard": 0.3618, "reward-bench/Safety": 0.4757, "reward-bench/Reasoning": 0.4127, "reward-bench/Prior Sets (0.5 weight)": 0.55 } }, { "id": "ContextualAI/archangel_sft-kto_pythia2-8b", "name": "ContextualAI/archangel_sft-kto_pythia2-8b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5497, "reward-bench/Chat": 0.757, "reward-bench/Chat Hard": 0.3421, "reward-bench/Safety": 0.4743, "reward-bench/Reasoning": 0.6216, "reward-bench/Prior Sets (0.5 weight)": 0.557 } }, { "id": "ContextualAI/archangel_sft-kto_pythia6-9b", "name": "ContextualAI/archangel_sft-kto_pythia6-9b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5561, "reward-bench/Chat": 0.7765, "reward-bench/Chat Hard": 0.3618, "reward-bench/Safety": 0.5365, "reward-bench/Reasoning": 0.5415, "reward-bench/Prior Sets (0.5 weight)": 0.5723 } }, { "id": "ContextualAI/LMUnit-llama3.1-70b", "name": "ContextualAI/LMUnit-llama3.1-70b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8054, "reward-bench/Factuality": 0.8463, "reward-bench/Precise IF": 0.4875, "reward-bench/Math": 0.7158, "reward-bench/Safety": 0.9067, "reward-bench/Focus": 0.9697, "reward-bench/Ties": 0.9063 } }, { "id": "ContextualAI/LMUnit-qwen2.5-72b", "name": "ContextualAI/LMUnit-qwen2.5-72b", "developer": "ContextualAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8208, "reward-bench/Factuality": 0.8716, "reward-bench/Precise IF": 0.5437, "reward-bench/Math": 0.7268, "reward-bench/Safety": 0.9133, "reward-bench/Focus": 0.9677, "reward-bench/Ties": 0.9014 } }, { "id": "CoolSpring/Qwen2-0.5B-Abyme", "name": "Qwen2-0.5B-Abyme", "developer": "CoolSpring", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1915, "hfopenllm_v2/BBH": 0.2862, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3542, "hfopenllm_v2/MMLU-PRO": 0.1333 } }, { "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2", "name": "Qwen2-0.5B-Abyme-merge2", "developer": "CoolSpring", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2022, "hfopenllm_v2/BBH": 0.2994, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.1489 } }, { "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3", "name": "Qwen2-0.5B-Abyme-merge3", "developer": "CoolSpring", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2386, "hfopenllm_v2/BBH": 0.3003, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.15 } }, { "id": "Corianas/llama-3-reactor", "name": "llama-3-reactor", "developer": "Corianas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.23, "hfopenllm_v2/BBH": 0.4457, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.2801 } }, { "id": "Corianas/Neural-Mistral-7B", "name": "Neural-Mistral-7B", "developer": "Corianas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5489, "hfopenllm_v2/BBH": 0.4428, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.2738 } }, { "id": "Corianas/Quokka_2.7b", "name": "Quokka_2.7b", "developer": "Corianas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1749, "hfopenllm_v2/BBH": 0.3055, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3908, "hfopenllm_v2/MMLU-PRO": 0.1145 } }, { "id": "CortexLM/btlm-7b-base-v0.2", "name": "btlm-7b-base-v0.2", "developer": "CortexLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1483, "hfopenllm_v2/BBH": 0.4006, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3846, "hfopenllm_v2/MMLU-PRO": 0.235 } }, { "id": "cpayne1303/cp2024", "name": "cp2024", "developer": "cpayne1303", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1658, "hfopenllm_v2/BBH": 0.2985, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3383, "hfopenllm_v2/MMLU-PRO": 0.1101 } }, { "id": "cpayne1303/cp2024-instruct", "name": "cp2024-instruct", "developer": "cpayne1303", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1706, "hfopenllm_v2/BBH": 0.2947, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3686, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "cpayne1303/llama-43m-beta", "name": "llama-43m-beta", "developer": "cpayne1303", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1916, "hfopenllm_v2/BBH": 0.2977, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.1132 } }, { "id": "cpayne1303/smallcp2024", "name": "smallcp2024", "developer": "cpayne1303", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1582, "hfopenllm_v2/BBH": 0.3027, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2307, "hfopenllm_v2/MUSR": 0.3425, "hfopenllm_v2/MMLU-PRO": 0.1114 } }, { "id": "Cran-May/merge_model_20250308_2", "name": "merge_model_20250308_2", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5932, "hfopenllm_v2/BBH": 0.6585, "hfopenllm_v2/MATH Level 5": 0.4381, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4794, "hfopenllm_v2/MMLU-PRO": 0.542 } }, { "id": "Cran-May/merge_model_20250308_3", "name": "merge_model_20250308_3", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6018, "hfopenllm_v2/BBH": 0.6271, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.4962 } }, { "id": "Cran-May/merge_model_20250308_4", "name": "merge_model_20250308_4", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.454, "hfopenllm_v2/BBH": 0.6664, "hfopenllm_v2/MATH Level 5": 0.4199, "hfopenllm_v2/GPQA": 0.3977, "hfopenllm_v2/MUSR": 0.4688, "hfopenllm_v2/MMLU-PRO": 0.5367 } }, { "id": "Cran-May/SCE-2-24B", "name": "SCE-2-24B", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5866, "hfopenllm_v2/BBH": 0.6265, "hfopenllm_v2/MATH Level 5": 0.1896, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.4612 } }, { "id": "Cran-May/SCE-3-24B", "name": "SCE-3-24B", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5465, "hfopenllm_v2/BBH": 0.5973, "hfopenllm_v2/MATH Level 5": 0.1881, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4435, "hfopenllm_v2/MMLU-PRO": 0.4647 } }, { "id": "Cran-May/T.E-8.1", "name": "T.E-8.1", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7077, "hfopenllm_v2/BBH": 0.5582, "hfopenllm_v2/MATH Level 5": 0.4456, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4505, "hfopenllm_v2/MMLU-PRO": 0.4432 } }, { "id": "Cran-May/tempmotacilla-cinerea-0308", "name": "tempmotacilla-cinerea-0308", "developer": "Cran-May", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8085, "hfopenllm_v2/BBH": 0.6551, "hfopenllm_v2/MATH Level 5": 0.5551, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.525 } }, { "id": "CreitinGameplays/Llama-3.1-8B-R1-v0.1", "name": "Llama-3.1-8B-R1-v0.1", "developer": "CreitinGameplays", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3235, "hfopenllm_v2/BBH": 0.3057, "hfopenllm_v2/MATH Level 5": 0.1813, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3622, "hfopenllm_v2/MMLU-PRO": 0.1252 } }, { "id": "crestf411/MN-Slush", "name": "MN-Slush", "developer": "crestf411", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4077, "hfopenllm_v2/BBH": 0.534, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.3933, "hfopenllm_v2/MMLU-PRO": 0.3508 } }, { "id": "cstr/llama3.1-8b-spaetzle-v90", "name": "llama3.1-8b-spaetzle-v90", "developer": "cstr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7356, "hfopenllm_v2/BBH": 0.5303, "hfopenllm_v2/MATH Level 5": 0.1495, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4134, "hfopenllm_v2/MMLU-PRO": 0.3731 } }, { "id": "CultriX/Qwen2.5-14B-Broca", "name": "Qwen2.5-14B-Broca", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5604, "hfopenllm_v2/BBH": 0.6527, "hfopenllm_v2/MATH Level 5": 0.358, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4767, "hfopenllm_v2/MMLU-PRO": 0.5364 } }, { "id": "CultriX/Qwen2.5-14B-Brocav3", "name": "Qwen2.5-14B-Brocav3", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6952, "hfopenllm_v2/BBH": 0.6452, "hfopenllm_v2/MATH Level 5": 0.3875, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.5317 } }, { "id": "CultriX/Qwen2.5-14B-Brocav6", "name": "Qwen2.5-14B-Brocav6", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6995, "hfopenllm_v2/BBH": 0.6389, "hfopenllm_v2/MATH Level 5": 0.3875, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4742, "hfopenllm_v2/MMLU-PRO": 0.5319 } }, { "id": "CultriX/Qwen2.5-14B-Brocav7", "name": "Qwen2.5-14B-Brocav7", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6724, "hfopenllm_v2/BBH": 0.6444, "hfopenllm_v2/MATH Level 5": 0.3844, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4796, "hfopenllm_v2/MMLU-PRO": 0.5258 } }, { "id": "CultriX/Qwen2.5-14B-BrocaV9", "name": "Qwen2.5-14B-BrocaV9", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6763, "hfopenllm_v2/BBH": 0.6391, "hfopenllm_v2/MATH Level 5": 0.3814, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.469, "hfopenllm_v2/MMLU-PRO": 0.5331 } }, { "id": "CultriX/Qwen2.5-14B-Emerged", "name": "Qwen2.5-14B-Emerged", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7, "hfopenllm_v2/BBH": 0.626, "hfopenllm_v2/MATH Level 5": 0.3248, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.4691, "hfopenllm_v2/MMLU-PRO": 0.5186 } }, { "id": "CultriX/Qwen2.5-14B-Emergedv3", "name": "Qwen2.5-14B-Emergedv3", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6388, "hfopenllm_v2/BBH": 0.6191, "hfopenllm_v2/MATH Level 5": 0.4358, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.4728, "hfopenllm_v2/MMLU-PRO": 0.5174 } }, { "id": "CultriX/Qwen2.5-14B-FinalMerge", "name": "Qwen2.5-14B-FinalMerge", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4891, "hfopenllm_v2/BBH": 0.5715, "hfopenllm_v2/MATH Level 5": 0.3814, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4379, "hfopenllm_v2/MMLU-PRO": 0.4574 } }, { "id": "CultriX/Qwen2.5-14B-Hyper", "name": "Qwen2.5-14B-Hyper", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5391, "hfopenllm_v2/BBH": 0.6507, "hfopenllm_v2/MATH Level 5": 0.3437, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.4898, "hfopenllm_v2/MMLU-PRO": 0.5374 } }, { "id": "CultriX/Qwen2.5-14B-Hyperionv3", "name": "Qwen2.5-14B-Hyperionv3", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6836, "hfopenllm_v2/BBH": 0.6522, "hfopenllm_v2/MATH Level 5": 0.3701, "hfopenllm_v2/GPQA": 0.3708, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.534 } }, { "id": "CultriX/Qwen2.5-14B-Hyperionv4", "name": "Qwen2.5-14B-Hyperionv4", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5416, "hfopenllm_v2/BBH": 0.6472, "hfopenllm_v2/MATH Level 5": 0.3474, "hfopenllm_v2/GPQA": 0.3977, "hfopenllm_v2/MUSR": 0.4832, "hfopenllm_v2/MMLU-PRO": 0.5364 } }, { "id": "CultriX/Qwen2.5-14B-Hyperionv5", "name": "Qwen2.5-14B-Hyperionv5", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6729, "hfopenllm_v2/BBH": 0.6443, "hfopenllm_v2/MATH Level 5": 0.3822, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4795, "hfopenllm_v2/MMLU-PRO": 0.5302 } }, { "id": "CultriX/Qwen2.5-14B-HyperMarck-dl", "name": "Qwen2.5-14B-HyperMarck-dl", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.665, "hfopenllm_v2/BBH": 0.6096, "hfopenllm_v2/MATH Level 5": 0.5287, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4416, "hfopenllm_v2/MMLU-PRO": 0.5091 } }, { "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2", "name": "Qwen2.5-14B-MegaMerge-pt2", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5683, "hfopenllm_v2/BBH": 0.6578, "hfopenllm_v2/MATH Level 5": 0.3995, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4729, "hfopenllm_v2/MMLU-PRO": 0.5421 } }, { "id": "CultriX/Qwen2.5-14B-MergeStock", "name": "Qwen2.5-14B-MergeStock", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5685, "hfopenllm_v2/BBH": 0.6579, "hfopenllm_v2/MATH Level 5": 0.4147, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4676, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "CultriX/Qwen2.5-14B-partialmergept1", "name": "Qwen2.5-14B-partialmergept1", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6337, "hfopenllm_v2/BBH": 0.6151, "hfopenllm_v2/MATH Level 5": 0.4539, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4757, "hfopenllm_v2/MMLU-PRO": 0.5208 } }, { "id": "CultriX/Qwen2.5-14B-ReasoningMerge", "name": "Qwen2.5-14B-ReasoningMerge", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4605, "hfopenllm_v2/BBH": 0.6578, "hfopenllm_v2/MATH Level 5": 0.5204, "hfopenllm_v2/GPQA": 0.4077, "hfopenllm_v2/MUSR": 0.5166, "hfopenllm_v2/MMLU-PRO": 0.5345 } }, { "id": "CultriX/Qwen2.5-14B-Ultimav2", "name": "Qwen2.5-14B-Ultimav2", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.55, "hfopenllm_v2/BBH": 0.6555, "hfopenllm_v2/MATH Level 5": 0.3844, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4966, "hfopenllm_v2/MMLU-PRO": 0.5417 } }, { "id": "CultriX/Qwen2.5-14B-Unity", "name": "Qwen2.5-14B-Unity", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6739, "hfopenllm_v2/BBH": 0.602, "hfopenllm_v2/MATH Level 5": 0.4313, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4679, "hfopenllm_v2/MMLU-PRO": 0.5076 } }, { "id": "CultriX/Qwen2.5-14B-Wernicke", "name": "Qwen2.5-14B-Wernicke", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5235, "hfopenllm_v2/BBH": 0.6568, "hfopenllm_v2/MATH Level 5": 0.3814, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4689, "hfopenllm_v2/MMLU-PRO": 0.5424 } }, { "id": "CultriX/Qwen2.5-14B-Wernicke-SFT", "name": "Qwen2.5-14B-Wernicke-SFT", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4937, "hfopenllm_v2/BBH": 0.6461, "hfopenllm_v2/MATH Level 5": 0.3595, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.39, "hfopenllm_v2/MMLU-PRO": 0.507 } }, { "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP", "name": "Qwen2.5-14B-Wernicke-SLERP", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5589, "hfopenllm_v2/BBH": 0.6441, "hfopenllm_v2/MATH Level 5": 0.4486, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.5094 } }, { "id": "CultriX/Qwen2.5-14B-Wernickev3", "name": "Qwen2.5-14B-Wernickev3", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7048, "hfopenllm_v2/BBH": 0.6184, "hfopenllm_v2/MATH Level 5": 0.3542, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4717, "hfopenllm_v2/MMLU-PRO": 0.5151 } }, { "id": "CultriX/Qwenfinity-2.5-14B", "name": "Qwenfinity-2.5-14B", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4814, "hfopenllm_v2/BBH": 0.5655, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4506, "hfopenllm_v2/MMLU-PRO": 0.4498 } }, { "id": "CultriX/Qwestion-14B", "name": "Qwestion-14B", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6318, "hfopenllm_v2/BBH": 0.645, "hfopenllm_v2/MATH Level 5": 0.3724, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4636, "hfopenllm_v2/MMLU-PRO": 0.5422 } }, { "id": "CultriX/SeQwence-14B", "name": "SeQwence-14B", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5352, "hfopenllm_v2/BBH": 0.6506, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.4666, "hfopenllm_v2/MMLU-PRO": 0.5419 } }, { "id": "CultriX/SeQwence-14B-EvolMerge", "name": "SeQwence-14B-EvolMerge", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5382, "hfopenllm_v2/BBH": 0.6572, "hfopenllm_v2/MATH Level 5": 0.3671, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4821, "hfopenllm_v2/MMLU-PRO": 0.5419 } }, { "id": "CultriX/SeQwence-14B-EvolMergev1", "name": "SeQwence-14B-EvolMergev1", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5555, "hfopenllm_v2/BBH": 0.6546, "hfopenllm_v2/MATH Level 5": 0.4215, "hfopenllm_v2/GPQA": 0.3767, "hfopenllm_v2/MUSR": 0.4623, "hfopenllm_v2/MMLU-PRO": 0.5393 } }, { "id": "CultriX/SeQwence-14B-v5", "name": "SeQwence-14B-v5", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.592, "hfopenllm_v2/BBH": 0.6517, "hfopenllm_v2/MATH Level 5": 0.3308, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4714, "hfopenllm_v2/MMLU-PRO": 0.5415 } }, { "id": "CultriX/SeQwence-14Bv1", "name": "SeQwence-14Bv1", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6678, "hfopenllm_v2/BBH": 0.6345, "hfopenllm_v2/MATH Level 5": 0.361, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4704, "hfopenllm_v2/MMLU-PRO": 0.532 } }, { "id": "CultriX/SeQwence-14Bv2", "name": "SeQwence-14Bv2", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5786, "hfopenllm_v2/BBH": 0.6305, "hfopenllm_v2/MATH Level 5": 0.4758, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.4601, "hfopenllm_v2/MMLU-PRO": 0.5334 } }, { "id": "CultriX/SeQwence-14Bv3", "name": "SeQwence-14Bv3", "developer": "CultriX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5719, "hfopenllm_v2/BBH": 0.6302, "hfopenllm_v2/MATH Level 5": 0.4766, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4624, "hfopenllm_v2/MMLU-PRO": 0.5335 } }, { "id": "cyberagent/calm3-22b-chat", "name": "calm3-22b-chat", "developer": "cyberagent", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5091, "hfopenllm_v2/BBH": 0.4992, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4553, "hfopenllm_v2/MMLU-PRO": 0.295 } }, { "id": "CYFRAGOVPL/Llama-PLLuM-8B-base", "name": "Llama-PLLuM-8B-base", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2899, "hfopenllm_v2/BBH": 0.432, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.397, "hfopenllm_v2/MMLU-PRO": 0.2757 } }, { "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat", "name": "Llama-PLLuM-8B-chat", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3515, "hfopenllm_v2/BBH": 0.4077, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.2719 } }, { "id": "CYFRAGOVPL/PLLuM-12B-base", "name": "PLLuM-12B-base", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2821, "hfopenllm_v2/BBH": 0.4391, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4142, "hfopenllm_v2/MMLU-PRO": 0.274 } }, { "id": "CYFRAGOVPL/PLLuM-12B-chat", "name": "PLLuM-12B-chat", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3214, "hfopenllm_v2/BBH": 0.4446, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.4115, "hfopenllm_v2/MMLU-PRO": 0.2872 } }, { "id": "CYFRAGOVPL/PLLuM-12B-nc-base", "name": "PLLuM-12B-nc-base", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2405, "hfopenllm_v2/BBH": 0.4277, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3645, "hfopenllm_v2/MMLU-PRO": 0.2559 } }, { "id": "CYFRAGOVPL/PLLuM-12B-nc-chat", "name": "PLLuM-12B-nc-chat", "developer": "CYFRAGOVPL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2834, "hfopenllm_v2/BBH": 0.4576, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.2597 } }, { "id": "Daemontatox/AetherDrake-SFT", "name": "AetherDrake-SFT", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4813, "hfopenllm_v2/BBH": 0.4872, "hfopenllm_v2/MATH Level 5": 0.1511, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4088, "hfopenllm_v2/MMLU-PRO": 0.3499 } }, { "id": "Daemontatox/AetherSett", "name": "AetherSett", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.537, "hfopenllm_v2/BBH": 0.5452, "hfopenllm_v2/MATH Level 5": 0.3973, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4603, "hfopenllm_v2/MMLU-PRO": 0.4279 } }, { "id": "Daemontatox/AetherTOT", "name": "AetherTOT", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4398, "hfopenllm_v2/BBH": 0.5066, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4079, "hfopenllm_v2/MMLU-PRO": 0.3804 } }, { "id": "Daemontatox/AetherUncensored", "name": "AetherUncensored", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4042, "hfopenllm_v2/BBH": 0.4463, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3747, "hfopenllm_v2/MMLU-PRO": 0.271 } }, { "id": "Daemontatox/Cogito-MIS", "name": "Cogito-MIS", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1815, "hfopenllm_v2/BBH": 0.506, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3768, "hfopenllm_v2/MMLU-PRO": 0.1435 } }, { "id": "Daemontatox/CogitoDistil", "name": "CogitoDistil", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2776, "hfopenllm_v2/BBH": 0.3677, "hfopenllm_v2/MATH Level 5": 0.3927, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3755, "hfopenllm_v2/MMLU-PRO": 0.2625 } }, { "id": "Daemontatox/CogitoZ", "name": "CogitoZ", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3967, "hfopenllm_v2/BBH": 0.6734, "hfopenllm_v2/MATH Level 5": 0.5242, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4793, "hfopenllm_v2/MMLU-PRO": 0.5593 } }, { "id": "Daemontatox/CogitoZ14", "name": "CogitoZ14", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6637, "hfopenllm_v2/BBH": 0.6298, "hfopenllm_v2/MATH Level 5": 0.4222, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4059, "hfopenllm_v2/MMLU-PRO": 0.3999 } }, { "id": "Daemontatox/DocumentCogito", "name": "DocumentCogito", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5064, "hfopenllm_v2/BBH": 0.5112, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.3973, "hfopenllm_v2/MMLU-PRO": 0.3802 } }, { "id": "Daemontatox/Llama3.3-70B-CogniLink", "name": "Llama3.3-70B-CogniLink", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6931, "hfopenllm_v2/BBH": 0.6668, "hfopenllm_v2/MATH Level 5": 0.4139, "hfopenllm_v2/GPQA": 0.4455, "hfopenllm_v2/MUSR": 0.4877, "hfopenllm_v2/MMLU-PRO": 0.5173 } }, { "id": "Daemontatox/Llama_cot", "name": "Llama_cot", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7549, "hfopenllm_v2/BBH": 0.4838, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.3518 } }, { "id": "Daemontatox/MawaredT1", "name": "MawaredT1", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4199, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.3021, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4702, "hfopenllm_v2/MMLU-PRO": 0.4718 } }, { "id": "Daemontatox/mini-Cogito-R1", "name": "mini-Cogito-R1", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2298, "hfopenllm_v2/BBH": 0.328, "hfopenllm_v2/MATH Level 5": 0.2749, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.1482 } }, { "id": "Daemontatox/mini_Pathfinder", "name": "mini_Pathfinder", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2962, "hfopenllm_v2/BBH": 0.3956, "hfopenllm_v2/MATH Level 5": 0.4751, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.2809 } }, { "id": "Daemontatox/Mini_QwQ", "name": "Mini_QwQ", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4497, "hfopenllm_v2/BBH": 0.5549, "hfopenllm_v2/MATH Level 5": 0.4192, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.4373 } }, { "id": "Daemontatox/NemoR", "name": "NemoR", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2287, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.3908, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "Daemontatox/PathfinderAI", "name": "PathfinderAI", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3745, "hfopenllm_v2/BBH": 0.6668, "hfopenllm_v2/MATH Level 5": 0.4758, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4858, "hfopenllm_v2/MMLU-PRO": 0.5593 } }, { "id": "Daemontatox/PathFinderAI2.0", "name": "PathFinderAI2.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4541, "hfopenllm_v2/BBH": 0.6658, "hfopenllm_v2/MATH Level 5": 0.5076, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4216, "hfopenllm_v2/MMLU-PRO": 0.5547 } }, { "id": "Daemontatox/PathFinderAi3.0", "name": "PathFinderAi3.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4271, "hfopenllm_v2/BBH": 0.6884, "hfopenllm_v2/MATH Level 5": 0.5045, "hfopenllm_v2/GPQA": 0.4086, "hfopenllm_v2/MUSR": 0.4807, "hfopenllm_v2/MMLU-PRO": 0.5757 } }, { "id": "Daemontatox/Phi-4-COT", "name": "Phi-4-COT", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1793, "hfopenllm_v2/BBH": 0.6173, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.453, "hfopenllm_v2/MMLU-PRO": 0.5005 } }, { "id": "Daemontatox/PixelParse_AI", "name": "PixelParse_AI", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4383, "hfopenllm_v2/BBH": 0.5034, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.3778 } }, { "id": "Daemontatox/RA2.0", "name": "RA2.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3784, "hfopenllm_v2/BBH": 0.4889, "hfopenllm_v2/MATH Level 5": 0.3837, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.2616 } }, { "id": "Daemontatox/RA_Reasoner", "name": "RA_Reasoner", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5592, "hfopenllm_v2/BBH": 0.6054, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.43 } }, { "id": "Daemontatox/RA_Reasoner2.0", "name": "RA_Reasoner2.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5366, "hfopenllm_v2/BBH": 0.6062, "hfopenllm_v2/MATH Level 5": 0.2311, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.3884, "hfopenllm_v2/MMLU-PRO": 0.4353 } }, { "id": "Daemontatox/ReasonTest", "name": "ReasonTest", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.408, "hfopenllm_v2/BBH": 0.5435, "hfopenllm_v2/MATH Level 5": 0.2137, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4315, "hfopenllm_v2/MMLU-PRO": 0.4272 } }, { "id": "Daemontatox/Research_PathfinderAI", "name": "Research_PathfinderAI", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3457, "hfopenllm_v2/BBH": 0.2872, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.113 } }, { "id": "Daemontatox/SphinX", "name": "SphinX", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5725, "hfopenllm_v2/BBH": 0.5441, "hfopenllm_v2/MATH Level 5": 0.3082, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4405, "hfopenllm_v2/MMLU-PRO": 0.4366 } }, { "id": "Daemontatox/Sphinx2.0", "name": "Sphinx2.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7123, "hfopenllm_v2/BBH": 0.6473, "hfopenllm_v2/MATH Level 5": 0.4018, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.5184 } }, { "id": "Daemontatox/TinySphinx", "name": "TinySphinx", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2567, "hfopenllm_v2/BBH": 0.331, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1698 } }, { "id": "Daemontatox/TinySphinx2.0", "name": "TinySphinx2.0", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2535, "hfopenllm_v2/BBH": 0.3168, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1731 } }, { "id": "Daemontatox/Zirel-7B-Math", "name": "Zirel-7B-Math", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6639, "hfopenllm_v2/BBH": 0.5448, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4789, "hfopenllm_v2/MMLU-PRO": 0.4237 } }, { "id": "Daemontatox/Zirel_1.5", "name": "Zirel_1.5", "developer": "Daemontatox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4168, "hfopenllm_v2/BBH": 0.3985, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.2143 } }, { "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct", "name": "Llama-3.1-8B-Ultra-Instruct", "developer": "Dampfinchen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8081, "hfopenllm_v2/BBH": 0.5258, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4003, "hfopenllm_v2/MMLU-PRO": 0.3826 } }, { "id": "Danielbrdz/Barcenas-10b", "name": "Barcenas-10b", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6608, "hfopenllm_v2/BBH": 0.6121, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.4361 } }, { "id": "Danielbrdz/Barcenas-14b-Phi-3-medium-ORPO", "name": "Barcenas-14b-Phi-3-medium-ORPO", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4799, "hfopenllm_v2/BBH": 0.6536, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.4723 } }, { "id": "Danielbrdz/Barcenas-14b-phi-4", "name": "Barcenas-14b-phi-4", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0498, "hfopenllm_v2/BBH": 0.6769, "hfopenllm_v2/MATH Level 5": 0.2583, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.5097, "hfopenllm_v2/MMLU-PRO": 0.5175 } }, { "id": "Danielbrdz/Barcenas-14b-phi-4-v2", "name": "Barcenas-14b-phi-4-v2", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2775, "hfopenllm_v2/BBH": 0.6573, "hfopenllm_v2/MATH Level 5": 0.3218, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4399, "hfopenllm_v2/MMLU-PRO": 0.5244 } }, { "id": "Danielbrdz/Barcenas-3b-GRPO", "name": "Barcenas-3b-GRPO", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5444, "hfopenllm_v2/BBH": 0.4414, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3576, "hfopenllm_v2/MMLU-PRO": 0.3037 } }, { "id": "Danielbrdz/Barcenas-Llama3-8b-ORPO", "name": "Barcenas-Llama3-8b-ORPO", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7372, "hfopenllm_v2/BBH": 0.4987, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.419, "hfopenllm_v2/MMLU-PRO": 0.383 } }, { "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b", "name": "Barcenas-R1-Qwen-1.5b", "developer": "Danielbrdz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2428, "hfopenllm_v2/BBH": 0.3587, "hfopenllm_v2/MATH Level 5": 0.3497, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.1909 } }, { "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2", "name": "12b-mn-dans-reasoning-test-2", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3711, "hfopenllm_v2/BBH": 0.4807, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3702, "hfopenllm_v2/MMLU-PRO": 0.2507 } }, { "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3", "name": "12b-mn-dans-reasoning-test-3", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5053, "hfopenllm_v2/BBH": 0.4839, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4168, "hfopenllm_v2/MMLU-PRO": 0.2516 } }, { "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML", "name": "Dans-Instruct-CoreCurriculum-12b-ChatML", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2111, "hfopenllm_v2/BBH": 0.4792, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3606, "hfopenllm_v2/MMLU-PRO": 0.2805 } }, { "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML", "name": "Dans-Instruct-Mix-8b-ChatML", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0825, "hfopenllm_v2/BBH": 0.4738, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3918, "hfopenllm_v2/MMLU-PRO": 0.3288 } }, { "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0", "name": "Dans-Instruct-Mix-8b-ChatML-V0.1.0", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0668, "hfopenllm_v2/BBH": 0.4775, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3786, "hfopenllm_v2/MMLU-PRO": 0.3284 } }, { "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1", "name": "Dans-Instruct-Mix-8b-ChatML-V0.1.1", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0911, "hfopenllm_v2/BBH": 0.4749, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3825, "hfopenllm_v2/MMLU-PRO": 0.3279 } }, { "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0", "name": "Dans-Instruct-Mix-8b-ChatML-V0.2.0", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5064, "hfopenllm_v2/BBH": 0.4624, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.3 } }, { "id": "Dans-DiscountModels/mistral-7b-test-merged", "name": "mistral-7b-test-merged", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6678, "hfopenllm_v2/BBH": 0.4898, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.2978 } }, { "id": "Dans-DiscountModels/Mistral-7b-v0.3-Test-E0.7", "name": "Mistral-7b-v0.3-Test-E0.7", "developer": "Dans-DiscountModels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5124, "hfopenllm_v2/BBH": 0.475, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4005, "hfopenllm_v2/MMLU-PRO": 0.2744 } }, { "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", "name": "BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp", "developer": "darkc0de", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4358, "hfopenllm_v2/BBH": 0.5243, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4143, "hfopenllm_v2/MMLU-PRO": 0.3673 } }, { "id": "darkc0de/BuddyGlassNeverSleeps", "name": "BuddyGlassNeverSleeps", "developer": "darkc0de", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4239, "hfopenllm_v2/BBH": 0.4977, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3993, "hfopenllm_v2/MMLU-PRO": 0.3452 } }, { "id": "darkc0de/BuddyGlassUncensored2025.2", "name": "BuddyGlassUncensored2025.2", "developer": "darkc0de", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7731, "hfopenllm_v2/BBH": 0.6095, "hfopenllm_v2/MATH Level 5": 0.2402, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.4336 } }, { "id": "Darkknight535/OpenCrystal-12B-L3", "name": "OpenCrystal-12B-L3", "developer": "Darkknight535", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4071, "hfopenllm_v2/BBH": 0.5223, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.3657, "hfopenllm_v2/MMLU-PRO": 0.364 } }, { "id": "Databricks-Mosaic-Research/PGRM", "name": "Databricks-Mosaic-Research/PGRM", "developer": "Databricks-Mosaic-Research", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8002, "reward-bench/Factuality": 0.7937, "reward-bench/Precise IF": 0.5062, "reward-bench/Math": 0.7404, "reward-bench/Safety": 0.9289, "reward-bench/Focus": 0.9424, "reward-bench/Ties": 0.8893 } }, { "id": "databricks/dbrx-base", "name": "dbrx-base", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0821, "hfopenllm_v2/BBH": 0.5196, "hfopenllm_v2/MATH Level 5": 0.1, "hfopenllm_v2/GPQA": 0.3267, "hfopenllm_v2/MUSR": 0.4067, "hfopenllm_v2/MMLU-PRO": 0.35 } }, { "id": "databricks/dbrx-instruct", "name": "DBRX Instruct", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.289, "helm_lite/NarrativeQA": 0.488, "helm_lite/NaturalQuestions (closed-book)": 0.284, "helm_lite/OpenbookQA": 0.91, "helm_lite/MMLU": 0.643, "helm_lite/MATH": 0.358, "helm_lite/GSM8K": 0.671, "helm_lite/LegalBench": 0.426, "helm_lite/MedQA": 0.694, "helm_lite/WMT 2014": 0.131, "helm_mmlu/MMLU All Subjects": 0.741, "helm_mmlu/Abstract Algebra": 0.34, "helm_mmlu/Anatomy": 0.667, "helm_mmlu/College Physics": 0.539, "helm_mmlu/Computer Security": 0.83, "helm_mmlu/Econometrics": 0.605, "helm_mmlu/Global Facts": 0.46, "helm_mmlu/Jurisprudence": 0.843, "helm_mmlu/Philosophy": 0.804, "helm_mmlu/Professional Psychology": 0.801, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.836, "helm_mmlu/Business Ethics": 0.78, "helm_mmlu/Clinical Knowledge": 0.789, "helm_mmlu/Conceptual Physics": 0.74, "helm_mmlu/Electrical Engineering": 0.71, "helm_mmlu/Elementary Mathematics": 0.563, "helm_mmlu/Formal Logic": 0.563, "helm_mmlu/High School World History": 0.903, "helm_mmlu/Human Sexuality": 0.878, "helm_mmlu/International Law": 0.884, "helm_mmlu/Logical Fallacies": 0.847, "helm_mmlu/Machine Learning": 0.625, "helm_mmlu/Management": 0.854, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.85, "helm_mmlu/Miscellaneous": 0.911, "helm_mmlu/Moral Scenarios": 0.465, "helm_mmlu/Nutrition": 0.814, "helm_mmlu/Prehistory": 0.84, "helm_mmlu/Public Relations": 0.691, "helm_mmlu/Security Studies": 0.804, "helm_mmlu/Sociology": 0.896, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.537, "hfopenllm_v2/IFEval": 0.5416, "hfopenllm_v2/BBH": 0.5429, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4269, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "databricks/dolly-v1-6b", "name": "dolly-v1-6b", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2224, "hfopenllm_v2/BBH": 0.3172, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.1266 } }, { "id": "databricks/dolly-v2-12b", "name": "dolly-v2-12b", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2355, "hfopenllm_v2/BBH": 0.332, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "databricks/dolly-v2-3b", "name": "dolly-v2-3b", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2247, "hfopenllm_v2/BBH": 0.3079, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3338, "hfopenllm_v2/MMLU-PRO": 0.1145 } }, { "id": "databricks/dolly-v2-7b", "name": "dolly-v2-7b", "developer": "databricks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.201, "hfopenllm_v2/BBH": 0.3173, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3553, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "DavidAU/DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", "name": "DeepHermes-3-Llama-3-8B-Preview-16.5B-Brainstorm", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3136, "hfopenllm_v2/BBH": 0.4762, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.3209 } }, { "id": "DavidAU/DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", "name": "DeepSeek-BlackRoot-R1-Distill-Llama-3.1-8B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3685, "hfopenllm_v2/BBH": 0.4887, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.2976 } }, { "id": "DavidAU/DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", "name": "DeepSeek-Grand-Horror-SMB-R1-Distill-Llama-3.1-16B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2507, "hfopenllm_v2/BBH": 0.4488, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4164, "hfopenllm_v2/MMLU-PRO": 0.2709 } }, { "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", "name": "DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Deep-Thinker-Uncensored-24B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3883, "hfopenllm_v2/BBH": 0.4886, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.3024 } }, { "id": "DavidAU/DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", "name": "DeepSeek-MOE-4X8B-R1-Distill-Llama-3.1-Mad-Scientist-24B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3436, "hfopenllm_v2/BBH": 0.4769, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.297 } }, { "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", "name": "DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3416, "hfopenllm_v2/BBH": 0.5807, "hfopenllm_v2/MATH Level 5": 0.5536, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.5155, "hfopenllm_v2/MMLU-PRO": 0.4624 } }, { "id": "DavidAU/DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", "name": "DeepSeek-V2-Grand-Horror-SMB-R1-Distill-Llama-3.1-Uncensored-16.5B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2853, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4179, "hfopenllm_v2/MMLU-PRO": 0.2778 } }, { "id": "DavidAU/DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", "name": "DeepThought-MOE-8X3B-R1-Llama-3.2-Reasoning-18B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3793, "hfopenllm_v2/BBH": 0.4232, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.356, "hfopenllm_v2/MMLU-PRO": 0.272 } }, { "id": "DavidAU/Gemma-The-Writer-9B", "name": "Gemma-The-Writer-9B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.174, "hfopenllm_v2/BBH": 0.5905, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4099, "hfopenllm_v2/MMLU-PRO": 0.3979 } }, { "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B", "name": "Gemma-The-Writer-DEADLINE-10B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2332, "hfopenllm_v2/BBH": 0.5896, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4189, "hfopenllm_v2/MMLU-PRO": 0.3946 } }, { "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B", "name": "Gemma-The-Writer-J.GutenBerg-10B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2858, "hfopenllm_v2/BBH": 0.5909, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4176, "hfopenllm_v2/MMLU-PRO": 0.3947 } }, { "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B", "name": "Gemma-The-Writer-Mighty-Sword-9B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7528, "hfopenllm_v2/BBH": 0.5912, "hfopenllm_v2/MATH Level 5": 0.1911, "hfopenllm_v2/GPQA": 0.3482, "hfopenllm_v2/MUSR": 0.4112, "hfopenllm_v2/MMLU-PRO": 0.3968 } }, { "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", "name": "Gemma-The-Writer-N-Restless-Quill-10B-Uncensored", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7071, "hfopenllm_v2/BBH": 0.5922, "hfopenllm_v2/MATH Level 5": 0.2296, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4163, "hfopenllm_v2/MMLU-PRO": 0.3966 } }, { "id": "DavidAU/L3-Dark-Planet-8B", "name": "L3-Dark-Planet-8B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4134, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3616, "hfopenllm_v2/MMLU-PRO": 0.3737 } }, { "id": "DavidAU/L3-DARKEST-PLANET-16.5B", "name": "L3-DARKEST-PLANET-16.5B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6231, "hfopenllm_v2/BBH": 0.523, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.363 } }, { "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct", "name": "L3-Jamet-12.2B-MK.V-Blackroot-Instruct", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3962, "hfopenllm_v2/BBH": 0.4766, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.3291 } }, { "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct", "name": "L3-Lumimaid-12.2B-v0.1-OAS-Instruct", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3924, "hfopenllm_v2/BBH": 0.4693, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4194, "hfopenllm_v2/MMLU-PRO": 0.3142 } }, { "id": "DavidAU/L3-SMB-Instruct-12.2B-F32", "name": "L3-SMB-Instruct-12.2B-F32", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4303, "hfopenllm_v2/BBH": 0.4786, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4087, "hfopenllm_v2/MMLU-PRO": 0.3312 } }, { "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", "name": "L3-Stheno-Maid-Blackroot-Grand-HORROR-16B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3439, "hfopenllm_v2/BBH": 0.4736, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.357 } }, { "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct", "name": "L3-Stheno-v3.2-12.2B-Instruct", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4028, "hfopenllm_v2/BBH": 0.4846, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.3345 } }, { "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B", "name": "L3.1-Dark-Planet-SpinFire-Uncensored-8B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7043, "hfopenllm_v2/BBH": 0.5261, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.367 } }, { "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", "name": "L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3345, "hfopenllm_v2/BBH": 0.4421, "hfopenllm_v2/MATH Level 5": 0.2606, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.3749, "hfopenllm_v2/MMLU-PRO": 0.2892 } }, { "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", "name": "Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1783, "hfopenllm_v2/BBH": 0.3033, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3715, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", "name": "Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2835, "hfopenllm_v2/BBH": 0.3592, "hfopenllm_v2/MATH Level 5": 0.2417, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.1636 } }, { "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", "name": "Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32", "developer": "DavidAU", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2107, "hfopenllm_v2/BBH": 0.3286, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3404, "hfopenllm_v2/MMLU-PRO": 0.1122 } }, { "id": "davidkim205/nox-solar-10.7b-v4", "name": "nox-solar-10.7b-v4", "developer": "davidkim205", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3753, "hfopenllm_v2/BBH": 0.4814, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.3333 } }, { "id": "davidkim205/Rhea-72b-v0.5", "name": "Rhea-72b-v0.5", "developer": "davidkim205", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0145, "hfopenllm_v2/BBH": 0.3078, "hfopenllm_v2/MATH Level 5": 0.1737, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "Davidsv/SUONG-1", "name": "SUONG-1", "developer": "Davidsv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2497, "hfopenllm_v2/BBH": 0.2817, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.1085 } }, { "id": "DavieLion/Llama-3.2-1B-SPIN-iter0", "name": "Llama-3.2-1B-SPIN-iter0", "developer": "DavieLion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1507, "hfopenllm_v2/BBH": 0.293, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3565, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "DavieLion/Llama-3.2-1B-SPIN-iter1", "name": "Llama-3.2-1B-SPIN-iter1", "developer": "DavieLion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1575, "hfopenllm_v2/BBH": 0.294, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3646, "hfopenllm_v2/MMLU-PRO": 0.1118 } }, { "id": "DavieLion/Llama-3.2-1B-SPIN-iter2", "name": "Llama-3.2-1B-SPIN-iter2", "developer": "DavieLion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1376, "hfopenllm_v2/BBH": 0.298, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3553, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "DavieLion/Llama-3.2-1B-SPIN-iter3", "name": "Llama-3.2-1B-SPIN-iter3", "developer": "DavieLion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1336, "hfopenllm_v2/BBH": 0.2975, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.35, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "DavieLion/Lllma-3.2-1B", "name": "Lllma-3.2-1B", "developer": "DavieLion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1601, "hfopenllm_v2/BBH": 0.2965, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT", "name": "Llama-3.1-Argunaut-1-8B-SFT", "developer": "DebateLabKIT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5519, "hfopenllm_v2/BBH": 0.4824, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4503, "hfopenllm_v2/MMLU-PRO": 0.3472 } }, { "id": "Deci/DeciLM-7B", "name": "DeciLM-7B", "developer": "Deci", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2813, "hfopenllm_v2/BBH": 0.4423, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4359, "hfopenllm_v2/MMLU-PRO": 0.2692 } }, { "id": "Deci/DeciLM-7B-instruct", "name": "DeciLM-7B-instruct", "developer": "Deci", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.488, "hfopenllm_v2/BBH": 0.459, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3884, "hfopenllm_v2/MMLU-PRO": 0.2608 } }, { "id": "DeepAutoAI/causal_gpt2", "name": "causal_gpt2", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1813, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.427, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "DeepAutoAI/d2nwg_causal_gpt2", "name": "d2nwg_causal_gpt2", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1916, "hfopenllm_v2/BBH": 0.3027, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.4297, "hfopenllm_v2/MMLU-PRO": 0.1151 } }, { "id": "DeepAutoAI/d2nwg_causal_gpt2_v1", "name": "d2nwg_causal_gpt2_v1", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1989, "hfopenllm_v2/BBH": 0.2992, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4337, "hfopenllm_v2/MMLU-PRO": 0.1135 } }, { "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0", "name": "d2nwg_Llama-3.1-8B-Instruct-v0.0", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7893, "hfopenllm_v2/BBH": 0.508, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3877 } }, { "id": "DeepAutoAI/Explore_Llama-3.1-8B-Inst", "name": "Explore_Llama-3.1-8B-Inst", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7795, "hfopenllm_v2/BBH": 0.5117, "hfopenllm_v2/MATH Level 5": 0.2009, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.391, "hfopenllm_v2/MMLU-PRO": 0.3792 } }, { "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst", "name": "Explore_Llama-3.2-1B-Inst", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5649, "hfopenllm_v2/BBH": 0.3505, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3183, "hfopenllm_v2/MMLU-PRO": 0.1809 } }, { "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v0", "name": "Explore_Llama-3.2-1B-Inst_v0", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5597, "hfopenllm_v2/BBH": 0.3365, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3103, "hfopenllm_v2/MMLU-PRO": 0.1804 } }, { "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1", "name": "Explore_Llama-3.2-1B-Inst_v1", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4999, "hfopenllm_v2/BBH": 0.3141, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.1269 } }, { "id": "DeepAutoAI/Explore_Llama-3.2-1B-Inst_v1.1", "name": "Explore_Llama-3.2-1B-Inst_v1.1", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5844, "hfopenllm_v2/BBH": 0.3513, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3117, "hfopenllm_v2/MMLU-PRO": 0.1818 } }, { "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Inst", "name": "ldm_soup_Llama-3.1-8B-Inst", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8033, "hfopenllm_v2/BBH": 0.5121, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.3886 } }, { "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0", "name": "ldm_soup_Llama-3.1-8B-Instruct-v0.0", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.5125, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4121, "hfopenllm_v2/MMLU-PRO": 0.3895 } }, { "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1", "name": "ldm_soup_Llama-3.1-8B-Instruct-v0.1", "developer": "DeepAutoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.5125, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4121, "hfopenllm_v2/MMLU-PRO": 0.3895 } }, { "id": "DeepMount00/Lexora-Lite-3B", "name": "Lexora-Lite-3B", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5776, "hfopenllm_v2/BBH": 0.4873, "hfopenllm_v2/MATH Level 5": 0.2304, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.3602 } }, { "id": "DeepMount00/Lexora-Lite-3B_v2", "name": "Lexora-Lite-3B_v2", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4943, "hfopenllm_v2/BBH": 0.4812, "hfopenllm_v2/MATH Level 5": 0.2281, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3822, "hfopenllm_v2/MMLU-PRO": 0.3544 } }, { "id": "DeepMount00/Lexora-Medium-7B", "name": "Lexora-Medium-7B", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4103, "hfopenllm_v2/BBH": 0.5145, "hfopenllm_v2/MATH Level 5": 0.2221, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4439, "hfopenllm_v2/MMLU-PRO": 0.4325 } }, { "id": "DeepMount00/Llama-3-8b-Ita", "name": "Llama-3-8b-Ita", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.753, "hfopenllm_v2/BBH": 0.4936, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.3852 } }, { "id": "DeepMount00/Llama-3.1-8b-ITA", "name": "Llama-3.1-8b-ITA", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5365, "hfopenllm_v2/BBH": 0.517, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4487, "hfopenllm_v2/MMLU-PRO": 0.396 } }, { "id": "DeepMount00/Llama-3.1-Distilled", "name": "Llama-3.1-Distilled", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7844, "hfopenllm_v2/BBH": 0.5101, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4058, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "DeepMount00/mergekit-ties-okvgjfz", "name": "mergekit-ties-okvgjfz", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.153, "hfopenllm_v2/BBH": 0.2998, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3806, "hfopenllm_v2/MMLU-PRO": 0.1118 } }, { "id": "DeepMount00/Qwen2-1.5B-Ita", "name": "Qwen2-1.5B-Ita", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5173, "hfopenllm_v2/BBH": 0.3981, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3504, "hfopenllm_v2/MMLU-PRO": 0.2772 } }, { "id": "DeepMount00/Qwen2-1.5B-Ita_v2", "name": "Qwen2-1.5B-Ita_v2", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5, "hfopenllm_v2/BBH": 0.3954, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3702, "hfopenllm_v2/MMLU-PRO": 0.3032 } }, { "id": "DeepMount00/Qwen2-1.5B-Ita_v3", "name": "Qwen2-1.5B-Ita_v3", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.489, "hfopenllm_v2/BBH": 0.3948, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3742, "hfopenllm_v2/MMLU-PRO": 0.3018 } }, { "id": "DeepMount00/Qwen2-1.5B-Ita_v5", "name": "Qwen2-1.5B-Ita_v5", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4987, "hfopenllm_v2/BBH": 0.4032, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.2943 } }, { "id": "DeepMount00/Qwen2-1.5B-Ita_v6", "name": "Qwen2-1.5B-Ita_v6", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2999, "hfopenllm_v2/BBH": 0.4249, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3755, "hfopenllm_v2/MMLU-PRO": 0.2872 } }, { "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder", "name": "Qwen2.5-7B-Instruct-MathCoder", "developer": "DeepMount00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.153, "hfopenllm_v2/BBH": 0.2998, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3806, "hfopenllm_v2/MMLU-PRO": 0.1118 } }, { "id": "deepseek-ai/deepseek-llm-67b-chat", "name": "DeepSeek LLM Chat 67B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.488, "helm_lite/NarrativeQA": 0.581, "helm_lite/NaturalQuestions (closed-book)": 0.412, "helm_lite/OpenbookQA": 0.88, "helm_lite/MMLU": 0.641, "helm_lite/MATH": 0.615, "helm_lite/GSM8K": 0.795, "helm_lite/LegalBench": 0.637, "helm_lite/MedQA": 0.628, "helm_lite/WMT 2014": 0.186, "helm_mmlu/MMLU All Subjects": 0.725, "helm_mmlu/Abstract Algebra": 0.44, "helm_mmlu/Anatomy": 0.667, "helm_mmlu/College Physics": 0.363, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.553, "helm_mmlu/Global Facts": 0.46, "helm_mmlu/Jurisprudence": 0.852, "helm_mmlu/Philosophy": 0.801, "helm_mmlu/Professional Psychology": 0.809, "helm_mmlu/Us Foreign Policy": 0.91, "helm_mmlu/Astronomy": 0.822, "helm_mmlu/Business Ethics": 0.86, "helm_mmlu/Clinical Knowledge": 0.785, "helm_mmlu/Conceptual Physics": 0.723, "helm_mmlu/Electrical Engineering": 0.669, "helm_mmlu/Elementary Mathematics": 0.548, "helm_mmlu/Formal Logic": 0.548, "helm_mmlu/High School World History": 0.911, "helm_mmlu/Human Sexuality": 0.84, "helm_mmlu/International Law": 0.851, "helm_mmlu/Logical Fallacies": 0.847, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.923, "helm_mmlu/Medical Genetics": 0.73, "helm_mmlu/Miscellaneous": 0.904, "helm_mmlu/Moral Scenarios": 0.544, "helm_mmlu/Nutrition": 0.781, "helm_mmlu/Prehistory": 0.858, "helm_mmlu/Public Relations": 0.7, "helm_mmlu/Security Studies": 0.796, "helm_mmlu/Sociology": 0.876, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.387, "hfopenllm_v2/IFEval": 0.5587, "hfopenllm_v2/BBH": 0.5243, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.5059, "hfopenllm_v2/MMLU-PRO": 0.3944 } }, { "id": "deepseek-ai/deepseek-llm-7b-base", "name": "deepseek-llm-7b-base", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2179, "hfopenllm_v2/BBH": 0.3503, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.1806 } }, { "id": "deepseek-ai/deepseek-llm-7b-chat", "name": "deepseek-llm-7b-chat", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4171, "hfopenllm_v2/BBH": 0.3632, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4668, "hfopenllm_v2/MMLU-PRO": 0.2133 } }, { "id": "deepseek-ai/deepseek-moe-16b-base", "name": "deepseek-moe-16b-base", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.245, "hfopenllm_v2/BBH": 0.3409, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.1505 } }, { "id": "deepseek-ai/deepseek-moe-16b-chat", "name": "deepseek-moe-16b-chat", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3663, "hfopenllm_v2/BBH": 0.3275, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2248, "hfopenllm_v2/MUSR": 0.3808, "hfopenllm_v2/MMLU-PRO": 0.1964 } }, { "id": "deepseek-ai/deepseek-r1-0528", "name": "DeepSeek-R1-0528", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.699, "helm_capabilities/MMLU-Pro": 0.793, "helm_capabilities/GPQA": 0.666, "helm_capabilities/IFEval": 0.784, "helm_capabilities/WildBench": 0.828, "helm_capabilities/Omni-MATH": 0.424 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "name": "DeepSeek-R1-Distill-Llama-70B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4336, "hfopenllm_v2/BBH": 0.5635, "hfopenllm_v2/MATH Level 5": 0.3074, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4342, "hfopenllm_v2/MMLU-PRO": 0.4748 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "name": "DeepSeek-R1-Distill-Llama-8B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3782, "hfopenllm_v2/BBH": 0.3239, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.325, "hfopenllm_v2/MMLU-PRO": 0.2089 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "name": "DeepSeek-R1-Distill-Qwen-1.5B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3463, "hfopenllm_v2/BBH": 0.3241, "hfopenllm_v2/MATH Level 5": 0.1692, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3635, "hfopenllm_v2/MMLU-PRO": 0.1187 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "name": "DeepSeek-R1-Distill-Qwen-14B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4382, "hfopenllm_v2/BBH": 0.5906, "hfopenllm_v2/MATH Level 5": 0.5702, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.5366, "hfopenllm_v2/MMLU-PRO": 0.4667 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "name": "DeepSeek-R1-Distill-Qwen-32B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4186, "hfopenllm_v2/BBH": 0.4197, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4526, "hfopenllm_v2/MMLU-PRO": 0.4687 } }, { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "name": "DeepSeek-R1-Distill-Qwen-7B", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4038, "hfopenllm_v2/BBH": 0.3443, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3663, "hfopenllm_v2/MMLU-PRO": 0.2321 } }, { "id": "deepseek-ai/deepseek-v3", "name": "DeepSeek v3", "developer": "deepseek-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.665, "helm_capabilities/MMLU-Pro": 0.723, "helm_capabilities/GPQA": 0.538, "helm_capabilities/IFEval": 0.832, "helm_capabilities/WildBench": 0.831, "helm_capabilities/Omni-MATH": 0.403, "helm_lite/Mean win rate": 0.908, "helm_lite/NarrativeQA": 0.796, "helm_lite/NaturalQuestions (closed-book)": 0.467, "helm_lite/OpenbookQA": 0.954, "helm_lite/MMLU": 0.803, "helm_lite/MATH": 0.912, "helm_lite/GSM8K": 0.94, "helm_lite/LegalBench": 0.718, "helm_lite/MedQA": 0.809, "helm_lite/WMT 2014": 0.209, "helm_mmlu/MMLU All Subjects": 0.872, "helm_mmlu/Abstract Algebra": 0.84, "helm_mmlu/Anatomy": 0.867, "helm_mmlu/College Physics": 0.814, "helm_mmlu/Computer Security": 0.86, "helm_mmlu/Econometrics": 0.746, "helm_mmlu/Global Facts": 0.68, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.9, "helm_mmlu/Professional Psychology": 0.887, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.921, "helm_mmlu/Business Ethics": 0.89, "helm_mmlu/Clinical Knowledge": 0.913, "helm_mmlu/Conceptual Physics": 0.94, "helm_mmlu/Electrical Engineering": 0.869, "helm_mmlu/Elementary Mathematics": 0.942, "helm_mmlu/Formal Logic": 0.77, "helm_mmlu/High School World History": 0.928, "helm_mmlu/Human Sexuality": 0.924, "helm_mmlu/International Law": 0.95, "helm_mmlu/Logical Fallacies": 0.914, "helm_mmlu/Machine Learning": 0.786, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.949, "helm_mmlu/Medical Genetics": 0.96, "helm_mmlu/Miscellaneous": 0.949, "helm_mmlu/Moral Scenarios": 0.808, "helm_mmlu/Nutrition": 0.918, "helm_mmlu/Prehistory": 0.923, "helm_mmlu/Public Relations": 0.809, "helm_mmlu/Security Studies": 0.837, "helm_mmlu/Sociology": 0.955, "helm_mmlu/Virology": 0.596, "helm_mmlu/World Religions": 0.912, "helm_mmlu/Mean win rate": 0.215 } }, { "id": "deepseek/chat-v3-0324", "name": "deepseek/chat-v3-0324", "developer": "DeepSeek", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.19718309859154928 } }, { "id": "deepseek/deepseek-r1-0528", "name": "deepseek-r1-0528", "developer": "deepseek", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.6744, "global-mmlu-lite/Culturally Sensitive": 0.6672, "global-mmlu-lite/Culturally Agnostic": 0.6816, "global-mmlu-lite/Arabic": 0.6825, "global-mmlu-lite/English": 0.715, "global-mmlu-lite/Bengali": 0.655, "global-mmlu-lite/German": 0.6375, "global-mmlu-lite/French": 0.6925, "global-mmlu-lite/Hindi": 0.6475, "global-mmlu-lite/Indonesian": 0.655, "global-mmlu-lite/Italian": 0.6775, "global-mmlu-lite/Japanese": 0.7725, "global-mmlu-lite/Korean": 0.6575, "global-mmlu-lite/Portuguese": 0.635, "global-mmlu-lite/Spanish": 0.7175, "global-mmlu-lite/Swahili": 0.6775, "global-mmlu-lite/Yoruba": 0.77, "global-mmlu-lite/Chinese": 0.5075, "global-mmlu-lite/Burmese": 0.69 } }, { "id": "deepseek/deepseek-v3-2-exp-fc", "name": "DeepSeek-V3.2-Exp (FC)", "developer": "deepseek", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 19.0, "bfcl/bfcl.overall.overall_accuracy": 54.12, "bfcl/bfcl.overall.total_cost_usd": 6.71, "bfcl/bfcl.overall.latency_mean_s": 5.83, "bfcl/bfcl.overall.latency_std_s": 11.71, "bfcl/bfcl.overall.latency_p95_s": 10.59, "bfcl/bfcl.non_live.ast_accuracy": 34.85, "bfcl/bfcl.non_live.simple_ast_accuracy": 37.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 74.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 15.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 12.5, "bfcl/bfcl.live.live_accuracy": 53.66, "bfcl/bfcl.live.live_simple_ast_accuracy": 66.28, "bfcl/bfcl.live.live_multiple_ast_accuracy": 51.66, "bfcl/bfcl.live.live_parallel_ast_accuracy": 25.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 25.0, "bfcl/bfcl.multi_turn.accuracy": 37.38, "bfcl/bfcl.multi_turn.base_accuracy": 41.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 39.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 33.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 35.0, "bfcl/bfcl.web_search.accuracy": 69.5, "bfcl/bfcl.web_search.base_accuracy": 80.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 59.0, "bfcl/bfcl.memory.accuracy": 54.19, "bfcl/bfcl.memory.kv_accuracy": 41.94, "bfcl/bfcl.memory.vector_accuracy": 61.29, "bfcl/bfcl.memory.recursive_summarization_accuracy": 59.35, "bfcl/bfcl.relevance.relevance_detection_accuracy": 37.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 93.18 } }, { "id": "deepseek/deepseek-v3-2-exp-prompt-thinking", "name": "DeepSeek-V3.2-Exp (Prompt + Thinking)", "developer": "deepseek", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 14.0, "bfcl/bfcl.overall.overall_accuracy": 56.73, "bfcl/bfcl.overall.total_cost_usd": 57.75, "bfcl/bfcl.overall.latency_mean_s": 37.89, "bfcl/bfcl.overall.latency_std_s": 49.56, "bfcl/bfcl.overall.latency_p95_s": 102.09, "bfcl/bfcl.non_live.ast_accuracy": 85.52, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 86.5, "bfcl/bfcl.live.live_accuracy": 76.02, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.56, "bfcl/bfcl.live.live_multiple_ast_accuracy": 74.74, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 44.88, "bfcl/bfcl.multi_turn.base_accuracy": 55.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 49.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 27.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 48.5, "bfcl/bfcl.web_search.accuracy": 58.0, "bfcl/bfcl.web_search.base_accuracy": 64.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 52.0, "bfcl/bfcl.memory.accuracy": 44.09, "bfcl/bfcl.memory.kv_accuracy": 46.45, "bfcl/bfcl.memory.vector_accuracy": 46.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 39.35, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 67.0, "bfcl/bfcl.format_sensitivity.max_delta": 10.0, "bfcl/bfcl.format_sensitivity.stddev": 2.77 } }, { "id": "deepseek/deepseek-v3.1", "name": "deepseek-v3.1", "developer": "deepseek", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8044, "global-mmlu-lite/Culturally Sensitive": 0.7793, "global-mmlu-lite/Culturally Agnostic": 0.8295, "global-mmlu-lite/Arabic": 0.805, "global-mmlu-lite/English": 0.825, "global-mmlu-lite/Bengali": 0.8157, "global-mmlu-lite/German": 0.7925, "global-mmlu-lite/French": 0.8175, "global-mmlu-lite/Hindi": 0.7569, "global-mmlu-lite/Indonesian": 0.7764, "global-mmlu-lite/Italian": 0.8075, "global-mmlu-lite/Japanese": 0.8312, "global-mmlu-lite/Korean": 0.8125, "global-mmlu-lite/Portuguese": 0.8246, "global-mmlu-lite/Spanish": 0.8125, "global-mmlu-lite/Swahili": 0.801, "global-mmlu-lite/Yoruba": 0.7831, "global-mmlu-lite/Chinese": 0.8161, "global-mmlu-lite/Burmese": 0.7925 } }, { "id": "deepseek/deepseek-v3.2", "name": "DeepSeek-V3.2", "developer": "DeepSeek", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 39.6 } }, { "id": "deepseek/ep-20250214004308-p7n89", "name": "ep-20250214004308-p7n89", "developer": "DeepSeek", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.014084507042253521, "livecodebenchpro/Easy Problems": 0.4225352112676056 } }, { "id": "deepseek/ep-20250228232227-z44x5", "name": "ep-20250228232227-z44x5", "developer": "DeepSeek", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.1267605633802817 } }, { "id": "deepseek/ep-20250603132404-cgpjm", "name": "ep-20250603132404-cgpjm", "developer": "DeepSeek", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.08450704225352113, "livecodebenchpro/Easy Problems": 0.5774647887323944 } }, { "id": "Delta-Vector/Baldur-8B", "name": "Baldur-8B", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4782, "hfopenllm_v2/BBH": 0.5306, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.3654 } }, { "id": "Delta-Vector/Control-8B", "name": "Control-8B", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.549, "hfopenllm_v2/BBH": 0.5041, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4355, "hfopenllm_v2/MMLU-PRO": 0.3732 } }, { "id": "Delta-Vector/Control-8B-V1.1", "name": "Control-8B-V1.1", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5697, "hfopenllm_v2/BBH": 0.4993, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.3745 } }, { "id": "Delta-Vector/Darkens-8B", "name": "Darkens-8B", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2548, "hfopenllm_v2/BBH": 0.5251, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4106, "hfopenllm_v2/MMLU-PRO": 0.3736 } }, { "id": "Delta-Vector/Henbane-7b-attempt2", "name": "Henbane-7b-attempt2", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4157, "hfopenllm_v2/BBH": 0.5061, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3973, "hfopenllm_v2/MMLU-PRO": 0.4028 } }, { "id": "Delta-Vector/Odin-9B", "name": "Odin-9B", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3692, "hfopenllm_v2/BBH": 0.544, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4648, "hfopenllm_v2/MMLU-PRO": 0.4047 } }, { "id": "Delta-Vector/Tor-8B", "name": "Tor-8B", "developer": "Delta-Vector", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2382, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4092, "hfopenllm_v2/MMLU-PRO": 0.373 } }, { "id": "DevQuasar/DevQuasar-R1-Uncensored-Llama-8B", "name": "DevQuasar-R1-Uncensored-Llama-8B", "developer": "DevQuasar", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3849, "hfopenllm_v2/BBH": 0.5118, "hfopenllm_v2/MATH Level 5": 0.3308, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4436, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "dfurman/CalmeRys-78B-Orpo-v0.1", "name": "CalmeRys-78B-Orpo-v0.1", "developer": "dfurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8163, "hfopenllm_v2/BBH": 0.7262, "hfopenllm_v2/MATH Level 5": 0.4063, "hfopenllm_v2/GPQA": 0.4002, "hfopenllm_v2/MUSR": 0.5902, "hfopenllm_v2/MMLU-PRO": 0.7012 } }, { "id": "dfurman/Llama-3-70B-Orpo-v0.1", "name": "Llama-3-70B-Orpo-v0.1", "developer": "dfurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2049, "hfopenllm_v2/BBH": 0.4655, "hfopenllm_v2/MATH Level 5": 0.1579, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.4534, "hfopenllm_v2/MMLU-PRO": 0.3893 } }, { "id": "dfurman/Llama-3-8B-Orpo-v0.1", "name": "Llama-3-8B-Orpo-v0.1", "developer": "dfurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2835, "hfopenllm_v2/BBH": 0.3842, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.2298 } }, { "id": "dfurman/Qwen2-72B-Orpo-v0.1", "name": "Qwen2-72B-Orpo-v0.1", "developer": "dfurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.788, "hfopenllm_v2/BBH": 0.6969, "hfopenllm_v2/MATH Level 5": 0.4056, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4784, "hfopenllm_v2/MMLU-PRO": 0.5455 } }, { "id": "dicta-il/dictalm2.0", "name": "dictalm2.0", "developer": "dicta-il", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2413, "hfopenllm_v2/BBH": 0.4018, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.2605 } }, { "id": "dicta-il/dictalm2.0-instruct", "name": "dictalm2.0-instruct", "developer": "dicta-il", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4412, "hfopenllm_v2/BBH": 0.4256, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.2605 } }, { "id": "distilbert/distilgpt2", "name": "distilgpt2", "developer": "distilbert", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0611, "hfopenllm_v2/BBH": 0.3038, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.1187 } }, { "id": "divyanshukunwar/SASTRI_1_9B", "name": "SASTRI_1_9B", "developer": "divyanshukunwar", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4207, "hfopenllm_v2/BBH": 0.468, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.3187 } }, { "id": "djuna-test-lab/TEST-L3.2-ReWish-3B", "name": "TEST-L3.2-ReWish-3B", "developer": "djuna-test-lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6368, "hfopenllm_v2/BBH": 0.4495, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3777, "hfopenllm_v2/MMLU-PRO": 0.3126 } }, { "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base", "name": "TEST-L3.2-ReWish-3B-ties-w-base", "developer": "djuna-test-lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6353, "hfopenllm_v2/BBH": 0.4495, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3777, "hfopenllm_v2/MMLU-PRO": 0.3126 } }, { "id": "djuna/G2-BigGSHT-27B-2", "name": "G2-BigGSHT-27B-2", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7974, "hfopenllm_v2/BBH": 0.6415, "hfopenllm_v2/MATH Level 5": 0.2349, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4072, "hfopenllm_v2/MMLU-PRO": 0.4528 } }, { "id": "djuna/G2-GSHT", "name": "G2-GSHT", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.563, "hfopenllm_v2/BBH": 0.527, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4006, "hfopenllm_v2/MMLU-PRO": 0.307 } }, { "id": "djuna/Gemma-2-gemmama-9b", "name": "Gemma-2-gemmama-9b", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7703, "hfopenllm_v2/BBH": 0.542, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.3109 } }, { "id": "djuna/L3.1-ForStHS", "name": "L3.1-ForStHS", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7813, "hfopenllm_v2/BBH": 0.5203, "hfopenllm_v2/MATH Level 5": 0.1503, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4026, "hfopenllm_v2/MMLU-PRO": 0.3735 } }, { "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc", "name": "L3.1-Promissum_Mane-8B-Della-1.5-calc", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7235, "hfopenllm_v2/BBH": 0.5433, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.3904 } }, { "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc", "name": "L3.1-Promissum_Mane-8B-Della-calc", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5442, "hfopenllm_v2/BBH": 0.5486, "hfopenllm_v2/MATH Level 5": 0.1843, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.423, "hfopenllm_v2/MMLU-PRO": 0.3802 } }, { "id": "djuna/L3.1-Purosani-2-8B", "name": "L3.1-Purosani-2-8B", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4988, "hfopenllm_v2/BBH": 0.5182, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3816, "hfopenllm_v2/MMLU-PRO": 0.3752 } }, { "id": "djuna/L3.1-Suze-Vume-calc", "name": "L3.1-Suze-Vume-calc", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7297, "hfopenllm_v2/BBH": 0.5164, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.3515 } }, { "id": "djuna/MN-Chinofun", "name": "MN-Chinofun", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.611, "hfopenllm_v2/BBH": 0.4953, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.3603 } }, { "id": "djuna/MN-Chinofun-12B-2", "name": "MN-Chinofun-12B-2", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6171, "hfopenllm_v2/BBH": 0.5037, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "djuna/MN-Chinofun-12B-3", "name": "MN-Chinofun-12B-3", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3053, "hfopenllm_v2/BBH": 0.5348, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3026 } }, { "id": "djuna/MN-Chinofun-12B-4", "name": "MN-Chinofun-12B-4", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5404, "hfopenllm_v2/BBH": 0.5348, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4307, "hfopenllm_v2/MMLU-PRO": 0.3497 } }, { "id": "djuna/Q2.5-Partron-7B", "name": "Q2.5-Partron-7B", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7321, "hfopenllm_v2/BBH": 0.5418, "hfopenllm_v2/MATH Level 5": 0.4826, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4165, "hfopenllm_v2/MMLU-PRO": 0.4283 } }, { "id": "djuna/Q2.5-Veltha-14B", "name": "Q2.5-Veltha-14B", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8292, "hfopenllm_v2/BBH": 0.6484, "hfopenllm_v2/MATH Level 5": 0.4789, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4194, "hfopenllm_v2/MMLU-PRO": 0.5298 } }, { "id": "djuna/Q2.5-Veltha-14B-0.5", "name": "Q2.5-Veltha-14B-0.5", "developer": "djuna", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7796, "hfopenllm_v2/BBH": 0.6523, "hfopenllm_v2/MATH Level 5": 0.4373, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.5295 } }, { "id": "dnhkng/RYS-Llama-3-8B-Instruct", "name": "RYS-Llama-3-8B-Instruct", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6958, "hfopenllm_v2/BBH": 0.4809, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3383, "hfopenllm_v2/MMLU-PRO": 0.3557 } }, { "id": "dnhkng/RYS-Llama-3-Huge-Instruct", "name": "RYS-Llama-3-Huge-Instruct", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7686, "hfopenllm_v2/BBH": 0.6481, "hfopenllm_v2/MATH Level 5": 0.2289, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.511 } }, { "id": "dnhkng/RYS-Llama-3-Large-Instruct", "name": "RYS-Llama-3-Large-Instruct", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8051, "hfopenllm_v2/BBH": 0.6525, "hfopenllm_v2/MATH Level 5": 0.2304, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.5137 } }, { "id": "dnhkng/RYS-Llama-3.1-8B-Instruct", "name": "RYS-Llama-3.1-8B-Instruct", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7685, "hfopenllm_v2/BBH": 0.5164, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3681, "hfopenllm_v2/MMLU-PRO": 0.3639 } }, { "id": "dnhkng/RYS-Llama3.1-Large", "name": "RYS-Llama3.1-Large", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8492, "hfopenllm_v2/BBH": 0.6899, "hfopenllm_v2/MATH Level 5": 0.3505, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4554, "hfopenllm_v2/MMLU-PRO": 0.5249 } }, { "id": "dnhkng/RYS-Medium", "name": "RYS-Medium", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4406, "hfopenllm_v2/BBH": 0.6285, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.4326 } }, { "id": "dnhkng/RYS-Phi-3-medium-4k-instruct", "name": "RYS-Phi-3-medium-4k-instruct", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4391, "hfopenllm_v2/BBH": 0.6226, "hfopenllm_v2/MATH Level 5": 0.1609, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.4846 } }, { "id": "dnhkng/RYS-XLarge", "name": "RYS-XLarge", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7996, "hfopenllm_v2/BBH": 0.705, "hfopenllm_v2/MATH Level 5": 0.4252, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.497, "hfopenllm_v2/MMLU-PRO": 0.5428 } }, { "id": "dnhkng/RYS-XLarge-base", "name": "RYS-XLarge-base", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.791, "hfopenllm_v2/BBH": 0.7047, "hfopenllm_v2/MATH Level 5": 0.3792, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4903, "hfopenllm_v2/MMLU-PRO": 0.5431 } }, { "id": "dnhkng/RYS-XLarge2", "name": "RYS-XLarge2", "developer": "dnhkng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4902, "hfopenllm_v2/BBH": 0.6574, "hfopenllm_v2/MATH Level 5": 0.2749, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4508, "hfopenllm_v2/MMLU-PRO": 0.5378 } }, { "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO", "name": "DeepSeek-R1-Distill-Qwen-7B-GRPO", "developer": "Dongwei", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4038, "hfopenllm_v2/BBH": 0.3443, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3663, "hfopenllm_v2/MMLU-PRO": 0.2322 } }, { "id": "DoppelReflEx/L3-8B-R1-WolfCore", "name": "L3-8B-R1-WolfCore", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3775, "hfopenllm_v2/BBH": 0.5318, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3717 } }, { "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test", "name": "L3-8B-R1-WolfCore-V1.5-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3955, "hfopenllm_v2/BBH": 0.5315, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.3841, "hfopenllm_v2/MMLU-PRO": 0.3728 } }, { "id": "DoppelReflEx/L3-8B-WolfCore", "name": "L3-8B-WolfCore", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4022, "hfopenllm_v2/BBH": 0.5182, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3973, "hfopenllm_v2/MMLU-PRO": 0.3705 } }, { "id": "DoppelReflEx/MiniusLight-24B", "name": "MiniusLight-24B", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2577, "hfopenllm_v2/BBH": 0.6256, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4319, "hfopenllm_v2/MMLU-PRO": 0.5091 } }, { "id": "DoppelReflEx/MiniusLight-24B-test", "name": "MiniusLight-24B-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0394, "hfopenllm_v2/BBH": 0.6334, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4093, "hfopenllm_v2/MMLU-PRO": 0.5182 } }, { "id": "DoppelReflEx/MiniusLight-24B-v1b-test", "name": "MiniusLight-24B-v1b-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3791, "hfopenllm_v2/BBH": 0.6617, "hfopenllm_v2/MATH Level 5": 0.2394, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4557, "hfopenllm_v2/MMLU-PRO": 0.5365 } }, { "id": "DoppelReflEx/MiniusLight-24B-v1c-test", "name": "MiniusLight-24B-v1c-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3786, "hfopenllm_v2/BBH": 0.6753, "hfopenllm_v2/MATH Level 5": 0.2968, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4634, "hfopenllm_v2/MMLU-PRO": 0.5487 } }, { "id": "DoppelReflEx/MiniusLight-24B-v1d-test", "name": "MiniusLight-24B-v1d-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4032, "hfopenllm_v2/BBH": 0.6712, "hfopenllm_v2/MATH Level 5": 0.2946, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4621, "hfopenllm_v2/MMLU-PRO": 0.5489 } }, { "id": "DoppelReflEx/MN-12B-FoxFrame-test", "name": "MN-12B-FoxFrame-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4222, "hfopenllm_v2/BBH": 0.5456, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.3503 } }, { "id": "DoppelReflEx/MN-12B-FoxFrame2-test", "name": "MN-12B-FoxFrame2-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4319, "hfopenllm_v2/BBH": 0.5485, "hfopenllm_v2/MATH Level 5": 0.1405, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4252, "hfopenllm_v2/MMLU-PRO": 0.3569 } }, { "id": "DoppelReflEx/MN-12B-FoxFrame3-test", "name": "MN-12B-FoxFrame3-test", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4323, "hfopenllm_v2/BBH": 0.5395, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4598, "hfopenllm_v2/MMLU-PRO": 0.3529 } }, { "id": "DoppelReflEx/MN-12B-Kakigori", "name": "MN-12B-Kakigori", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3593, "hfopenllm_v2/BBH": 0.5416, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.3581 } }, { "id": "DoppelReflEx/MN-12B-LilithFrame", "name": "MN-12B-LilithFrame", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.436, "hfopenllm_v2/BBH": 0.4956, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.3237 } }, { "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2", "name": "MN-12B-LilithFrame-Experiment-2", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4299, "hfopenllm_v2/BBH": 0.4983, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.3276 } }, { "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3", "name": "MN-12B-LilithFrame-Experiment-3", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4128, "hfopenllm_v2/BBH": 0.5468, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.3604 } }, { "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4", "name": "MN-12B-LilithFrame-Experiment-4", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3981, "hfopenllm_v2/BBH": 0.5534, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.3649 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake", "name": "MN-12B-Mimicore-GreenSnake", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.478, "hfopenllm_v2/BBH": 0.5481, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.3651 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne", "name": "MN-12B-Mimicore-Nocturne", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3957, "hfopenllm_v2/BBH": 0.5703, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4569, "hfopenllm_v2/MMLU-PRO": 0.3634 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-Orochi", "name": "MN-12B-Mimicore-Orochi", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.462, "hfopenllm_v2/BBH": 0.5498, "hfopenllm_v2/MATH Level 5": 0.136, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4546, "hfopenllm_v2/MMLU-PRO": 0.3447 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment", "name": "MN-12B-Mimicore-Orochi-v2-Experiment", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2842, "hfopenllm_v2/BBH": 0.5323, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4574, "hfopenllm_v2/MMLU-PRO": 0.3423 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment", "name": "MN-12B-Mimicore-Orochi-v3-Experiment", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4102, "hfopenllm_v2/BBH": 0.5438, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4438, "hfopenllm_v2/MMLU-PRO": 0.3396 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment", "name": "MN-12B-Mimicore-Orochi-v4-Experiment", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4321, "hfopenllm_v2/BBH": 0.5463, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4449, "hfopenllm_v2/MMLU-PRO": 0.352 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake", "name": "MN-12B-Mimicore-WhiteSnake", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4438, "hfopenllm_v2/BBH": 0.5605, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4569, "hfopenllm_v2/MMLU-PRO": 0.3658 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-1", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3909, "hfopenllm_v2/BBH": 0.4866, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.379, "hfopenllm_v2/MMLU-PRO": 0.3114 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-2", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3124, "hfopenllm_v2/BBH": 0.5126, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3975, "hfopenllm_v2/MMLU-PRO": 0.3314 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-3", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4302, "hfopenllm_v2/BBH": 0.4812, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3684, "hfopenllm_v2/MMLU-PRO": 0.3198 } }, { "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", "name": "MN-12B-Mimicore-WhiteSnake-v2-Experiment-4", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4241, "hfopenllm_v2/BBH": 0.5185, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4002, "hfopenllm_v2/MMLU-PRO": 0.3342 } }, { "id": "DoppelReflEx/MN-12B-Unleashed-Twilight", "name": "MN-12B-Unleashed-Twilight", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3505, "hfopenllm_v2/BBH": 0.5521, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4384, "hfopenllm_v2/MMLU-PRO": 0.3678 } }, { "id": "DoppelReflEx/MN-12B-WolFrame", "name": "MN-12B-WolFrame", "developer": "DoppelReflEx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4397, "hfopenllm_v2/BBH": 0.5117, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3393 } }, { "id": "DreadPoor/Again-8B-Model_Stock", "name": "Again-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6724, "hfopenllm_v2/BBH": 0.531, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3987, "hfopenllm_v2/MMLU-PRO": 0.3518 } }, { "id": "DreadPoor/Alita99-8B-LINEAR", "name": "Alita99-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.719, "hfopenllm_v2/BBH": 0.5442, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4266, "hfopenllm_v2/MMLU-PRO": 0.3809 } }, { "id": "DreadPoor/AnotherTest", "name": "AnotherTest", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4701, "hfopenllm_v2/BBH": 0.4683, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4213, "hfopenllm_v2/MMLU-PRO": 0.2875 } }, { "id": "DreadPoor/Aspire-8B-model_stock", "name": "Aspire-8B-model_stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7141, "hfopenllm_v2/BBH": 0.5278, "hfopenllm_v2/MATH Level 5": 0.1495, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.3763 } }, { "id": "DreadPoor/Aspire_1.3-8B_model-stock", "name": "Aspire_1.3-8B_model-stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7062, "hfopenllm_v2/BBH": 0.5302, "hfopenllm_v2/MATH Level 5": 0.1692, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3716 } }, { "id": "DreadPoor/Aspire_V2-8B-Model_Stock", "name": "Aspire_V2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7371, "hfopenllm_v2/BBH": 0.533, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.3894, "hfopenllm_v2/MMLU-PRO": 0.3697 } }, { "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock", "name": "Aspire_V2.1-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7238, "hfopenllm_v2/BBH": 0.5236, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.3801 } }, { "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock", "name": "Aspire_V2_ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7381, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.3975, "hfopenllm_v2/MMLU-PRO": 0.3727 } }, { "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock", "name": "Aspire_V2_ALT_ROW-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7381, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.3975, "hfopenllm_v2/MMLU-PRO": 0.3727 } }, { "id": "DreadPoor/Aspire_V3-8B-Model_Stock", "name": "Aspire_V3-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5119, "hfopenllm_v2/BBH": 0.5268, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3642 } }, { "id": "DreadPoor/Aspire_V4-8B-Model_Stock", "name": "Aspire_V4-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7694, "hfopenllm_v2/BBH": 0.5314, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.3708 } }, { "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock", "name": "Aspire_V4_ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7366, "hfopenllm_v2/BBH": 0.5268, "hfopenllm_v2/MATH Level 5": 0.1813, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.392, "hfopenllm_v2/MMLU-PRO": 0.3682 } }, { "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock", "name": "Asymmetric_Linearity-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7174, "hfopenllm_v2/BBH": 0.5465, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3844 } }, { "id": "DreadPoor/Aurora_faustus-8B-LINEAR", "name": "Aurora_faustus-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7281, "hfopenllm_v2/BBH": 0.5516, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3842 } }, { "id": "DreadPoor/Aurora_faustus-8B-LORABLATED", "name": "Aurora_faustus-8B-LORABLATED", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7527, "hfopenllm_v2/BBH": 0.5392, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4239, "hfopenllm_v2/MMLU-PRO": 0.3673 } }, { "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT", "name": "Aurora_faustus-8B-LORABLATED_ALT", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.5388, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4225, "hfopenllm_v2/MMLU-PRO": 0.3694 } }, { "id": "DreadPoor/Autumn_Dawn-8B-LINEAR", "name": "Autumn_Dawn-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7293, "hfopenllm_v2/BBH": 0.5459, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3968 } }, { "id": "DreadPoor/BaeZel-8B-LINEAR", "name": "BaeZel-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.5464, "hfopenllm_v2/MATH Level 5": 0.1813, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4227, "hfopenllm_v2/MMLU-PRO": 0.3861 } }, { "id": "DreadPoor/BaeZel-8B-Model_Stock", "name": "BaeZel-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7713, "hfopenllm_v2/BBH": 0.5408, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.388 } }, { "id": "DreadPoor/BaeZel_V2-8B-Model_Stock", "name": "BaeZel_V2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7677, "hfopenllm_v2/BBH": 0.5374, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3947 } }, { "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock", "name": "BaeZel_V2_ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7677, "hfopenllm_v2/BBH": 0.5374, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3947 } }, { "id": "DreadPoor/BaeZel_V3-8B-Model_Stock", "name": "BaeZel_V3-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7832, "hfopenllm_v2/BBH": 0.5392, "hfopenllm_v2/MATH Level 5": 0.1896, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.3888 } }, { "id": "DreadPoor/Blunt_Edge-8B-SLERP", "name": "Blunt_Edge-8B-SLERP", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7497, "hfopenllm_v2/BBH": 0.5389, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "DreadPoor/BulkUp", "name": "BulkUp", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1778, "hfopenllm_v2/BBH": 0.287, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.111 } }, { "id": "DreadPoor/Cadence-8B-LINEAR", "name": "Cadence-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7682, "hfopenllm_v2/BBH": 0.5433, "hfopenllm_v2/MATH Level 5": 0.1677, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3803 } }, { "id": "DreadPoor/Caelid-8B-Model_Stock", "name": "Caelid-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7247, "hfopenllm_v2/BBH": 0.546, "hfopenllm_v2/MATH Level 5": 0.1511, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4001, "hfopenllm_v2/MMLU-PRO": 0.3816 } }, { "id": "DreadPoor/Casuar-9B-Model_Stock", "name": "Casuar-9B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7765, "hfopenllm_v2/BBH": 0.6107, "hfopenllm_v2/MATH Level 5": 0.213, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4165, "hfopenllm_v2/MMLU-PRO": 0.4156 } }, { "id": "DreadPoor/Condensed_Milk-8B-Model_Stock", "name": "Condensed_Milk-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7536, "hfopenllm_v2/BBH": 0.5435, "hfopenllm_v2/MATH Level 5": 0.1745, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.416, "hfopenllm_v2/MMLU-PRO": 0.3876 } }, { "id": "DreadPoor/CoolerCoder-8B-LINEAR", "name": "CoolerCoder-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4519, "hfopenllm_v2/BBH": 0.4762, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.3159 } }, { "id": "DreadPoor/Damasteel-8B-LINEAR", "name": "Damasteel-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7384, "hfopenllm_v2/BBH": 0.5388, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.3779 } }, { "id": "DreadPoor/Dearly_Beloved-8B-TIES", "name": "Dearly_Beloved-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8267, "hfopenllm_v2/BBH": 0.405, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4175, "hfopenllm_v2/MMLU-PRO": 0.2827 } }, { "id": "DreadPoor/Decayed-8B-LINEAR", "name": "Decayed-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7676, "hfopenllm_v2/BBH": 0.5417, "hfopenllm_v2/MATH Level 5": 0.1715, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3763 } }, { "id": "DreadPoor/Derivative-8B-Model_Stock", "name": "Derivative-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7667, "hfopenllm_v2/BBH": 0.5395, "hfopenllm_v2/MATH Level 5": 0.179, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.42, "hfopenllm_v2/MMLU-PRO": 0.3811 } }, { "id": "DreadPoor/Derivative_V2-8B-Model_Stock", "name": "Derivative_V2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7537, "hfopenllm_v2/BBH": 0.5393, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.3856 } }, { "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock", "name": "Derivative_V2_ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.772, "hfopenllm_v2/BBH": 0.5365, "hfopenllm_v2/MATH Level 5": 0.1881, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3882 } }, { "id": "DreadPoor/Derivative_V3-8B-Model_Stock", "name": "Derivative_V3-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6964, "hfopenllm_v2/BBH": 0.5243, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.415, "hfopenllm_v2/MMLU-PRO": 0.3502 } }, { "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR", "name": "Elusive_Dragon_Heart-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7131, "hfopenllm_v2/BBH": 0.5456, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3814 } }, { "id": "DreadPoor/Emu_Eggs-9B-Model_Stock", "name": "Emu_Eggs-9B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7607, "hfopenllm_v2/BBH": 0.6052, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.4227 } }, { "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR", "name": "Eunoia_Vespera-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7235, "hfopenllm_v2/BBH": 0.5399, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3839 } }, { "id": "DreadPoor/felix_dies-mistral-7B-model_stock", "name": "felix_dies-mistral-7B-model_stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3008, "hfopenllm_v2/BBH": 0.4901, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4518, "hfopenllm_v2/MMLU-PRO": 0.3109 } }, { "id": "DreadPoor/Fu_sion_HA-8B-SLERP", "name": "Fu_sion_HA-8B-SLERP", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7609, "hfopenllm_v2/BBH": 0.5373, "hfopenllm_v2/MATH Level 5": 0.1752, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.416, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "DreadPoor/H_the_eighth-8B-LINEAR", "name": "H_the_eighth-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7469, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3824 } }, { "id": "DreadPoor/hakuchido-8B-MODEL_STOCK", "name": "hakuchido-8B-MODEL_STOCK", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7375, "hfopenllm_v2/BBH": 0.5398, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4175, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "DreadPoor/Happy_New_Year-8B-Model_Stock", "name": "Happy_New_Year-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7616, "hfopenllm_v2/BBH": 0.5368, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3879 } }, { "id": "DreadPoor/Heart_Stolen-8B-Model_Stock", "name": "Heart_Stolen-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7245, "hfopenllm_v2/BBH": 0.5395, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4162, "hfopenllm_v2/MMLU-PRO": 0.3794 } }, { "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock", "name": "Heart_Stolen-ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7184, "hfopenllm_v2/BBH": 0.5263, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4055, "hfopenllm_v2/MMLU-PRO": 0.3772 } }, { "id": "DreadPoor/Here_We_Go_Again-8B-SLERP", "name": "Here_We_Go_Again-8B-SLERP", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7442, "hfopenllm_v2/BBH": 0.546, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3873 } }, { "id": "DreadPoor/HOT_STINKING_GARBAGE", "name": "HOT_STINKING_GARBAGE", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5754, "hfopenllm_v2/BBH": 0.4884, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.425, "hfopenllm_v2/MMLU-PRO": 0.3017 } }, { "id": "DreadPoor/Howdy-8B-LINEAR", "name": "Howdy-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4121, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "DreadPoor/ichor-8B-Model_Stock", "name": "ichor-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5386, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.3151 } }, { "id": "DreadPoor/ichor_1.1-8B-Model_Stock", "name": "ichor_1.1-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8096, "hfopenllm_v2/BBH": 0.5281, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.3856 } }, { "id": "DreadPoor/Incidental-8B-Model_Stock", "name": "Incidental-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7482, "hfopenllm_v2/BBH": 0.5452, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.424, "hfopenllm_v2/MMLU-PRO": 0.3873 } }, { "id": "DreadPoor/inexpertus-8B-Model_Stock", "name": "inexpertus-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7795, "hfopenllm_v2/BBH": 0.528, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3791 } }, { "id": "DreadPoor/inexpertus_1.1-8B-LINEAR", "name": "inexpertus_1.1-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7527, "hfopenllm_v2/BBH": 0.5525, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3827 } }, { "id": "DreadPoor/inexpertus_1.2-8B-LINEAR", "name": "inexpertus_1.2-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7348, "hfopenllm_v2/BBH": 0.5523, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4133, "hfopenllm_v2/MMLU-PRO": 0.3788 } }, { "id": "DreadPoor/Irina-8B-model_stock", "name": "Irina-8B-model_stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6799, "hfopenllm_v2/BBH": 0.5237, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4003, "hfopenllm_v2/MMLU-PRO": 0.3574 } }, { "id": "DreadPoor/Kindling-8B-Model_Stock", "name": "Kindling-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7308, "hfopenllm_v2/BBH": 0.5492, "hfopenllm_v2/MATH Level 5": 0.1752, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.383 } }, { "id": "DreadPoor/L3.1-BaeZel-8B-Della", "name": "L3.1-BaeZel-8B-Della", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.518, "hfopenllm_v2/BBH": 0.5448, "hfopenllm_v2/MATH Level 5": 0.1745, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.42, "hfopenllm_v2/MMLU-PRO": 0.3902 } }, { "id": "DreadPoor/Laughing_Stock-8B-Model_Stock", "name": "Laughing_Stock-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.719, "hfopenllm_v2/BBH": 0.5449, "hfopenllm_v2/MATH Level 5": 0.1579, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3764 } }, { "id": "DreadPoor/Lava_Lamp-8B-SLERP", "name": "Lava_Lamp-8B-SLERP", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7381, "hfopenllm_v2/BBH": 0.5368, "hfopenllm_v2/MATH Level 5": 0.1737, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.375 } }, { "id": "DreadPoor/LemonP-8B-Model_Stock", "name": "LemonP-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7676, "hfopenllm_v2/BBH": 0.5439, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.4004 } }, { "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR", "name": "Lydia_of_Whiterun-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7603, "hfopenllm_v2/BBH": 0.538, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3801 } }, { "id": "DreadPoor/Matryoshka-8B-LINEAR", "name": "Matryoshka-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7263, "hfopenllm_v2/BBH": 0.5444, "hfopenllm_v2/MATH Level 5": 0.1752, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4252, "hfopenllm_v2/MMLU-PRO": 0.3866 } }, { "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock", "name": "Mercury_In_Retrograde-8b-Model-Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7296, "hfopenllm_v2/BBH": 0.5391, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3829 } }, { "id": "DreadPoor/mergekit-nuslerp-nqzkedi", "name": "mergekit-nuslerp-nqzkedi", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7765, "hfopenllm_v2/BBH": 0.5362, "hfopenllm_v2/MATH Level 5": 0.1881, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4225, "hfopenllm_v2/MMLU-PRO": 0.3919 } }, { "id": "DreadPoor/Minthy-8B-Model_Stock", "name": "Minthy-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7658, "hfopenllm_v2/BBH": 0.5353, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4094, "hfopenllm_v2/MMLU-PRO": 0.3993 } }, { "id": "DreadPoor/Minthy_ALT-8B-Model_Stock", "name": "Minthy_ALT-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6992, "hfopenllm_v2/BBH": 0.5375, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4225, "hfopenllm_v2/MMLU-PRO": 0.3674 } }, { "id": "DreadPoor/Minthy_V2-8B-Model_Stock", "name": "Minthy_V2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7126, "hfopenllm_v2/BBH": 0.5491, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3737 } }, { "id": "DreadPoor/Minus_Penus-8B-Model_Stock", "name": "Minus_Penus-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7311, "hfopenllm_v2/BBH": 0.5344, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.3752 } }, { "id": "DreadPoor/Morphing-8B-Model_Stock", "name": "Morphing-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7445, "hfopenllm_v2/BBH": 0.5397, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.3852 } }, { "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock", "name": "Not_Even_My_Final_Form-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7722, "hfopenllm_v2/BBH": 0.5351, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4147, "hfopenllm_v2/MMLU-PRO": 0.384 } }, { "id": "DreadPoor/Nother_One-8B-Model_Stock", "name": "Nother_One-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6863, "hfopenllm_v2/BBH": 0.5205, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.387, "hfopenllm_v2/MMLU-PRO": 0.3595 } }, { "id": "DreadPoor/Noxis-8B-LINEAR", "name": "Noxis-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6913, "hfopenllm_v2/BBH": 0.5421, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.366 } }, { "id": "DreadPoor/Nullsworn-12B-LINEAR", "name": "Nullsworn-12B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4436, "hfopenllm_v2/BBH": 0.5483, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.435, "hfopenllm_v2/MMLU-PRO": 0.3645 } }, { "id": "DreadPoor/Nwah-8B-Model_Stock", "name": "Nwah-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7716, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "DreadPoor/Oh_Boy-8B-LINEAR", "name": "Oh_Boy-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7503, "hfopenllm_v2/BBH": 0.5375, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4108, "hfopenllm_v2/MMLU-PRO": 0.3849 } }, { "id": "DreadPoor/ONeil-model_stock-8B", "name": "ONeil-model_stock-8B", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6786, "hfopenllm_v2/BBH": 0.5548, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3599 } }, { "id": "DreadPoor/OrangeJ-8B-Model_Stock", "name": "OrangeJ-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7841, "hfopenllm_v2/BBH": 0.5413, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4028, "hfopenllm_v2/MMLU-PRO": 0.3969 } }, { "id": "DreadPoor/Promissum_Mane-8B-LINEAR", "name": "Promissum_Mane-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.715, "hfopenllm_v2/BBH": 0.5458, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.42, "hfopenllm_v2/MMLU-PRO": 0.3851 } }, { "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated", "name": "Promissum_Mane-8B-LINEAR-lorablated", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7156, "hfopenllm_v2/BBH": 0.5435, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3739 } }, { "id": "DreadPoor/remember_to_breathe-8b-Model-Stock", "name": "remember_to_breathe-8b-Model-Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7104, "hfopenllm_v2/BBH": 0.5412, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "DreadPoor/RPMash-8B-Model_Stock", "name": "RPMash-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4564, "hfopenllm_v2/BBH": 0.5169, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4054, "hfopenllm_v2/MMLU-PRO": 0.3604 } }, { "id": "DreadPoor/RPMash_V3-8B-Model_Stock", "name": "RPMash_V3-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7049, "hfopenllm_v2/BBH": 0.5217, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.3614 } }, { "id": "DreadPoor/Rusted_Gold-8B-LINEAR", "name": "Rusted_Gold-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7296, "hfopenllm_v2/BBH": 0.5387, "hfopenllm_v2/MATH Level 5": 0.1934, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.378 } }, { "id": "DreadPoor/Rusted_Platinum-8B-LINEAR", "name": "Rusted_Platinum-8B-LINEAR", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.718, "hfopenllm_v2/BBH": 0.5428, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3967, "hfopenllm_v2/MMLU-PRO": 0.373 } }, { "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock", "name": "Rusted_Platinum-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4408, "hfopenllm_v2/BBH": 0.5243, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3741, "hfopenllm_v2/MMLU-PRO": 0.3546 } }, { "id": "DreadPoor/Sellen-8B-model_stock", "name": "Sellen-8B-model_stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7113, "hfopenllm_v2/BBH": 0.5232, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.396, "hfopenllm_v2/MMLU-PRO": 0.357 } }, { "id": "DreadPoor/Something-8B-Model_Stock", "name": "Something-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5043, "hfopenllm_v2/BBH": 0.5395, "hfopenllm_v2/MATH Level 5": 0.1798, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3885 } }, { "id": "DreadPoor/Spring_Dusk-8B-SCE", "name": "Spring_Dusk-8B-SCE", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6515, "hfopenllm_v2/BBH": 0.5635, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.46, "hfopenllm_v2/MMLU-PRO": 0.3436 } }, { "id": "DreadPoor/Summer_Dawn-8B-SCE", "name": "Summer_Dawn-8B-SCE", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6642, "hfopenllm_v2/BBH": 0.5391, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.412, "hfopenllm_v2/MMLU-PRO": 0.3753 } }, { "id": "DreadPoor/Summer_Dusk-8B-TIES", "name": "Summer_Dusk-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4922, "hfopenllm_v2/BBH": 0.536, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4267, "hfopenllm_v2/MMLU-PRO": 0.3856 } }, { "id": "DreadPoor/Summer_Rain-8B-SCE", "name": "Summer_Rain-8B-SCE", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5459, "hfopenllm_v2/BBH": 0.5846, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4477, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "DreadPoor/Summer_Rain-8B-TIES", "name": "Summer_Rain-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5444, "hfopenllm_v2/BBH": 0.5846, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4477, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "DreadPoor/Sun-8B-Model_Stock", "name": "Sun-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7758, "hfopenllm_v2/BBH": 0.5264, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.3835 } }, { "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock", "name": "Sweetened_Condensed_Milk-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7417, "hfopenllm_v2/BBH": 0.5406, "hfopenllm_v2/MATH Level 5": 0.1873, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4107, "hfopenllm_v2/MMLU-PRO": 0.3848 } }, { "id": "DreadPoor/test", "name": "test", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4937, "hfopenllm_v2/BBH": 0.5372, "hfopenllm_v2/MATH Level 5": 0.1934, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.3647 } }, { "id": "DreadPoor/TEST02-Ignore", "name": "TEST02-Ignore", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6119, "hfopenllm_v2/BBH": 0.5602, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3468 } }, { "id": "DreadPoor/TEST03-ignore", "name": "TEST03-ignore", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6967, "hfopenllm_v2/BBH": 0.5383, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3789 } }, { "id": "DreadPoor/TEST06-ignore", "name": "TEST06-ignore", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7323, "hfopenllm_v2/BBH": 0.5509, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4225, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "DreadPoor/TEST07-ignore", "name": "TEST07-ignore", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.74, "hfopenllm_v2/BBH": 0.5561, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4094, "hfopenllm_v2/MMLU-PRO": 0.388 } }, { "id": "DreadPoor/TEST08-ignore", "name": "TEST08-ignore", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7467, "hfopenllm_v2/BBH": 0.5454, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.3853 } }, { "id": "DreadPoor/test_ALT", "name": "test_ALT", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4997, "hfopenllm_v2/BBH": 0.537, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.4363, "hfopenllm_v2/MMLU-PRO": 0.3492 } }, { "id": "DreadPoor/tests_pending-do_not_use_yet", "name": "tests_pending-do_not_use_yet", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7691, "hfopenllm_v2/BBH": 0.5408, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4005, "hfopenllm_v2/MMLU-PRO": 0.3827 } }, { "id": "DreadPoor/Trinas_Nectar-8B-model_stock", "name": "Trinas_Nectar-8B-model_stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7259, "hfopenllm_v2/BBH": 0.5256, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.3618 } }, { "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock", "name": "UNTESTED-VENN_1.2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4718, "hfopenllm_v2/BBH": 0.5475, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4449, "hfopenllm_v2/MMLU-PRO": 0.3787 } }, { "id": "DreadPoor/VENN_1.2-8B-Model_Stock", "name": "VENN_1.2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7226, "hfopenllm_v2/BBH": 0.5459, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.42, "hfopenllm_v2/MMLU-PRO": 0.3721 } }, { "id": "DreadPoor/Wannabe-8B-Model_Stock", "name": "Wannabe-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7205, "hfopenllm_v2/BBH": 0.539, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "DreadPoor/What_A_Thrill-8B-Model_Stock", "name": "What_A_Thrill-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7064, "hfopenllm_v2/BBH": 0.5311, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "DreadPoor/Winter-8B-SCE", "name": "Winter-8B-SCE", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7536, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.3839 } }, { "id": "DreadPoor/Winter_Dawn-8B-TIES", "name": "Winter_Dawn-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5496, "hfopenllm_v2/BBH": 0.5309, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.391 } }, { "id": "DreadPoor/Winter_Dusk-8B-TIES", "name": "Winter_Dusk-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7153, "hfopenllm_v2/BBH": 0.4952, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3688, "hfopenllm_v2/MMLU-PRO": 0.3478 } }, { "id": "DreadPoor/Winter_Night-8B-Model_Stock", "name": "Winter_Night-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.704, "hfopenllm_v2/BBH": 0.5185, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.3666 } }, { "id": "DreadPoor/WIP-Acacia-8B-Model_Stock", "name": "WIP-Acacia-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6246, "hfopenllm_v2/BBH": 0.5195, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4226, "hfopenllm_v2/MMLU-PRO": 0.3737 } }, { "id": "DreadPoor/WIP_Damascus-8B-TIES", "name": "WIP_Damascus-8B-TIES", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4776, "hfopenllm_v2/BBH": 0.5411, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4119, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "DreadPoor/Yafune-8B-Model_Stock", "name": "Yafune-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7533, "hfopenllm_v2/BBH": 0.5467, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3851 } }, { "id": "DreadPoor/Yearn_V3-8B-Model_Stock", "name": "Yearn_V3-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.729, "hfopenllm_v2/BBH": 0.5322, "hfopenllm_v2/MATH Level 5": 0.1896, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3909, "hfopenllm_v2/MMLU-PRO": 0.3802 } }, { "id": "DreadPoor/Zelus-8B-Model_Stock", "name": "Zelus-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7788, "hfopenllm_v2/BBH": 0.5307, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4214, "hfopenllm_v2/MMLU-PRO": 0.3841 } }, { "id": "DreadPoor/Zelus_V2-8B-Model_Stock", "name": "Zelus_V2-8B-Model_Stock", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7898, "hfopenllm_v2/BBH": 0.5345, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3961, "hfopenllm_v2/MMLU-PRO": 0.3833 } }, { "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT", "name": "ZEUS-8B-V17-Abliterated_ALT", "developer": "DreadPoor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5511, "hfopenllm_v2/BBH": 0.5231, "hfopenllm_v2/MATH Level 5": 0.1903, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4149, "hfopenllm_v2/MMLU-PRO": 0.389 } }, { "id": "dreamgen/WizardLM-2-7B", "name": "WizardLM-2-7B", "developer": "dreamgen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4583, "hfopenllm_v2/BBH": 0.3487, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3941, "hfopenllm_v2/MMLU-PRO": 0.266 } }, { "id": "DRXD1000/Atlas-7B", "name": "Atlas-7B", "developer": "DRXD1000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3704, "hfopenllm_v2/BBH": 0.3302, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1401 } }, { "id": "DRXD1000/Phoenix-7B", "name": "Phoenix-7B", "developer": "DRXD1000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.321, "hfopenllm_v2/BBH": 0.3932, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3849, "hfopenllm_v2/MMLU-PRO": 0.2343 } }, { "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1", "name": "zephyr-7b-ipo-0k-15k-i1", "developer": "DUAL-GPO", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2756, "hfopenllm_v2/BBH": 0.4473, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.313 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v1", "name": "Reflexis-8b-chat-v1", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3658, "hfopenllm_v2/BBH": 0.4664, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3384 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v2", "name": "Reflexis-8b-chat-v2", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3912, "hfopenllm_v2/BBH": 0.4724, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3526, "hfopenllm_v2/MMLU-PRO": 0.3378 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v3", "name": "Reflexis-8b-chat-v3", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5367, "hfopenllm_v2/BBH": 0.4658, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3512, "hfopenllm_v2/MMLU-PRO": 0.3548 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v4", "name": "Reflexis-8b-chat-v4", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4698, "hfopenllm_v2/BBH": 0.4686, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.2341, "hfopenllm_v2/MUSR": 0.3393, "hfopenllm_v2/MMLU-PRO": 0.339 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v5", "name": "Reflexis-8b-chat-v5", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4238, "hfopenllm_v2/BBH": 0.4782, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3354, "hfopenllm_v2/MMLU-PRO": 0.3217 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v6", "name": "Reflexis-8b-chat-v6", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4939, "hfopenllm_v2/BBH": 0.481, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3753, "hfopenllm_v2/MMLU-PRO": 0.3479 } }, { "id": "dustinwloring1988/Reflexis-8b-chat-v7", "name": "Reflexis-8b-chat-v7", "developer": "dustinwloring1988", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.398, "hfopenllm_v2/BBH": 0.481, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.3643 } }, { "id": "duyhv1411/Llama-3.2-1B-en-vi", "name": "Llama-3.2-1B-en-vi", "developer": "duyhv1411", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4788, "hfopenllm_v2/BBH": 0.3291, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3197, "hfopenllm_v2/MMLU-PRO": 0.1341 } }, { "id": "duyhv1411/Llama-3.2-3B-en-vi", "name": "Llama-3.2-3B-en-vi", "developer": "duyhv1411", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4852, "hfopenllm_v2/BBH": 0.3272, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.321, "hfopenllm_v2/MMLU-PRO": 0.1359 } }, { "id": "dwikitheduck/gemma-2-2b-id", "name": "gemma-2-2b-id", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3879, "hfopenllm_v2/BBH": 0.3962, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.2173 } }, { "id": "dwikitheduck/gemma-2-2b-id-inst", "name": "gemma-2-2b-id-inst", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3879, "hfopenllm_v2/BBH": 0.3962, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.2173 } }, { "id": "dwikitheduck/gemma-2-2b-id-instruct", "name": "gemma-2-2b-id-instruct", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3879, "hfopenllm_v2/BBH": 0.3962, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.2173 } }, { "id": "dwikitheduck/gen-inst-1", "name": "gen-inst-1", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.775, "hfopenllm_v2/BBH": 0.642, "hfopenllm_v2/MATH Level 5": 0.4554, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4205, "hfopenllm_v2/MMLU-PRO": 0.5089 } }, { "id": "dwikitheduck/gen-try1", "name": "gen-try1", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7522, "hfopenllm_v2/BBH": 0.6359, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4416, "hfopenllm_v2/MMLU-PRO": 0.5111 } }, { "id": "dwikitheduck/gen-try1-notemp", "name": "gen-try1-notemp", "developer": "dwikitheduck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2627, "hfopenllm_v2/BBH": 0.6263, "hfopenllm_v2/MATH Level 5": 0.318, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4714, "hfopenllm_v2/MMLU-PRO": 0.521 } }, { "id": "dzakwan/dzakwan-MoE-4x7b-Beta", "name": "dzakwan-MoE-4x7b-Beta", "developer": "dzakwan", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4443, "hfopenllm_v2/BBH": 0.514, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4267, "hfopenllm_v2/MMLU-PRO": 0.3108 } }, { "id": "DZgas/GIGABATEMAN-7B", "name": "GIGABATEMAN-7B", "developer": "DZgas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4607, "hfopenllm_v2/BBH": 0.5032, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.3177 } }, { "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2", "name": "ILAB-Merging-3B-V2", "developer": "ECE-ILAB-PRYMMAL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4029, "hfopenllm_v2/BBH": 0.5402, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.3861 } }, { "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", "name": "meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16", "developer": "Edgerunners", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7147, "hfopenllm_v2/BBH": 0.498, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.3636 } }, { "id": "ehristoforu/coolqwen-3b-it", "name": "coolqwen-3b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6473, "hfopenllm_v2/BBH": 0.4851, "hfopenllm_v2/MATH Level 5": 0.3671, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.3601 } }, { "id": "ehristoforu/della-70b-test-v1", "name": "della-70b-test-v1", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4979, "hfopenllm_v2/BBH": 0.3029, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.4555, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "ehristoforu/Falcon3-8B-Franken-Basestruct", "name": "Falcon3-8B-Franken-Basestruct", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1715, "hfopenllm_v2/BBH": 0.5463, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.3947 } }, { "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct", "name": "Falcon3-MoE-2x7B-Insruct", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7643, "hfopenllm_v2/BBH": 0.5648, "hfopenllm_v2/MATH Level 5": 0.4124, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.484, "hfopenllm_v2/MMLU-PRO": 0.4095 } }, { "id": "ehristoforu/falcon3-ultraset", "name": "falcon3-ultraset", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7135, "hfopenllm_v2/BBH": 0.5584, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4853, "hfopenllm_v2/MMLU-PRO": 0.3982 } }, { "id": "ehristoforu/fd-lora-merged-16x32", "name": "fd-lora-merged-16x32", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3481, "hfopenllm_v2/BBH": 0.3308, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.1205 } }, { "id": "ehristoforu/fd-lora-merged-64x128", "name": "fd-lora-merged-64x128", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3281, "hfopenllm_v2/BBH": 0.3345, "hfopenllm_v2/MATH Level 5": 0.1873, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1537 } }, { "id": "ehristoforu/fp4-14b-it-v1", "name": "fp4-14b-it-v1", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2535, "hfopenllm_v2/BBH": 0.574, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3595, "hfopenllm_v2/MMLU-PRO": 0.4205 } }, { "id": "ehristoforu/fp4-14b-v1-fix", "name": "fp4-14b-v1-fix", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6742, "hfopenllm_v2/BBH": 0.6817, "hfopenllm_v2/MATH Level 5": 0.4207, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4532, "hfopenllm_v2/MMLU-PRO": 0.5353 } }, { "id": "ehristoforu/fq2.5-7b-it-normalize_false", "name": "fq2.5-7b-it-normalize_false", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7399, "hfopenllm_v2/BBH": 0.552, "hfopenllm_v2/MATH Level 5": 0.4622, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4612, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "ehristoforu/fq2.5-7b-it-normalize_true", "name": "fq2.5-7b-it-normalize_true", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7399, "hfopenllm_v2/BBH": 0.552, "hfopenllm_v2/MATH Level 5": 0.4622, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4612, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it", "name": "frqwen2.5-from7b-duable4layers-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7729, "hfopenllm_v2/BBH": 0.5264, "hfopenllm_v2/MATH Level 5": 0.4509, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.4126 } }, { "id": "ehristoforu/frqwen2.5-from7b-it", "name": "frqwen2.5-from7b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6532, "hfopenllm_v2/BBH": 0.5143, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4086, "hfopenllm_v2/MMLU-PRO": 0.3977 } }, { "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health", "name": "Gemma2-9B-it-psy10k-mental_health", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5887, "hfopenllm_v2/BBH": 0.5539, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4086, "hfopenllm_v2/MMLU-PRO": 0.3829 } }, { "id": "ehristoforu/Gemma2-9b-it-train6", "name": "Gemma2-9b-it-train6", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7025, "hfopenllm_v2/BBH": 0.5898, "hfopenllm_v2/MATH Level 5": 0.1911, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.3942 } }, { "id": "ehristoforu/HappyLlama1", "name": "HappyLlama1", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7363, "hfopenllm_v2/BBH": 0.4996, "hfopenllm_v2/MATH Level 5": 0.1427, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.3546 } }, { "id": "ehristoforu/mllama-3.1-8b-instruct", "name": "mllama-3.1-8b-instruct", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3458, "hfopenllm_v2/BBH": 0.4718, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.2533 } }, { "id": "ehristoforu/mllama-3.1-8b-it", "name": "mllama-3.1-8b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3879, "hfopenllm_v2/BBH": 0.4868, "hfopenllm_v2/MATH Level 5": 0.3799, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3349, "hfopenllm_v2/MMLU-PRO": 0.2622 } }, { "id": "ehristoforu/moremerge", "name": "moremerge", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2019, "hfopenllm_v2/BBH": 0.2868, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.1065 } }, { "id": "ehristoforu/moremerge-upscaled", "name": "moremerge-upscaled", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1979, "hfopenllm_v2/BBH": 0.2698, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.1041 } }, { "id": "ehristoforu/phi-4-25b", "name": "phi-4-25b", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6484, "hfopenllm_v2/BBH": 0.6908, "hfopenllm_v2/MATH Level 5": 0.4524, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.5351 } }, { "id": "ehristoforu/qwen2.5-test-32b-it", "name": "qwen2.5-test-32b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.7081, "hfopenllm_v2/MATH Level 5": 0.5974, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.4578, "hfopenllm_v2/MMLU-PRO": 0.5765 } }, { "id": "ehristoforu/qwen2.5-with-lora-think-3b-it", "name": "qwen2.5-with-lora-think-3b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5319, "hfopenllm_v2/BBH": 0.4687, "hfopenllm_v2/MATH Level 5": 0.2364, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.3403 } }, { "id": "ehristoforu/QwenQwen2.5-7B-IT", "name": "QwenQwen2.5-7B-IT", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7518, "hfopenllm_v2/BBH": 0.5398, "hfopenllm_v2/MATH Level 5": 0.5091, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.4289 } }, { "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare", "name": "QwenQwen2.5-7B-IT-Dare", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.5398, "hfopenllm_v2/MATH Level 5": 0.5091, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.4289 } }, { "id": "ehristoforu/rmoe-v1", "name": "rmoe-v1", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.265, "hfopenllm_v2/BBH": 0.2929, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3663, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "ehristoforu/RQwen-v0.1", "name": "RQwen-v0.1", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7625, "hfopenllm_v2/BBH": 0.6446, "hfopenllm_v2/MATH Level 5": 0.4645, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4139, "hfopenllm_v2/MMLU-PRO": 0.5202 } }, { "id": "ehristoforu/RQwen-v0.2", "name": "RQwen-v0.2", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7504, "hfopenllm_v2/BBH": 0.6427, "hfopenllm_v2/MATH Level 5": 0.327, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.5159 } }, { "id": "ehristoforu/rufalcon3-3b-it", "name": "rufalcon3-3b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5942, "hfopenllm_v2/BBH": 0.4155, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3895, "hfopenllm_v2/MMLU-PRO": 0.2348 } }, { "id": "ehristoforu/ruphi-4b", "name": "ruphi-4b", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1752, "hfopenllm_v2/BBH": 0.2906, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3512, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "ehristoforu/SoRu-0009", "name": "SoRu-0009", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2582, "hfopenllm_v2/BBH": 0.315, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3369, "hfopenllm_v2/MMLU-PRO": 0.1239 } }, { "id": "ehristoforu/testq-32b", "name": "testq-32b", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1876, "hfopenllm_v2/BBH": 0.2877, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3715, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "ehristoforu/tmoe", "name": "tmoe", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1193, "hfopenllm_v2/BBH": 0.3073, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2232, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.1191 } }, { "id": "ehristoforu/tmoe-v2", "name": "tmoe-v2", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1903, "hfopenllm_v2/BBH": 0.2897, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4151, "hfopenllm_v2/MMLU-PRO": 0.11 } }, { "id": "ehristoforu/trd-7b-it", "name": "trd-7b-it", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2185, "hfopenllm_v2/BBH": 0.299, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.1179 } }, { "id": "ehristoforu/ud-14b", "name": "ud-14b", "developer": "ehristoforu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4235, "hfopenllm_v2/BBH": 0.3324, "hfopenllm_v2/MATH Level 5": 0.1903, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.4394, "hfopenllm_v2/MMLU-PRO": 0.2415 } }, { "id": "EleutherAI/gpt-j-6b", "name": "gpt-j-6b", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2522, "hfopenllm_v2/BBH": 0.3191, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.1241 } }, { "id": "EleutherAI/gpt-neo-1.3B", "name": "gpt-neo-1.3B", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2079, "hfopenllm_v2/BBH": 0.3039, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3817, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "EleutherAI/gpt-neo-125m", "name": "gpt-neo-125m", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1905, "hfopenllm_v2/BBH": 0.3115, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.1026 } }, { "id": "EleutherAI/gpt-neo-2.7B", "name": "gpt-neo-2.7B", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.259, "hfopenllm_v2/BBH": 0.314, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3554, "hfopenllm_v2/MMLU-PRO": 0.1163 } }, { "id": "EleutherAI/gpt-neox-20b", "name": "gpt-neox-20b", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2587, "hfopenllm_v2/BBH": 0.3165, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.1155 } }, { "id": "EleutherAI/pythia-1.4b", "name": "pythia-1.4b", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2371, "hfopenllm_v2/BBH": 0.315, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "EleutherAI/pythia-12b", "name": "Pythia 12B", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.257, "helm_classic/MMLU": 0.274, "helm_classic/BoolQ": 0.662, "helm_classic/NarrativeQA": 0.596, "helm_classic/NaturalQuestions (open-book)": 0.581, "helm_classic/QuAC": 0.313, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.177, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.931, "helm_classic/CivilComments": 0.531, "helm_classic/RAFT": 0.514, "hfopenllm_v2/IFEval": 0.2471, "hfopenllm_v2/BBH": 0.318, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.1109 } }, { "id": "EleutherAI/pythia-160m", "name": "pythia-160m", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1816, "hfopenllm_v2/BBH": 0.297, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4179, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "EleutherAI/pythia-1b", "name": "pythia-1b", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2208, "hfopenllm_v2/BBH": 0.3004, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3552, "hfopenllm_v2/MMLU-PRO": 0.1136 } }, { "id": "EleutherAI/pythia-2.8b", "name": "pythia-2.8b", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2173, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3486, "hfopenllm_v2/MMLU-PRO": 0.1137 } }, { "id": "EleutherAI/pythia-410m", "name": "pythia-410m", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2195, "hfopenllm_v2/BBH": 0.3028, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "EleutherAI/pythia-6.9b", "name": "Pythia 6.9B", "developer": "EleutherAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.196, "helm_classic/MMLU": 0.236, "helm_classic/BoolQ": 0.631, "helm_classic/NarrativeQA": 0.528, "helm_classic/NaturalQuestions (open-book)": 0.539, "helm_classic/QuAC": 0.296, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.213, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.928, "helm_classic/CivilComments": 0.511, "helm_classic/RAFT": 0.502, "hfopenllm_v2/IFEval": 0.2281, "hfopenllm_v2/BBH": 0.3232, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3591, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "elinas/Chronos-Gold-12B-1.0", "name": "Chronos-Gold-12B-1.0", "developer": "elinas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3166, "hfopenllm_v2/BBH": 0.5515, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.474, "hfopenllm_v2/MMLU-PRO": 0.3518 } }, { "id": "ell44ot/gemma-2b-def", "name": "gemma-2b-def", "developer": "ell44ot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2693, "hfopenllm_v2/BBH": 0.3159, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.367, "hfopenllm_v2/MMLU-PRO": 0.1572 } }, { "id": "Enno-Ai/EnnoAi-Pro-French-Llama-3-8B-v0.4", "name": "EnnoAi-Pro-French-Llama-3-8B-v0.4", "developer": "Enno-Ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4189, "hfopenllm_v2/BBH": 0.4075, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.2635 } }, { "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B", "name": "EnnoAi-Pro-Llama-3-8B", "developer": "Enno-Ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3195, "hfopenllm_v2/BBH": 0.4152, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.2151 } }, { "id": "Enno-Ai/EnnoAi-Pro-Llama-3-8B-v0.3", "name": "EnnoAi-Pro-Llama-3-8B-v0.3", "developer": "Enno-Ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5083, "hfopenllm_v2/BBH": 0.4101, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.299 } }, { "id": "Enno-Ai/EnnoAi-Pro-Llama-3.1-8B-v0.9", "name": "EnnoAi-Pro-Llama-3.1-8B-v0.9", "developer": "Enno-Ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4689, "hfopenllm_v2/BBH": 0.416, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3832, "hfopenllm_v2/MMLU-PRO": 0.2596 } }, { "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502", "name": "EnnoAi-7B-French-Instruct-202502", "developer": "EnnoAi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5564, "hfopenllm_v2/BBH": 0.5575, "hfopenllm_v2/MATH Level 5": 0.3724, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.46, "hfopenllm_v2/MMLU-PRO": 0.4013 } }, { "id": "EnnoAi/EnnoAi-Pro-Llama-3.1-8B-v1.0", "name": "EnnoAi-Pro-Llama-3.1-8B-v1.0", "developer": "EnnoAi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4704, "hfopenllm_v2/BBH": 0.416, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3832, "hfopenllm_v2/MMLU-PRO": 0.2596 } }, { "id": "Epiculous/Azure_Dusk-v0.2", "name": "Azure_Dusk-v0.2", "developer": "Epiculous", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3467, "hfopenllm_v2/BBH": 0.412, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3835, "hfopenllm_v2/MMLU-PRO": 0.3034 } }, { "id": "Epiculous/Crimson_Dawn-v0.2", "name": "Crimson_Dawn-v0.2", "developer": "Epiculous", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3103, "hfopenllm_v2/BBH": 0.4482, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4152, "hfopenllm_v2/MMLU-PRO": 0.2721 } }, { "id": "Epiculous/NovaSpark", "name": "NovaSpark", "developer": "Epiculous", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6408, "hfopenllm_v2/BBH": 0.5064, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3882, "hfopenllm_v2/MMLU-PRO": 0.3649 } }, { "id": "Epiculous/Violet_Twilight-v0.2", "name": "Violet_Twilight-v0.2", "developer": "Epiculous", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4532, "hfopenllm_v2/BBH": 0.4615, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4299, "hfopenllm_v2/MMLU-PRO": 0.3111 } }, { "id": "EpistemeAI/Alpaca-Llama3.1-8B", "name": "Alpaca-Llama3.1-8B", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1599, "hfopenllm_v2/BBH": 0.4755, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3403, "hfopenllm_v2/MMLU-PRO": 0.3246 } }, { "id": "EpistemeAI/Athena-gemma-2-2b-it", "name": "Athena-gemma-2-2b-it", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3134, "hfopenllm_v2/BBH": 0.4264, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.2422 } }, { "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos", "name": "Athena-gemma-2-2b-it-Philos", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4621, "hfopenllm_v2/BBH": 0.3795, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4314, "hfopenllm_v2/MMLU-PRO": 0.2248 } }, { "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3", "name": "Athene-codegemma-2-7b-it-alpaca-v1.3", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.403, "hfopenllm_v2/BBH": 0.4332, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4503, "hfopenllm_v2/MMLU-PRO": 0.2587 } }, { "id": "EpistemeAI/DeepPhi-3.5-mini-instruct", "name": "DeepPhi-3.5-mini-instruct", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1326, "hfopenllm_v2/BBH": 0.2882, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2332, "hfopenllm_v2/MUSR": 0.3656, "hfopenllm_v2/MMLU-PRO": 0.1103 } }, { "id": "EpistemeAI/DeepThinkers-Phi4", "name": "DeepThinkers-Phi4", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.694, "hfopenllm_v2/BBH": 0.679, "hfopenllm_v2/MATH Level 5": 0.4585, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.5258 } }, { "id": "EpistemeAI/FineLlama3.1-8B-Instruct", "name": "FineLlama3.1-8B-Instruct", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.08, "hfopenllm_v2/BBH": 0.4557, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3482, "hfopenllm_v2/MMLU-PRO": 0.3113 } }, { "id": "EpistemeAI/Fireball-12B", "name": "Fireball-12B", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1834, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.3344 } }, { "id": "EpistemeAI/Fireball-12B-v1.13a-philosophers", "name": "Fireball-12B-v1.13a-philosophers", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0876, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.3367 } }, { "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", "name": "Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4577, "hfopenllm_v2/BBH": 0.4838, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3945, "hfopenllm_v2/MMLU-PRO": 0.3583 } }, { "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math-KTO-beta", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7274, "hfopenllm_v2/BBH": 0.4865, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.3543 } }, { "id": "EpistemeAI/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R2", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4673, "hfopenllm_v2/BBH": 0.4932, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4624, "hfopenllm_v2/MMLU-PRO": 0.3352 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4432, "hfopenllm_v2/BBH": 0.4824, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4066, "hfopenllm_v2/MMLU-PRO": 0.3516 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4457, "hfopenllm_v2/BBH": 0.4897, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3762, "hfopenllm_v2/MMLU-PRO": 0.3543 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5975, "hfopenllm_v2/BBH": 0.4904, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.401, "hfopenllm_v2/MMLU-PRO": 0.3423 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6691, "hfopenllm_v2/BBH": 0.4668, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3418, "hfopenllm_v2/MMLU-PRO": 0.3389 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7207, "hfopenllm_v2/BBH": 0.461, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3432, "hfopenllm_v2/MMLU-PRO": 0.3354 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4578, "hfopenllm_v2/BBH": 0.4761, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3881, "hfopenllm_v2/MMLU-PRO": 0.3471 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7205, "hfopenllm_v2/BBH": 0.4818, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.33, "hfopenllm_v2/MMLU-PRO": 0.3548 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Math", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4623, "hfopenllm_v2/BBH": 0.4983, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3641, "hfopenllm_v2/MMLU-PRO": 0.3331 } }, { "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", "name": "Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4611, "hfopenllm_v2/BBH": 0.4801, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3521 } }, { "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2", "name": "Fireball-Mistral-Nemo-Base-2407-v1-DPO2", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1861, "hfopenllm_v2/BBH": 0.4968, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.404, "hfopenllm_v2/MMLU-PRO": 0.3353 } }, { "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B", "name": "Fireball-R1-Llama-3.1-8B", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4427, "hfopenllm_v2/BBH": 0.3643, "hfopenllm_v2/MATH Level 5": 0.3112, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1115 } }, { "id": "EpistemeAI/Fireball-R1-Llama-3.1-8B-Medical-COT", "name": "Fireball-R1-Llama-3.1-8B-Medical-COT", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3216, "hfopenllm_v2/BBH": 0.3716, "hfopenllm_v2/MATH Level 5": 0.327, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3114, "hfopenllm_v2/MMLU-PRO": 0.1402 } }, { "id": "EpistemeAI/Fireball-R1.1-Llama-3.1-8B", "name": "Fireball-R1.1-Llama-3.1-8B", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3676, "hfopenllm_v2/BBH": 0.3326, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3419, "hfopenllm_v2/MMLU-PRO": 0.1115 } }, { "id": "EpistemeAI/Llama-3.2-3B-Agent007-Coder", "name": "Llama-3.2-3B-Agent007-Coder", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.54, "hfopenllm_v2/BBH": 0.4304, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3668, "hfopenllm_v2/MMLU-PRO": 0.2852 } }, { "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math", "name": "Mistral-Nemo-Instruct-12B-Philosophy-Math", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0695, "hfopenllm_v2/BBH": 0.5365, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4292, "hfopenllm_v2/MMLU-PRO": 0.3296 } }, { "id": "EpistemeAI/OpenReasoner-Llama-3.2-3B-rs1.0", "name": "OpenReasoner-Llama-3.2-3B-rs1.0", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7274, "hfopenllm_v2/BBH": 0.4519, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3461, "hfopenllm_v2/MMLU-PRO": 0.3134 } }, { "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6915, "hfopenllm_v2/BBH": 0.4525, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7101, "hfopenllm_v2/BBH": 0.4628, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3195, "hfopenllm_v2/MMLU-PRO": 0.3311 } }, { "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", "name": "Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7122, "hfopenllm_v2/BBH": 0.4566, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.335 } }, { "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT", "name": "Reasoning-Llama-3.1-CoT-RE1-NMT", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4829, "hfopenllm_v2/BBH": 0.4736, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3182, "hfopenllm_v2/MMLU-PRO": 0.3343 } }, { "id": "EpistemeAI/Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", "name": "Reasoning-Llama-3.1-CoT-RE1-NMT-V2-ORPO", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4553, "hfopenllm_v2/BBH": 0.4804, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3931, "hfopenllm_v2/MMLU-PRO": 0.3598 } }, { "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2", "name": "Reasoning-Llama-3.2-1B-Instruct-v1.2", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4087, "hfopenllm_v2/BBH": 0.3324, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1179 } }, { "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3", "name": "Reasoning-Llama-3.2-1B-Instruct-v1.3", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3273, "hfopenllm_v2/BBH": 0.3263, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.326, "hfopenllm_v2/MMLU-PRO": 0.1173 } }, { "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1", "name": "Reasoning-Llama-3.2-3B-Math-Instruct-RE1", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.512, "hfopenllm_v2/BBH": 0.4381, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3435, "hfopenllm_v2/MMLU-PRO": 0.2789 } }, { "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", "name": "Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.729, "hfopenllm_v2/BBH": 0.4518, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.31 } }, { "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", "name": "ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5903, "hfopenllm_v2/BBH": 0.4364, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.2823 } }, { "id": "EpistemeAI/ReasoningCore-3B-0", "name": "ReasoningCore-3B-0", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7341, "hfopenllm_v2/BBH": 0.4446, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3554, "hfopenllm_v2/MMLU-PRO": 0.3172 } }, { "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect", "name": "ReasoningCore-3B-Instruct-r01-Reflect", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7335, "hfopenllm_v2/BBH": 0.445, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.3144 } }, { "id": "EpistemeAI/ReasoningCore-3B-R01", "name": "ReasoningCore-3B-R01", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2976, "hfopenllm_v2/BBH": 0.4373, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3195, "hfopenllm_v2/MMLU-PRO": 0.2591 } }, { "id": "EpistemeAI/ReasoningCore-3B-RE1-V2", "name": "ReasoningCore-3B-RE1-V2", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7393, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.3181 } }, { "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A", "name": "ReasoningCore-3B-RE1-V2A", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5733, "hfopenllm_v2/BBH": 0.419, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3352, "hfopenllm_v2/MMLU-PRO": 0.2736 } }, { "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B", "name": "ReasoningCore-3B-RE1-V2B", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5051, "hfopenllm_v2/BBH": 0.4168, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3448, "hfopenllm_v2/MMLU-PRO": 0.2673 } }, { "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C", "name": "ReasoningCore-3B-RE1-V2C", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5057, "hfopenllm_v2/BBH": 0.4177, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.2691 } }, { "id": "EpistemeAI/ReasoningCore-3B-T1-V1", "name": "ReasoningCore-3B-T1-V1", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.4517, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.354, "hfopenllm_v2/MMLU-PRO": 0.312 } }, { "id": "EpistemeAI/ReasoningCore-3B-T1_1", "name": "ReasoningCore-3B-T1_1", "developer": "EpistemeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7275, "hfopenllm_v2/BBH": 0.4524, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3554, "hfopenllm_v2/MMLU-PRO": 0.3117 } }, { "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2", "name": "Athene-codegemma-2-7b-it-alpaca-v1.2", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4351, "hfopenllm_v2/BBH": 0.4175, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.2297 } }, { "id": "EpistemeAI2/Fireball-12B-v1.2", "name": "Fireball-12B-v1.2", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1355, "hfopenllm_v2/BBH": 0.5019, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3337 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1-8B-Philos", "name": "Fireball-Alpaca-Llama3.1-8B-Philos", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4986, "hfopenllm_v2/BBH": 0.4978, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3406 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.01-8B-Philos", "name": "Fireball-Alpaca-Llama3.1.01-8B-Philos", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4212, "hfopenllm_v2/BBH": 0.4956, "hfopenllm_v2/MATH Level 5": 0.136, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.3383 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.03-8B-Philos", "name": "Fireball-Alpaca-Llama3.1.03-8B-Philos", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3881, "hfopenllm_v2/BBH": 0.4951, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.3355 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.04-8B-Philos", "name": "Fireball-Alpaca-Llama3.1.04-8B-Philos", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4084, "hfopenllm_v2/BBH": 0.493, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.3403 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", "name": "Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4866, "hfopenllm_v2/BBH": 0.4881, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3932, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", "name": "Fireball-Alpaca-Llama3.1.07-8B-Philos-Math", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5079, "hfopenllm_v2/BBH": 0.4847, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4063, "hfopenllm_v2/MMLU-PRO": 0.3531 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", "name": "Fireball-Alpaca-Llama3.1.08-8B-C-R1-KTO-Reflection", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3952, "hfopenllm_v2/BBH": 0.4955, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.3593 } }, { "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", "name": "Fireball-Alpaca-Llama3.1.08-8B-Philos-C-R1", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5316, "hfopenllm_v2/BBH": 0.4828, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.3523 } }, { "id": "EpistemeAI2/Fireball-Llama-3.1-8B-Philos-Reflection", "name": "Fireball-Llama-3.1-8B-Philos-Reflection", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3596, "hfopenllm_v2/BBH": 0.4898, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3957, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "EpistemeAI2/Fireball-MathMistral-Nemo-Base-2407-v2dpo", "name": "Fireball-MathMistral-Nemo-Base-2407-v2dpo", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3097, "hfopenllm_v2/BBH": 0.4328, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.403, "hfopenllm_v2/MMLU-PRO": 0.1148 } }, { "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5515, "hfopenllm_v2/BBH": 0.4808, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3693, "hfopenllm_v2/MMLU-PRO": 0.342 } }, { "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", "name": "Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4633, "hfopenllm_v2/BBH": 0.4791, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3774, "hfopenllm_v2/MMLU-PRO": 0.3565 } }, { "id": "EpistemeAI2/Fireball-Phi-3-medium-4k-inst-Philos", "name": "Fireball-Phi-3-medium-4k-inst-Philos", "developer": "EpistemeAI2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5313, "hfopenllm_v2/BBH": 0.6178, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4139, "hfopenllm_v2/MMLU-PRO": 0.4599 } }, { "id": "Eric111/CatunaMayo", "name": "CatunaMayo", "developer": "Eric111", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4074, "hfopenllm_v2/BBH": 0.5244, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.454, "hfopenllm_v2/MMLU-PRO": 0.3178 } }, { "id": "Eric111/CatunaMayo-DPO", "name": "CatunaMayo-DPO", "developer": "Eric111", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4215, "hfopenllm_v2/BBH": 0.5224, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.317 } }, { "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties", "name": "Chocolatine-3B-Instruct-DPO-Revised-Ties", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3725, "hfopenllm_v2/BBH": 0.5411, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4649, "hfopenllm_v2/MMLU-PRO": 0.3978 } }, { "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", "name": "Chocolatine-3B-Instruct-DPO-Revised-Ties-v2", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.374, "hfopenllm_v2/BBH": 0.5411, "hfopenllm_v2/MATH Level 5": 0.1631, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4649, "hfopenllm_v2/MMLU-PRO": 0.3978 } }, { "id": "Etherll/Herplete-LLM-Llama-3.1-8b", "name": "Herplete-LLM-Llama-3.1-8b", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4672, "hfopenllm_v2/BBH": 0.5013, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.386, "hfopenllm_v2/MMLU-PRO": 0.3482 } }, { "id": "Etherll/Herplete-LLM-Llama-3.1-8b-Ties", "name": "Herplete-LLM-Llama-3.1-8b-Ties", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6164, "hfopenllm_v2/BBH": 0.5338, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.3752 } }, { "id": "Etherll/Qwen2.5-7B-della-test", "name": "Qwen2.5-7B-della-test", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7625, "hfopenllm_v2/BBH": 0.5447, "hfopenllm_v2/MATH Level 5": 0.4894, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4047, "hfopenllm_v2/MMLU-PRO": 0.4361 } }, { "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties", "name": "Qwen2.5-Coder-7B-Instruct-Ties", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5005, "hfopenllm_v2/BBH": 0.4895, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4373, "hfopenllm_v2/MMLU-PRO": 0.3503 } }, { "id": "Etherll/Replete-LLM-V3-Llama-3.1-8b", "name": "Replete-LLM-V3-Llama-3.1-8b", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5263, "hfopenllm_v2/BBH": 0.4543, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3516, "hfopenllm_v2/MMLU-PRO": 0.347 } }, { "id": "Etherll/SuperHermes", "name": "SuperHermes", "developer": "Etherll", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5459, "hfopenllm_v2/BBH": 0.529, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.44, "hfopenllm_v2/MMLU-PRO": 0.3949 } }, { "id": "euclaise/ReMask-3B", "name": "ReMask-3B", "developer": "euclaise", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2419, "hfopenllm_v2/BBH": 0.3517, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1357 } }, { "id": "Eurdem/Defne-llama3.1-8B", "name": "Defne-llama3.1-8B", "developer": "Eurdem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5036, "hfopenllm_v2/BBH": 0.5321, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4331, "hfopenllm_v2/MMLU-PRO": 0.3866 } }, { "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2", "name": "EVA-Qwen2.5-14B-v0.2", "developer": "EVA-UNIT-01", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4038, "hfopenllm_v2/BBH": 0.609, "hfopenllm_v2/MATH Level 5": 0.3406, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4794, "hfopenllm_v2/MMLU-PRO": 0.5135 } }, { "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2", "name": "EVA-Qwen2.5-72B-v0.2", "developer": "EVA-UNIT-01", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6879, "hfopenllm_v2/BBH": 0.7088, "hfopenllm_v2/MATH Level 5": 0.4313, "hfopenllm_v2/GPQA": 0.4086, "hfopenllm_v2/MUSR": 0.472, "hfopenllm_v2/MMLU-PRO": 0.5813 } }, { "id": "eworojoshua/vas-01", "name": "vas-01", "developer": "eworojoshua", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7612, "hfopenllm_v2/BBH": 0.5418, "hfopenllm_v2/MATH Level 5": 0.4736, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4432, "hfopenllm_v2/MMLU-PRO": 0.4348 } }, { "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill", "name": "ewre324-R1-SmolLM2-135M-Distill", "developer": "ewre324", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1649, "hfopenllm_v2/BBH": 0.3042, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3409, "hfopenllm_v2/MMLU-PRO": 0.1134 } }, { "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning", "name": "Thinker-Llama-3.2-3B-Instruct-Reasoning", "developer": "ewre324", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4439, "hfopenllm_v2/BBH": 0.4273, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3655, "hfopenllm_v2/MMLU-PRO": 0.2886 } }, { "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning", "name": "Thinker-Qwen2.5-0.5B-Instruct-Reasoning", "developer": "ewre324", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2476, "hfopenllm_v2/BBH": 0.3292, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1647 } }, { "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning", "name": "Thinker-SmolLM2-135M-Instruct-Reasoning", "developer": "ewre324", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2584, "hfopenllm_v2/BBH": 0.3071, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.1094 } }, { "id": "experiment-llm/exp-3-q-r", "name": "exp-3-q-r", "developer": "experiment-llm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6036, "hfopenllm_v2/BBH": 0.5397, "hfopenllm_v2/MATH Level 5": 0.2787, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4315, "hfopenllm_v2/MMLU-PRO": 0.4316 } }, { "id": "facebook/opt-1.3b", "name": "opt-1.3b", "developer": "facebook", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2383, "hfopenllm_v2/BBH": 0.3094, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1107 } }, { "id": "facebook/opt-30b", "name": "opt-30b", "developer": "facebook", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2453, "hfopenllm_v2/BBH": 0.307, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3604, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "facebook/Self-taught-evaluator-llama3.1-70B", "name": "facebook/Self-taught-evaluator-llama3.1-70B", "developer": "facebook", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9001, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.8509, "reward-bench/Safety": 0.8959, "reward-bench/Reasoning": 0.8844 } }, { "id": "facebook/Self-taught-Llama-3-70B", "name": "facebook/Self-taught-Llama-3-70B", "developer": "facebook", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8863, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.8399, "reward-bench/Safety": 0.9108, "reward-bench/Reasoning": 0.8251 } }, { "id": "failspy/llama-3-70B-Instruct-abliterated", "name": "llama-3-70B-Instruct-abliterated", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8023, "hfopenllm_v2/BBH": 0.6465, "hfopenllm_v2/MATH Level 5": 0.2432, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4128, "hfopenllm_v2/MMLU-PRO": 0.5145 } }, { "id": "failspy/Llama-3-8B-Instruct-abliterated", "name": "Llama-3-8B-Instruct-abliterated", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5909, "hfopenllm_v2/BBH": 0.4354, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4116, "hfopenllm_v2/MMLU-PRO": 0.2742 } }, { "id": "failspy/Llama-3-8B-Instruct-MopeyMule", "name": "Llama-3-8B-Instruct-MopeyMule", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.675, "hfopenllm_v2/BBH": 0.3839, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3513, "hfopenllm_v2/MMLU-PRO": 0.1764 } }, { "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5", "name": "Meta-Llama-3-70B-Instruct-abliterated-v3.5", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7747, "hfopenllm_v2/BBH": 0.5747, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3982, "hfopenllm_v2/MMLU-PRO": 0.4452 } }, { "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3", "name": "Meta-Llama-3-8B-Instruct-abliterated-v3", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7245, "hfopenllm_v2/BBH": 0.4925, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3622, "hfopenllm_v2/MMLU-PRO": 0.3654 } }, { "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3", "name": "Phi-3-medium-4k-instruct-abliterated-v3", "developer": "failspy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6319, "hfopenllm_v2/BBH": 0.6305, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4604, "hfopenllm_v2/MMLU-PRO": 0.44 } }, { "id": "FallenMerick/Chewy-Lemon-Cookie-11B", "name": "Chewy-Lemon-Cookie-11B", "developer": "FallenMerick", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4875, "hfopenllm_v2/BBH": 0.5251, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4546, "hfopenllm_v2/MMLU-PRO": 0.3267 } }, { "id": "fblgit/cybertron-v4-qw7B-MGS", "name": "cybertron-v4-qw7B-MGS", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6264, "hfopenllm_v2/BBH": 0.5592, "hfopenllm_v2/MATH Level 5": 0.3489, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.4473 } }, { "id": "fblgit/cybertron-v4-qw7B-UNAMGS", "name": "cybertron-v4-qw7B-UNAMGS", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.609, "hfopenllm_v2/BBH": 0.5643, "hfopenllm_v2/MATH Level 5": 0.3731, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.45 } }, { "id": "fblgit/juanako-7b-UNA", "name": "juanako-7b-UNA", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4837, "hfopenllm_v2/BBH": 0.507, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4645, "hfopenllm_v2/MMLU-PRO": 0.2771 } }, { "id": "fblgit/miniclaus-qw1.5B-UNAMGS", "name": "miniclaus-qw1.5B-UNAMGS", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3348, "hfopenllm_v2/BBH": 0.4239, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.2937 } }, { "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO", "name": "miniclaus-qw1.5B-UNAMGS-GRPO", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3518, "hfopenllm_v2/BBH": 0.4234, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.2945 } }, { "id": "fblgit/pancho-v1-qw25-3B-UNAMGS", "name": "pancho-v1-qw25-3B-UNAMGS", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5361, "hfopenllm_v2/BBH": 0.4926, "hfopenllm_v2/MATH Level 5": 0.1571, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4027, "hfopenllm_v2/MMLU-PRO": 0.3766 } }, { "id": "fblgit/TheBeagle-v2beta-32B-MGS", "name": "TheBeagle-v2beta-32B-MGS", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5181, "hfopenllm_v2/BBH": 0.7033, "hfopenllm_v2/MATH Level 5": 0.4947, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.5008, "hfopenllm_v2/MMLU-PRO": 0.5915 } }, { "id": "fblgit/una-cybertron-7b-v2-bf16", "name": "una-cybertron-7b-v2-bf16", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4737, "hfopenllm_v2/BBH": 0.3973, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4473, "hfopenllm_v2/MMLU-PRO": 0.2443 } }, { "id": "fblgit/UNA-SimpleSmaug-34b-v1beta", "name": "UNA-SimpleSmaug-34b-v1beta", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4556, "hfopenllm_v2/BBH": 0.5287, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4256, "hfopenllm_v2/MMLU-PRO": 0.454 } }, { "id": "fblgit/UNA-TheBeagle-7b-v1", "name": "UNA-TheBeagle-7b-v1", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3689, "hfopenllm_v2/BBH": 0.5029, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4564, "hfopenllm_v2/MMLU-PRO": 0.3019 } }, { "id": "fblgit/UNA-ThePitbull-21.4B-v2", "name": "UNA-ThePitbull-21.4B-v2", "developer": "fblgit", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.379, "hfopenllm_v2/BBH": 0.635, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.3516 } }, { "id": "Felladrin/Llama-160M-Chat-v1", "name": "Llama-160M-Chat-v1", "developer": "Felladrin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1575, "hfopenllm_v2/BBH": 0.3036, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.1136 } }, { "id": "Felladrin/Minueza-32M-UltraChat", "name": "Minueza-32M-UltraChat", "developer": "Felladrin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1376, "hfopenllm_v2/BBH": 0.2941, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3742, "hfopenllm_v2/MMLU-PRO": 0.1133 } }, { "id": "fhai50032/RolePlayLake-7B", "name": "RolePlayLake-7B", "developer": "fhai50032", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5057, "hfopenllm_v2/BBH": 0.5252, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.316 } }, { "id": "fhai50032/Unaligned-Thinker-PHI-4", "name": "Unaligned-Thinker-PHI-4", "developer": "fhai50032", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0563, "hfopenllm_v2/BBH": 0.6643, "hfopenllm_v2/MATH Level 5": 0.3353, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4679, "hfopenllm_v2/MMLU-PRO": 0.5147 } }, { "id": "FINGU-AI/Chocolatine-Fusion-14B", "name": "Chocolatine-Fusion-14B", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6949, "hfopenllm_v2/BBH": 0.6413, "hfopenllm_v2/MATH Level 5": 0.3852, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.494, "hfopenllm_v2/MMLU-PRO": 0.5262 } }, { "id": "FINGU-AI/L3-8B", "name": "L3-8B", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7517, "hfopenllm_v2/BBH": 0.4986, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3828, "hfopenllm_v2/MMLU-PRO": 0.3639 } }, { "id": "FINGU-AI/Phi-4-RRStock", "name": "Phi-4-RRStock", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2855, "hfopenllm_v2/BBH": 0.6443, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.4883 } }, { "id": "FINGU-AI/Q-Small-3B", "name": "Q-Small-3B", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4145, "hfopenllm_v2/BBH": 0.4319, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.4005, "hfopenllm_v2/MMLU-PRO": 0.279 } }, { "id": "FINGU-AI/QwQ-Buddy-32B-Alpha", "name": "QwQ-Buddy-32B-Alpha", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3446, "hfopenllm_v2/BBH": 0.6424, "hfopenllm_v2/MATH Level 5": 0.3852, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.506, "hfopenllm_v2/MMLU-PRO": 0.5294 } }, { "id": "FINGU-AI/RomboUltima-32B", "name": "RomboUltima-32B", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6672, "hfopenllm_v2/BBH": 0.6938, "hfopenllm_v2/MATH Level 5": 0.5385, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4836, "hfopenllm_v2/MMLU-PRO": 0.5789 } }, { "id": "FINGU-AI/Ultimos-32B", "name": "Ultimos-32B", "developer": "FINGU-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1592, "hfopenllm_v2/BBH": 0.2906, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3286, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "flammenai/flammen15-gutenberg-DPO-v1-7B", "name": "flammen15-gutenberg-DPO-v1-7B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4798, "hfopenllm_v2/BBH": 0.5203, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3186 } }, { "id": "flammenai/Llama3.1-Flammades-70B", "name": "Llama3.1-Flammades-70B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7058, "hfopenllm_v2/BBH": 0.666, "hfopenllm_v2/MATH Level 5": 0.2092, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4871, "hfopenllm_v2/MMLU-PRO": 0.4752 } }, { "id": "flammenai/Mahou-1.2a-llama3-8B", "name": "Mahou-1.2a-llama3-8B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5093, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.3817 } }, { "id": "flammenai/Mahou-1.2a-mistral-7B", "name": "Mahou-1.2a-mistral-7B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4552, "hfopenllm_v2/BBH": 0.5118, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3896, "hfopenllm_v2/MMLU-PRO": 0.3163 } }, { "id": "flammenai/Mahou-1.5-llama3.1-70B", "name": "Mahou-1.5-llama3.1-70B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7147, "hfopenllm_v2/BBH": 0.6651, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.495, "hfopenllm_v2/MMLU-PRO": 0.4749 } }, { "id": "flammenai/Mahou-1.5-mistral-nemo-12B", "name": "Mahou-1.5-mistral-nemo-12B", "developer": "flammenai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6751, "hfopenllm_v2/BBH": 0.5522, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.452, "hfopenllm_v2/MMLU-PRO": 0.3602 } }, { "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "name": "100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3083, "hfopenllm_v2/BBH": 0.3323, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1498 } }, { "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", "name": "10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5097, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.3769 } }, { "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "name": "10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2815, "hfopenllm_v2/BBH": 0.3306, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1541 } }, { "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "name": "40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3016, "hfopenllm_v2/BBH": 0.3325, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.1485 } }, { "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "name": "83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2869, "hfopenllm_v2/BBH": 0.3347, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1555 } }, { "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb", "name": "smollm2-135M_pretrained_1000k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1485, "hfopenllm_v2/BBH": 0.2918, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3581, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1554, "hfopenllm_v2/BBH": 0.3066, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1143 } }, { "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_1000k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1468, "hfopenllm_v2/BBH": 0.2932, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.1157 } }, { "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb", "name": "smollm2-135M_pretrained_1200k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1581, "hfopenllm_v2/BBH": 0.2941, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3714, "hfopenllm_v2/MMLU-PRO": 0.1076 } }, { "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1578, "hfopenllm_v2/BBH": 0.295, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_1200k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1585, "hfopenllm_v2/BBH": 0.296, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3567, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb", "name": "smollm2-135M_pretrained_1400k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1764, "hfopenllm_v2/BBH": 0.2922, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.108 } }, { "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1707, "hfopenllm_v2/BBH": 0.2992, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3939, "hfopenllm_v2/MMLU-PRO": 0.1105 } }, { "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_1400k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1538, "hfopenllm_v2/BBH": 0.2917, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3741, "hfopenllm_v2/MMLU-PRO": 0.1137 } }, { "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1475, "hfopenllm_v2/BBH": 0.3029, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3578, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_200k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1345, "hfopenllm_v2/BBH": 0.2927, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb", "name": "smollm2-135M_pretrained_400k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1511, "hfopenllm_v2/BBH": 0.2972, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.1163 } }, { "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1556, "hfopenllm_v2/BBH": 0.3049, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.386, "hfopenllm_v2/MMLU-PRO": 0.1138 } }, { "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_400k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1584, "hfopenllm_v2/BBH": 0.2925, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.1158 } }, { "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb", "name": "smollm2-135M_pretrained_600k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1639, "hfopenllm_v2/BBH": 0.3014, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3809, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1641, "hfopenllm_v2/BBH": 0.3, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_600k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1606, "hfopenllm_v2/BBH": 0.2983, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3846, "hfopenllm_v2/MMLU-PRO": 0.1162 } }, { "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb", "name": "smollm2-135M_pretrained_800k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1641, "hfopenllm_v2/BBH": 0.2959, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3701, "hfopenllm_v2/MMLU-PRO": 0.1152 } }, { "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1623, "hfopenllm_v2/BBH": 0.3038, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3993, "hfopenllm_v2/MMLU-PRO": 0.1138 } }, { "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected", "name": "smollm2-135M_pretrained_800k_fineweb_uncovai_selected", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1474, "hfopenllm_v2/BBH": 0.2943, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3766, "hfopenllm_v2/MMLU-PRO": 0.113 } }, { "id": "FlofloB/smollm2_pretrained_200k_fineweb", "name": "smollm2_pretrained_200k_fineweb", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1527, "hfopenllm_v2/BBH": 0.2995, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.1159 } }, { "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", "name": "test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit", "developer": "FlofloB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5215, "hfopenllm_v2/BBH": 0.5241, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.3721 } }, { "id": "fluently-lm/FluentlyLM-Prinum", "name": "FluentlyLM-Prinum", "developer": "fluently-lm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.809, "hfopenllm_v2/BBH": 0.7144, "hfopenllm_v2/MATH Level 5": 0.54, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4471, "hfopenllm_v2/MMLU-PRO": 0.5808 } }, { "id": "fluently-lm/Llama-TI-8B", "name": "Llama-TI-8B", "developer": "fluently-lm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.288, "hfopenllm_v2/BBH": 0.5201, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.344 } }, { "id": "fluently-lm/Llama-TI-8B-Instruct", "name": "Llama-TI-8B-Instruct", "developer": "fluently-lm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7716, "hfopenllm_v2/BBH": 0.5252, "hfopenllm_v2/MATH Level 5": 0.2304, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3813, "hfopenllm_v2/MMLU-PRO": 0.3726 } }, { "id": "fluently-sets/FalconThink3-10B-IT", "name": "FalconThink3-10B-IT", "developer": "fluently-sets", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7326, "hfopenllm_v2/BBH": 0.62, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.4435 } }, { "id": "fluently-sets/reasoning-1-1k-demo", "name": "reasoning-1-1k-demo", "developer": "fluently-sets", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7525, "hfopenllm_v2/BBH": 0.6397, "hfopenllm_v2/MATH Level 5": 0.4282, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4061, "hfopenllm_v2/MMLU-PRO": 0.4774 } }, { "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", "name": "mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1614, "hfopenllm_v2/BBH": 0.2976, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.4219, "hfopenllm_v2/MMLU-PRO": 0.1174 } }, { "id": "formulae/mita-elite-v1.1-7b-2-25-2025", "name": "mita-elite-v1.1-7b-2-25-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.125, "hfopenllm_v2/BBH": 0.2867, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1098 } }, { "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025", "name": "mita-elite-v1.1-gen2-7b-2-25-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1411, "hfopenllm_v2/BBH": 0.2924, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.1101 } }, { "id": "formulae/mita-elite-v1.2-7b-2-26-2025", "name": "mita-elite-v1.2-7b-2-26-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.148, "hfopenllm_v2/BBH": 0.293, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.1186 } }, { "id": "formulae/mita-gen3-7b-2-26-2025", "name": "mita-gen3-7b-2-26-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1964, "hfopenllm_v2/BBH": 0.2916, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3912, "hfopenllm_v2/MMLU-PRO": 0.1124 } }, { "id": "formulae/mita-gen3-v1.2-7b-2-26-2025", "name": "mita-gen3-v1.2-7b-2-26-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2044, "hfopenllm_v2/BBH": 0.3058, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.39, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "formulae/mita-math-v2.3-2-25-2025", "name": "mita-math-v2.3-2-25-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1373, "hfopenllm_v2/BBH": 0.2949, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.1118 } }, { "id": "formulae/mita-v1-7b", "name": "mita-v1-7b", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1972, "hfopenllm_v2/BBH": 0.3003, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.4152, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "formulae/mita-v1.1-7b-2-24-2025", "name": "mita-v1.1-7b-2-24-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3412, "hfopenllm_v2/BBH": 0.5442, "hfopenllm_v2/MATH Level 5": 0.435, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4557, "hfopenllm_v2/MMLU-PRO": 0.4524 } }, { "id": "formulae/mita-v1.2-7b-2-24-2025", "name": "mita-v1.2-7b-2-24-2025", "developer": "formulae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2564, "hfopenllm_v2/BBH": 0.4919, "hfopenllm_v2/MATH Level 5": 0.4879, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4344, "hfopenllm_v2/MMLU-PRO": 0.3359 } }, { "id": "frameai/Loxa-4B", "name": "Loxa-4B", "developer": "frameai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4765, "hfopenllm_v2/BBH": 0.4217, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3377, "hfopenllm_v2/MMLU-PRO": 0.2802 } }, { "id": "freewheelin/free-evo-qwen72b-v0.8-re", "name": "free-evo-qwen72b-v0.8-re", "developer": "freewheelin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.6127, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4872, "hfopenllm_v2/MMLU-PRO": 0.487 } }, { "id": "freewheelin/free-solar-evo-v0.1", "name": "free-solar-evo-v0.1", "developer": "freewheelin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.205, "hfopenllm_v2/BBH": 0.4502, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4946, "hfopenllm_v2/MMLU-PRO": 0.3414 } }, { "id": "freewheelin/free-solar-evo-v0.11", "name": "free-solar-evo-v0.11", "developer": "freewheelin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2027, "hfopenllm_v2/BBH": 0.4545, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.5052, "hfopenllm_v2/MMLU-PRO": 0.3467 } }, { "id": "freewheelin/free-solar-evo-v0.13", "name": "free-solar-evo-v0.13", "developer": "freewheelin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2321, "hfopenllm_v2/BBH": 0.4555, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.5052, "hfopenllm_v2/MMLU-PRO": 0.347 } }, { "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs", "name": "ft-openhermes-25-mistral-7b-irca-dpo-pairs", "developer": "FuJhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.542, "hfopenllm_v2/BBH": 0.4773, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.2956 } }, { "id": "FuJhen/mistral-instruct-7B-DPO", "name": "mistral-instruct-7B-DPO", "developer": "FuJhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4968, "hfopenllm_v2/BBH": 0.4624, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4016, "hfopenllm_v2/MMLU-PRO": 0.3034 } }, { "id": "FuJhen/mistral_7b_v0.1_structedData_e2e", "name": "mistral_7b_v0.1_structedData_e2e", "developer": "FuJhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1727, "hfopenllm_v2/BBH": 0.4114, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3723, "hfopenllm_v2/MMLU-PRO": 0.2811 } }, { "id": "FuJhen/mistral_7b_v0.1_structedData_viggo", "name": "mistral_7b_v0.1_structedData_viggo", "developer": "FuJhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1783, "hfopenllm_v2/BBH": 0.4524, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.2942 } }, { "id": "fulim/FineLlama-3.1-8B", "name": "FineLlama-3.1-8B", "developer": "fulim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1439, "hfopenllm_v2/BBH": 0.4569, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.3167 } }, { "id": "FuseAI/FuseChat-7B-v2.0", "name": "FuseChat-7B-v2.0", "developer": "FuseAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3423, "hfopenllm_v2/BBH": 0.4954, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4797, "hfopenllm_v2/MMLU-PRO": 0.3162 } }, { "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct", "name": "FuseChat-Llama-3.1-8B-Instruct", "developer": "FuseAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7205, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.3733 } }, { "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct", "name": "FuseChat-Llama-3.2-3B-Instruct", "developer": "FuseAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6849, "hfopenllm_v2/BBH": 0.4658, "hfopenllm_v2/MATH Level 5": 0.2424, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.3132 } }, { "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct", "name": "FuseChat-Qwen-2.5-7B-Instruct", "developer": "FuseAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5906, "hfopenllm_v2/BBH": 0.5526, "hfopenllm_v2/MATH Level 5": 0.4562, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.4118 } }, { "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval", "name": "SmolLM-1.7B-Instruct-IFEval", "developer": "gabrielmbmb", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2306, "hfopenllm_v2/BBH": 0.3138, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1156 } }, { "id": "GalrionSoftworks/MagnusIntellectus-12B-v1", "name": "MagnusIntellectus-12B-v1", "developer": "GalrionSoftworks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4421, "hfopenllm_v2/BBH": 0.5323, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4428, "hfopenllm_v2/MMLU-PRO": 0.3421 } }, { "id": "GalrionSoftworks/MN-LooseCannon-12B-v1", "name": "MN-LooseCannon-12B-v1", "developer": "GalrionSoftworks", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5418, "hfopenllm_v2/BBH": 0.5128, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.3196 } }, { "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA", "developer": "gaverfraxz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4009, "hfopenllm_v2/BBH": 0.3985, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.365, "hfopenllm_v2/MMLU-PRO": 0.1654 } }, { "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", "name": "Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES", "developer": "gaverfraxz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4551, "hfopenllm_v2/BBH": 0.5044, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "gbueno86/Brinebreath-Llama-3.1-70B", "name": "Brinebreath-Llama-3.1-70B", "developer": "gbueno86", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5533, "hfopenllm_v2/BBH": 0.6881, "hfopenllm_v2/MATH Level 5": 0.2976, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4541, "hfopenllm_v2/MMLU-PRO": 0.5196 } }, { "id": "gbueno86/Meta-LLama-3-Cat-Smaug-LLama-70b", "name": "Meta-LLama-3-Cat-Smaug-LLama-70b", "developer": "gbueno86", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8072, "hfopenllm_v2/BBH": 0.6674, "hfopenllm_v2/MATH Level 5": 0.2938, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4368, "hfopenllm_v2/MMLU-PRO": 0.5075 } }, { "id": "gemini-1.5-flash-8b", "name": "gemini-1.5-flash-8b", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7601, "reward-bench/Chat": 0.9441, "reward-bench/Chat Hard": 0.5987, "reward-bench/Safety": 0.7399, "reward-bench/Reasoning": 0.7575 } }, { "id": "general-preference/GPM-Gemma-2B", "name": "general-preference/GPM-Gemma-2B", "developer": "general-preference", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7449, "reward-bench/Chat": 0.7151, "reward-bench/Chat Hard": 0.6974, "reward-bench/Safety": 0.8122, "reward-bench/Reasoning": 0.755 } }, { "id": "general-preference/GPM-Llama-3.1-8B", "name": "general-preference/GPM-Llama-3.1-8B", "developer": "general-preference", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9224, "reward-bench/Chat": 0.933, "reward-bench/Chat Hard": 0.886, "reward-bench/Safety": 0.9108, "reward-bench/Reasoning": 0.9597 } }, { "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged", "name": "AryaBhatta-GemmaOrca-2-Merged", "developer": "GenVRadmin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3064, "hfopenllm_v2/BBH": 0.3887, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.455, "hfopenllm_v2/MMLU-PRO": 0.2384 } }, { "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged", "name": "AryaBhatta-GemmaOrca-Merged", "developer": "GenVRadmin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3064, "hfopenllm_v2/BBH": 0.4131, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3524, "hfopenllm_v2/MMLU-PRO": 0.2228 } }, { "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged", "name": "AryaBhatta-GemmaUltra-Merged", "developer": "GenVRadmin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3021, "hfopenllm_v2/BBH": 0.4141, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.2266 } }, { "id": "GenVRadmin/llama38bGenZ_Vikas-Merged", "name": "llama38bGenZ_Vikas-Merged", "developer": "GenVRadmin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3, "hfopenllm_v2/BBH": 0.4536, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4402, "hfopenllm_v2/MMLU-PRO": 0.2622 } }, { "id": "ghost-x/ghost-8b-beta-1608", "name": "ghost-8b-beta-1608", "developer": "ghost-x", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4273, "hfopenllm_v2/BBH": 0.4517, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3516, "hfopenllm_v2/MMLU-PRO": 0.284 } }, { "id": "glaiveai/Reflection-Llama-3.1-70B", "name": "Reflection-Llama-3.1-70B", "developer": "glaiveai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5991, "hfopenllm_v2/BBH": 0.5681, "hfopenllm_v2/MATH Level 5": 0.2757, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.438, "hfopenllm_v2/MMLU-PRO": 0.6341 } }, { "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES", "name": "gemma2-9b-sahabatai-v1-instruct-BaseTIES", "developer": "gmonsoon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.6077, "hfopenllm_v2/MATH Level 5": 0.1994, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4778, "hfopenllm_v2/MMLU-PRO": 0.4347 } }, { "id": "gmonsoon/SahabatAI-Llama-11B-Test", "name": "SahabatAI-Llama-11B-Test", "developer": "gmonsoon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3376, "hfopenllm_v2/BBH": 0.4728, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4001, "hfopenllm_v2/MMLU-PRO": 0.3182 } }, { "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1", "name": "SahabatAI-MediChatIndo-8B-v1", "developer": "gmonsoon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4163, "hfopenllm_v2/BBH": 0.4509, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3108 } }, { "id": "gmonsoon/SahabatAI-Rebase-8B-Test", "name": "SahabatAI-Rebase-8B-Test", "developer": "gmonsoon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5156, "hfopenllm_v2/BBH": 0.523, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4133, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "gmonsoon/StockSeaLLMs-7B-v1", "name": "StockSeaLLMs-7B-v1", "developer": "gmonsoon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4599, "hfopenllm_v2/BBH": 0.5271, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4214, "hfopenllm_v2/MMLU-PRO": 0.3952 } }, { "id": "godlikehhd/alpaca_data_full_2", "name": "alpaca_data_full_2", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3178, "hfopenllm_v2/BBH": 0.4217, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.2854 } }, { "id": "godlikehhd/alpaca_data_full_3B", "name": "alpaca_data_full_3B", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3696, "hfopenllm_v2/BBH": 0.4684, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4955, "hfopenllm_v2/MMLU-PRO": 0.3357 } }, { "id": "godlikehhd/alpaca_data_ifd_max_2600", "name": "alpaca_data_ifd_max_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3043, "hfopenllm_v2/BBH": 0.4029, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3509, "hfopenllm_v2/MMLU-PRO": 0.2916 } }, { "id": "godlikehhd/alpaca_data_ifd_max_2600_3B", "name": "alpaca_data_ifd_max_2600_3B", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2982, "hfopenllm_v2/BBH": 0.4626, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4346, "hfopenllm_v2/MMLU-PRO": 0.3288 } }, { "id": "godlikehhd/alpaca_data_ifd_me_max_5200", "name": "alpaca_data_ifd_me_max_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3683, "hfopenllm_v2/BBH": 0.4153, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3483, "hfopenllm_v2/MMLU-PRO": 0.2982 } }, { "id": "godlikehhd/alpaca_data_ifd_min_2600", "name": "alpaca_data_ifd_min_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.375, "hfopenllm_v2/BBH": 0.4219, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3656, "hfopenllm_v2/MMLU-PRO": 0.2893 } }, { "id": "godlikehhd/alpaca_data_ins_ans_max_5200", "name": "alpaca_data_ins_ans_max_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3479, "hfopenllm_v2/BBH": 0.4098, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3602, "hfopenllm_v2/MMLU-PRO": 0.2901 } }, { "id": "godlikehhd/alpaca_data_ins_max_5200", "name": "alpaca_data_ins_max_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3275, "hfopenllm_v2/BBH": 0.4155, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3614, "hfopenllm_v2/MMLU-PRO": 0.2916 } }, { "id": "godlikehhd/alpaca_data_ins_min_2600", "name": "alpaca_data_ins_min_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.333, "hfopenllm_v2/BBH": 0.4187, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3853, "hfopenllm_v2/MMLU-PRO": 0.288 } }, { "id": "godlikehhd/alpaca_data_ins_min_5200", "name": "alpaca_data_ins_min_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.336, "hfopenllm_v2/BBH": 0.4289, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3906, "hfopenllm_v2/MMLU-PRO": 0.2949 } }, { "id": "godlikehhd/alpaca_data_sampled_ifd_5200", "name": "alpaca_data_sampled_ifd_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2924, "hfopenllm_v2/BBH": 0.4033, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3521, "hfopenllm_v2/MMLU-PRO": 0.2896 } }, { "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200", "name": "alpaca_data_sampled_ifd_new_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3663, "hfopenllm_v2/BBH": 0.4178, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3613, "hfopenllm_v2/MMLU-PRO": 0.2925 } }, { "id": "godlikehhd/alpaca_data_score_max_0.1_2600", "name": "alpaca_data_score_max_0.1_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3288, "hfopenllm_v2/BBH": 0.4252, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3706, "hfopenllm_v2/MMLU-PRO": 0.2923 } }, { "id": "godlikehhd/alpaca_data_score_max_0.3_2600", "name": "alpaca_data_score_max_0.3_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3375, "hfopenllm_v2/BBH": 0.4151, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3759, "hfopenllm_v2/MMLU-PRO": 0.2913 } }, { "id": "godlikehhd/alpaca_data_score_max_0.7_2600", "name": "alpaca_data_score_max_0.7_2600", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.364, "hfopenllm_v2/BBH": 0.4185, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3469, "hfopenllm_v2/MMLU-PRO": 0.2983 } }, { "id": "godlikehhd/alpaca_data_score_max_2500", "name": "alpaca_data_score_max_2500", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3564, "hfopenllm_v2/BBH": 0.418, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3627, "hfopenllm_v2/MMLU-PRO": 0.294 } }, { "id": "godlikehhd/alpaca_data_score_max_2600_3B", "name": "alpaca_data_score_max_2600_3B", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3358, "hfopenllm_v2/BBH": 0.4716, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4474, "hfopenllm_v2/MMLU-PRO": 0.3342 } }, { "id": "godlikehhd/alpaca_data_score_max_5200", "name": "alpaca_data_score_max_5200", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3445, "hfopenllm_v2/BBH": 0.4242, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3878, "hfopenllm_v2/MMLU-PRO": 0.2945 } }, { "id": "godlikehhd/ifd_2500_qwen", "name": "ifd_2500_qwen", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3365, "hfopenllm_v2/BBH": 0.4298, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3615, "hfopenllm_v2/MMLU-PRO": 0.2921 } }, { "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen", "name": "ifd_new_correct_all_sample_2500_qwen", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3376, "hfopenllm_v2/BBH": 0.402, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3562, "hfopenllm_v2/MMLU-PRO": 0.2889 } }, { "id": "godlikehhd/ifd_new_correct_sample_2500_qwen", "name": "ifd_new_correct_sample_2500_qwen", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3397, "hfopenllm_v2/BBH": 0.411, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3627, "hfopenllm_v2/MMLU-PRO": 0.2932 } }, { "id": "godlikehhd/ifd_new_qwen_2500", "name": "ifd_new_qwen_2500", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.324, "hfopenllm_v2/BBH": 0.416, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.359, "hfopenllm_v2/MMLU-PRO": 0.2911 } }, { "id": "godlikehhd/qwen-2.5-1.5b-cherry", "name": "qwen-2.5-1.5b-cherry", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2893, "hfopenllm_v2/BBH": 0.4036, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3456, "hfopenllm_v2/MMLU-PRO": 0.2923 } }, { "id": "godlikehhd/qwen_2.5-1.5b-cherry_new", "name": "qwen_2.5-1.5b-cherry_new", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.312, "hfopenllm_v2/BBH": 0.415, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3496, "hfopenllm_v2/MMLU-PRO": 0.2894 } }, { "id": "godlikehhd/qwen_full_data_alpaca", "name": "qwen_full_data_alpaca", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3136, "hfopenllm_v2/BBH": 0.4229, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.2851 } }, { "id": "godlikehhd/qwen_ins_ans_2500", "name": "qwen_ins_ans_2500", "developer": "godlikehhd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2698, "hfopenllm_v2/BBH": 0.4074, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3589, "hfopenllm_v2/MMLU-PRO": 0.2809 } }, { "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", "name": "j.o.s.i.e.v4o-1.5b-dpo-stage1-v1", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4188, "hfopenllm_v2/BBH": 0.4124, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3529, "hfopenllm_v2/MMLU-PRO": 0.2555 } }, { "id": "Goekdeniz-Guelmez/josie-3b-v6.0", "name": "josie-3b-v6.0", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.601, "hfopenllm_v2/BBH": 0.4496, "hfopenllm_v2/MATH Level 5": 0.2938, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3861, "hfopenllm_v2/MMLU-PRO": 0.322 } }, { "id": "Goekdeniz-Guelmez/josie-7b-v6.0", "name": "josie-7b-v6.0", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7412, "hfopenllm_v2/BBH": 0.5105, "hfopenllm_v2/MATH Level 5": 0.4358, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000", "name": "josie-7b-v6.0-step2000", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7628, "hfopenllm_v2/BBH": 0.5098, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4579, "hfopenllm_v2/MMLU-PRO": 0.4033 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3472, "hfopenllm_v2/BBH": 0.3268, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1641 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4769, "hfopenllm_v2/BBH": 0.4186, "hfopenllm_v2/MATH Level 5": 0.2085, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3675, "hfopenllm_v2/MMLU-PRO": 0.2783 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4216, "hfopenllm_v2/BBH": 0.4042, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3769, "hfopenllm_v2/MMLU-PRO": 0.2562 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", "name": "Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4253, "hfopenllm_v2/BBH": 0.4053, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3702, "hfopenllm_v2/MMLU-PRO": 0.2556 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", "name": "Josiefied-Qwen2.5-14B-Instruct-abliterated-v4", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8292, "hfopenllm_v2/BBH": 0.6356, "hfopenllm_v2/MATH Level 5": 0.5423, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.5018 } }, { "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", "developer": "Goekdeniz-Guelmez", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7814, "hfopenllm_v2/BBH": 0.531, "hfopenllm_v2/MATH Level 5": 0.4532, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.412 } }, { "id": "google/codegemma-1.1-2b", "name": "codegemma-1.1-2b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2294, "hfopenllm_v2/BBH": 0.3353, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.1278 } }, { "id": "google/flame-1.0-24B-july-2024", "name": "google/flame-1.0-24B-july-2024", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8781, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.7566, "reward-bench/Safety": 0.8959, "reward-bench/Reasoning": 0.938 } }, { "id": "google/flan-t5-base", "name": "flan-t5-base", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1891, "hfopenllm_v2/BBH": 0.3526, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1357 } }, { "id": "google/flan-t5-large", "name": "flan-t5-large", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2201, "hfopenllm_v2/BBH": 0.4153, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.4083, "hfopenllm_v2/MMLU-PRO": 0.1709 } }, { "id": "google/flan-t5-small", "name": "flan-t5-small", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1524, "hfopenllm_v2/BBH": 0.3283, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.1233 } }, { "id": "google/flan-t5-xl", "name": "flan-t5-xl", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2207, "hfopenllm_v2/BBH": 0.4537, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.422, "hfopenllm_v2/MMLU-PRO": 0.2142 } }, { "id": "google/flan-t5-xxl", "name": "flan-t5-xxl", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.22, "hfopenllm_v2/BBH": 0.5066, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4218, "hfopenllm_v2/MMLU-PRO": 0.2343 } }, { "id": "google/flan-ul2", "name": "flan-ul2", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2393, "hfopenllm_v2/BBH": 0.5054, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3844, "hfopenllm_v2/MMLU-PRO": 0.2493 } }, { "id": "google/Gemini 2.5 Flash", "name": "Gemini 2.5 Flash", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.38, "ace/Gaming Score": 0.284, "apex-v1/Overall Score": 0.604 } }, { "id": "google/Gemini 2.5 Pro", "name": "Gemini 2.5 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.4, "ace/Gaming Score": 0.285 } }, { "id": "google/Gemini 3 Flash", "name": "Gemini 3 Flash", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.24, "apex-agents/Overall Pass@8": 0.367, "apex-agents/Overall Mean Score": 0.395, "apex-agents/Investment Banking Pass@1": 0.267, "apex-agents/Management Consulting Pass@1": 0.193, "apex-agents/Corporate Law Pass@1": 0.259, "apex-agents/Corporate Lawyer Mean Score": 0.524, "ace/Gaming Score": 0.415, "apex-v1/Overall Score": 0.64, "apex-v1/Consulting Score": 0.64 } }, { "id": "google/Gemini 3 Pro", "name": "Gemini 3 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.184, "apex-agents/Overall Pass@8": 0.373, "apex-agents/Overall Mean Score": 0.341, "apex-agents/Investment Banking Pass@1": 0.188, "apex-agents/Management Consulting Pass@1": 0.124, "apex-agents/Corporate Law Pass@1": 0.239, "apex-agents/Corporate Lawyer Mean Score": 0.487, "ace/Overall Score": 0.47, "ace/Gaming Score": 0.509, "apex-v1/Overall Score": 0.643, "apex-v1/Consulting Score": 0.64, "apex-v1/Investment Banking Score": 0.63 } }, { "id": "google/Gemini 3.1 Pro", "name": "Gemini 3.1 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.335, "apex-agents/Corporate Lawyer Mean Score": 0.494 } }, { "id": "google/gemini-1.0-pro-001", "name": "Gemini 1.0 Pro 001", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.7, "helm_mmlu/Abstract Algebra": 0.34, "helm_mmlu/Anatomy": 0.652, "helm_mmlu/College Physics": 0.333, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.553, "helm_mmlu/Global Facts": 0.49, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.762, "helm_mmlu/Professional Psychology": 0.752, "helm_mmlu/Us Foreign Policy": 0.89, "helm_mmlu/Astronomy": 0.796, "helm_mmlu/Business Ethics": 0.69, "helm_mmlu/Clinical Knowledge": 0.758, "helm_mmlu/Conceptual Physics": 0.706, "helm_mmlu/Electrical Engineering": 0.69, "helm_mmlu/Elementary Mathematics": 0.476, "helm_mmlu/Formal Logic": 0.468, "helm_mmlu/High School World History": 0.865, "helm_mmlu/Human Sexuality": 0.618, "helm_mmlu/International Law": 0.876, "helm_mmlu/Logical Fallacies": 0.804, "helm_mmlu/Machine Learning": 0.527, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.91, "helm_mmlu/Medical Genetics": 0.8, "helm_mmlu/Miscellaneous": 0.851, "helm_mmlu/Moral Scenarios": 0.46, "helm_mmlu/Nutrition": 0.788, "helm_mmlu/Prehistory": 0.802, "helm_mmlu/Public Relations": 0.691, "helm_mmlu/Security Studies": 0.804, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.536, "helm_mmlu/World Religions": 0.86, "helm_mmlu/Mean win rate": 0.677 } }, { "id": "google/gemini-1.0-pro-002", "name": "Gemini 1.0 Pro 002", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.422, "helm_lite/NarrativeQA": 0.751, "helm_lite/NaturalQuestions (closed-book)": 0.391, "helm_lite/OpenbookQA": 0.788, "helm_lite/MMLU": 0.534, "helm_lite/MATH": 0.665, "helm_lite/GSM8K": 0.816, "helm_lite/LegalBench": 0.475, "helm_lite/MedQA": 0.483, "helm_lite/WMT 2014": 0.194 } }, { "id": "google/gemini-1.5-flash-001", "name": "Gemini 1.5 Flash 001", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.667, "helm_lite/NarrativeQA": 0.783, "helm_lite/NaturalQuestions (closed-book)": 0.332, "helm_lite/OpenbookQA": 0.928, "helm_lite/MMLU": 0.703, "helm_lite/MATH": 0.753, "helm_lite/GSM8K": 0.785, "helm_lite/LegalBench": 0.661, "helm_lite/MedQA": 0.68, "helm_lite/WMT 2014": 0.225, "helm_mmlu/MMLU All Subjects": 0.779, "helm_mmlu/Abstract Algebra": 0.58, "helm_mmlu/Anatomy": 0.8, "helm_mmlu/College Physics": 0.696, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.614, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.791, "helm_mmlu/Professional Psychology": 0.828, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.882, "helm_mmlu/Business Ethics": 0.81, "helm_mmlu/Clinical Knowledge": 0.834, "helm_mmlu/Conceptual Physics": 0.851, "helm_mmlu/Electrical Engineering": 0.8, "helm_mmlu/Elementary Mathematics": 0.754, "helm_mmlu/Formal Logic": 0.627, "helm_mmlu/High School World History": 0.907, "helm_mmlu/Human Sexuality": 0.374, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.853, "helm_mmlu/Machine Learning": 0.571, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.86, "helm_mmlu/Miscellaneous": 0.886, "helm_mmlu/Moral Scenarios": 0.637, "helm_mmlu/Nutrition": 0.82, "helm_mmlu/Prehistory": 0.867, "helm_mmlu/Public Relations": 0.764, "helm_mmlu/Security Studies": 0.808, "helm_mmlu/Sociology": 0.915, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.47, "reward-bench/Score": 0.8054, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.6349, "reward-bench/Safety": 0.8696, "reward-bench/Reasoning": 0.8512, "reward-bench/Prior Sets (0.5 weight)": 0.6937 } }, { "id": "google/gemini-1.5-flash-002", "name": "Gemini 1.5 Flash 002", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.609, "helm_capabilities/MMLU-Pro": 0.678, "helm_capabilities/GPQA": 0.437, "helm_capabilities/IFEval": 0.831, "helm_capabilities/WildBench": 0.792, "helm_capabilities/Omni-MATH": 0.305, "helm_lite/Mean win rate": 0.573, "helm_lite/NarrativeQA": 0.746, "helm_lite/NaturalQuestions (closed-book)": 0.323, "helm_lite/OpenbookQA": 0.914, "helm_lite/MMLU": 0.679, "helm_lite/MATH": 0.908, "helm_lite/GSM8K": 0.328, "helm_lite/LegalBench": 0.67, "helm_lite/MedQA": 0.656, "helm_lite/WMT 2014": 0.212, "helm_mmlu/MMLU All Subjects": 0.739, "helm_mmlu/Abstract Algebra": 0.63, "helm_mmlu/Anatomy": 0.793, "helm_mmlu/College Physics": 0.637, "helm_mmlu/Computer Security": 0.72, "helm_mmlu/Econometrics": 0.675, "helm_mmlu/Global Facts": 0.47, "helm_mmlu/Jurisprudence": 0.852, "helm_mmlu/Philosophy": 0.797, "helm_mmlu/Professional Psychology": 0.806, "helm_mmlu/Us Foreign Policy": 0.81, "helm_mmlu/Astronomy": 0.895, "helm_mmlu/Business Ethics": 0.27, "helm_mmlu/Clinical Knowledge": 0.792, "helm_mmlu/Conceptual Physics": 0.851, "helm_mmlu/Electrical Engineering": 0.772, "helm_mmlu/Elementary Mathematics": 0.704, "helm_mmlu/Formal Logic": 0.595, "helm_mmlu/High School World History": 0.869, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.752, "helm_mmlu/Logical Fallacies": 0.859, "helm_mmlu/Machine Learning": 0.616, "helm_mmlu/Management": 0.893, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.89, "helm_mmlu/Miscellaneous": 0.9, "helm_mmlu/Moral Scenarios": 0.676, "helm_mmlu/Nutrition": 0.588, "helm_mmlu/Prehistory": 0.762, "helm_mmlu/Public Relations": 0.7, "helm_mmlu/Security Studies": 0.547, "helm_mmlu/Sociology": 0.851, "helm_mmlu/Virology": 0.524, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.817 } }, { "id": "google/gemini-1.5-flash-8b", "name": "google/gemini-1.5-flash-8b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4851, "reward-bench/Factuality": 0.4611, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.5082, "reward-bench/Safety": 0.6622, "reward-bench/Focus": 0.6747, "reward-bench/Ties": 0.2421 } }, { "id": "google/gemini-1.5-flash-preview-0514", "name": "Gemini 1.5 Flash 0514 preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.778, "helm_mmlu/Abstract Algebra": 0.56, "helm_mmlu/Anatomy": 0.807, "helm_mmlu/College Physics": 0.667, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.64, "helm_mmlu/Global Facts": 0.55, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.807, "helm_mmlu/Professional Psychology": 0.825, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.868, "helm_mmlu/Business Ethics": 0.82, "helm_mmlu/Clinical Knowledge": 0.838, "helm_mmlu/Conceptual Physics": 0.855, "helm_mmlu/Electrical Engineering": 0.814, "helm_mmlu/Elementary Mathematics": 0.778, "helm_mmlu/Formal Logic": 0.611, "helm_mmlu/High School World History": 0.907, "helm_mmlu/Human Sexuality": 0.374, "helm_mmlu/International Law": 0.876, "helm_mmlu/Logical Fallacies": 0.853, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.854, "helm_mmlu/Marketing": 0.936, "helm_mmlu/Medical Genetics": 0.86, "helm_mmlu/Miscellaneous": 0.884, "helm_mmlu/Moral Scenarios": 0.631, "helm_mmlu/Nutrition": 0.801, "helm_mmlu/Prehistory": 0.867, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.812, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.713 } }, { "id": "google/gemini-1.5-pro-001", "name": "Gemini 1.5 Pro 001", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.739, "helm_lite/NarrativeQA": 0.783, "helm_lite/NaturalQuestions (closed-book)": 0.378, "helm_lite/OpenbookQA": 0.902, "helm_lite/MMLU": 0.772, "helm_lite/MATH": 0.825, "helm_lite/GSM8K": 0.836, "helm_lite/LegalBench": 0.757, "helm_lite/MedQA": 0.692, "helm_lite/WMT 2014": 0.189, "helm_mmlu/MMLU All Subjects": 0.827, "helm_mmlu/Abstract Algebra": 0.75, "helm_mmlu/Anatomy": 0.83, "helm_mmlu/College Physics": 0.745, "helm_mmlu/Computer Security": 0.83, "helm_mmlu/Econometrics": 0.728, "helm_mmlu/Global Facts": 0.66, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.871, "helm_mmlu/Professional Psychology": 0.894, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.914, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.853, "helm_mmlu/Conceptual Physics": 0.949, "helm_mmlu/Electrical Engineering": 0.745, "helm_mmlu/Elementary Mathematics": 0.939, "helm_mmlu/Formal Logic": 0.706, "helm_mmlu/High School World History": 0.924, "helm_mmlu/Human Sexuality": 0.374, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.896, "helm_mmlu/Machine Learning": 0.652, "helm_mmlu/Management": 0.922, "helm_mmlu/Marketing": 0.932, "helm_mmlu/Medical Genetics": 0.91, "helm_mmlu/Miscellaneous": 0.958, "helm_mmlu/Moral Scenarios": 0.739, "helm_mmlu/Nutrition": 0.879, "helm_mmlu/Prehistory": 0.87, "helm_mmlu/Public Relations": 0.818, "helm_mmlu/Security Studies": 0.873, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.349 } }, { "id": "google/gemini-1.5-pro-002", "name": "Gemini 1.5 Pro 002", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.657, "helm_capabilities/MMLU-Pro": 0.737, "helm_capabilities/GPQA": 0.534, "helm_capabilities/IFEval": 0.837, "helm_capabilities/WildBench": 0.813, "helm_capabilities/Omni-MATH": 0.364, "helm_lite/Mean win rate": 0.842, "helm_lite/NarrativeQA": 0.756, "helm_lite/NaturalQuestions (closed-book)": 0.455, "helm_lite/OpenbookQA": 0.952, "helm_lite/MMLU": 0.795, "helm_lite/MATH": 0.92, "helm_lite/GSM8K": 0.817, "helm_lite/LegalBench": 0.747, "helm_lite/MedQA": 0.771, "helm_lite/WMT 2014": 0.231, "helm_mmlu/MMLU All Subjects": 0.869, "helm_mmlu/Abstract Algebra": 0.82, "helm_mmlu/Anatomy": 0.83, "helm_mmlu/College Physics": 0.863, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.693, "helm_mmlu/Global Facts": 0.77, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.887, "helm_mmlu/Professional Psychology": 0.912, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.934, "helm_mmlu/Business Ethics": 0.84, "helm_mmlu/Clinical Knowledge": 0.906, "helm_mmlu/Conceptual Physics": 0.945, "helm_mmlu/Electrical Engineering": 0.855, "helm_mmlu/Elementary Mathematics": 0.942, "helm_mmlu/Formal Logic": 0.754, "helm_mmlu/High School World History": 0.937, "helm_mmlu/Human Sexuality": 0.878, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.902, "helm_mmlu/Machine Learning": 0.83, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.962, "helm_mmlu/Medical Genetics": 0.92, "helm_mmlu/Miscellaneous": 0.959, "helm_mmlu/Moral Scenarios": 0.792, "helm_mmlu/Nutrition": 0.886, "helm_mmlu/Prehistory": 0.926, "helm_mmlu/Public Relations": 0.809, "helm_mmlu/Security Studies": 0.857, "helm_mmlu/Sociology": 0.95, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.889, "helm_mmlu/Mean win rate": 0.334 } }, { "id": "google/gemini-1.5-pro-0514", "name": "google/gemini-1.5-pro-0514", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.882, "reward-bench/Chat": 0.9232, "reward-bench/Chat Hard": 0.8059, "reward-bench/Safety": 0.8791, "reward-bench/Reasoning": 0.9199 } }, { "id": "google/gemini-1.5-pro-0924", "name": "google/gemini-1.5-pro-0924", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8678, "reward-bench/Chat": 0.9413, "reward-bench/Chat Hard": 0.7697, "reward-bench/Safety": 0.8581, "reward-bench/Reasoning": 0.9022 } }, { "id": "google/gemini-1.5-pro-preview-0409", "name": "Gemini 1.5 Pro 0409 preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.81, "helm_mmlu/Abstract Algebra": 0.6, "helm_mmlu/Anatomy": 0.77, "helm_mmlu/College Physics": 0.804, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.737, "helm_mmlu/Global Facts": 0.66, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.846, "helm_mmlu/Professional Psychology": 0.866, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.914, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.868, "helm_mmlu/Conceptual Physics": 0.915, "helm_mmlu/Electrical Engineering": 0.772, "helm_mmlu/Elementary Mathematics": 0.884, "helm_mmlu/Formal Logic": 0.643, "helm_mmlu/High School World History": 0.924, "helm_mmlu/Human Sexuality": 0.397, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.859, "helm_mmlu/Machine Learning": 0.67, "helm_mmlu/Management": 0.874, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.91, "helm_mmlu/Miscellaneous": 0.928, "helm_mmlu/Moral Scenarios": 0.696, "helm_mmlu/Nutrition": 0.846, "helm_mmlu/Prehistory": 0.886, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.849, "helm_mmlu/Sociology": 0.925, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.118 } }, { "id": "google/gemini-2-5-flash-fc", "name": "Gemini-2.5-Flash (FC)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 15.0, "bfcl/bfcl.overall.overall_accuracy": 56.24, "bfcl/bfcl.overall.total_cost_usd": 26.36, "bfcl/bfcl.overall.latency_mean_s": 2.99, "bfcl/bfcl.overall.latency_std_s": 9.22, "bfcl/bfcl.overall.latency_p95_s": 5.62, "bfcl/bfcl.non_live.ast_accuracy": 84.96, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 79.5, "bfcl/bfcl.live.live_accuracy": 74.39, "bfcl/bfcl.live.live_simple_ast_accuracy": 85.27, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.7, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 36.25, "bfcl/bfcl.multi_turn.base_accuracy": 41.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 36.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 32.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 35.5, "bfcl/bfcl.web_search.accuracy": 59.0, "bfcl/bfcl.web_search.base_accuracy": 59.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 59.0, "bfcl/bfcl.memory.accuracy": 41.29, "bfcl/bfcl.memory.kv_accuracy": 19.35, "bfcl/bfcl.memory.vector_accuracy": 50.32, "bfcl/bfcl.memory.recursive_summarization_accuracy": 54.19, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 93.67 } }, { "id": "google/gemini-2-5-flash-lite-fc", "name": "Gemini-2.5-Flash-Lite (FC)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 52.0, "bfcl/bfcl.overall.overall_accuracy": 36.87, "bfcl/bfcl.overall.total_cost_usd": 7.55, "bfcl/bfcl.overall.latency_mean_s": 1.18, "bfcl/bfcl.overall.latency_std_s": 8.06, "bfcl/bfcl.overall.latency_p95_s": 1.67, "bfcl/bfcl.non_live.ast_accuracy": 86.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 90.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.0, "bfcl/bfcl.live.live_accuracy": 65.8, "bfcl/bfcl.live.live_simple_ast_accuracy": 73.26, "bfcl/bfcl.live.live_multiple_ast_accuracy": 63.82, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 13.5, "bfcl/bfcl.multi_turn.base_accuracy": 20.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 15.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 17.5, "bfcl/bfcl.web_search.accuracy": 21.0, "bfcl/bfcl.web_search.base_accuracy": 26.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 16.0, "bfcl/bfcl.memory.accuracy": 20.65, "bfcl/bfcl.memory.kv_accuracy": 3.87, "bfcl/bfcl.memory.vector_accuracy": 6.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 51.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 43.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 92.5 } }, { "id": "google/gemini-2-5-flash-lite-prompt", "name": "Gemini-2.5-Flash-Lite (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 73.0, "bfcl/bfcl.overall.overall_accuracy": 28.03, "bfcl/bfcl.overall.total_cost_usd": 7.05, "bfcl/bfcl.overall.latency_mean_s": 1.0, "bfcl/bfcl.overall.latency_std_s": 4.75, "bfcl/bfcl.overall.latency_p95_s": 1.4, "bfcl/bfcl.non_live.ast_accuracy": 83.9, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 86.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 90.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 54.85, "bfcl/bfcl.live.live_simple_ast_accuracy": 67.05, "bfcl/bfcl.live.live_multiple_ast_accuracy": 51.66, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 50.0, "bfcl/bfcl.multi_turn.accuracy": 7.63, "bfcl/bfcl.multi_turn.base_accuracy": 10.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 5.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 6.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 9.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 12.69, "bfcl/bfcl.memory.kv_accuracy": 1.94, "bfcl/bfcl.memory.vector_accuracy": 6.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 29.68, "bfcl/bfcl.relevance.relevance_detection_accuracy": 50.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 93.33, "bfcl/bfcl.format_sensitivity.max_delta": 25.5, "bfcl/bfcl.format_sensitivity.stddev": 6.68 } }, { "id": "google/gemini-2-5-flash-prompt", "name": "Gemini-2.5-Flash (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 26.0, "bfcl/bfcl.overall.overall_accuracy": 50.9, "bfcl/bfcl.overall.total_cost_usd": 33.45, "bfcl/bfcl.overall.latency_mean_s": 3.18, "bfcl/bfcl.overall.latency_std_s": 4.44, "bfcl/bfcl.overall.latency_p95_s": 6.09, "bfcl/bfcl.non_live.ast_accuracy": 88.08, "bfcl/bfcl.non_live.simple_ast_accuracy": 77.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 96.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 87.5, "bfcl/bfcl.live.live_accuracy": 78.16, "bfcl/bfcl.live.live_simple_ast_accuracy": 87.21, "bfcl/bfcl.live.live_multiple_ast_accuracy": 75.97, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 16.75, "bfcl/bfcl.multi_turn.base_accuracy": 14.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 17.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 18.5, "bfcl/bfcl.web_search.accuracy": 62.0, "bfcl/bfcl.web_search.base_accuracy": 60.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 64.0, "bfcl/bfcl.memory.accuracy": 38.71, "bfcl/bfcl.memory.kv_accuracy": 13.55, "bfcl/bfcl.memory.vector_accuracy": 47.1, "bfcl/bfcl.memory.recursive_summarization_accuracy": 55.48, "bfcl/bfcl.relevance.relevance_detection_accuracy": 62.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 91.09, "bfcl/bfcl.format_sensitivity.max_delta": 9.0, "bfcl/bfcl.format_sensitivity.stddev": 2.45 } }, { "id": "google/gemini-2.0-flash-001", "name": "Gemini 2.0 Flash", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.679, "helm_capabilities/MMLU-Pro": 0.737, "helm_capabilities/GPQA": 0.556, "helm_capabilities/IFEval": 0.841, "helm_capabilities/WildBench": 0.8, "helm_capabilities/Omni-MATH": 0.459 } }, { "id": "google/gemini-2.0-flash-exp", "name": "Gemini 2.0 Flash Experimental", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.813, "helm_lite/NarrativeQA": 0.783, "helm_lite/NaturalQuestions (closed-book)": 0.443, "helm_lite/OpenbookQA": 0.946, "helm_lite/MMLU": 0.717, "helm_lite/MATH": 0.901, "helm_lite/GSM8K": 0.946, "helm_lite/LegalBench": 0.674, "helm_lite/MedQA": 0.73, "helm_lite/WMT 2014": 0.212, "helm_mmlu/MMLU All Subjects": 0.797, "helm_mmlu/Abstract Algebra": 0.72, "helm_mmlu/Anatomy": 0.807, "helm_mmlu/College Physics": 0.696, "helm_mmlu/Computer Security": 0.83, "helm_mmlu/Econometrics": 0.693, "helm_mmlu/Global Facts": 0.66, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.887, "helm_mmlu/Professional Psychology": 0.876, "helm_mmlu/Us Foreign Policy": 0.78, "helm_mmlu/Astronomy": 0.928, "helm_mmlu/Business Ethics": 0.73, "helm_mmlu/Clinical Knowledge": 0.879, "helm_mmlu/Conceptual Physics": 0.813, "helm_mmlu/Electrical Engineering": 0.834, "helm_mmlu/Elementary Mathematics": 0.857, "helm_mmlu/Formal Logic": 0.571, "helm_mmlu/High School World History": 0.743, "helm_mmlu/Human Sexuality": 0.901, "helm_mmlu/International Law": 0.645, "helm_mmlu/Logical Fallacies": 0.914, "helm_mmlu/Machine Learning": 0.759, "helm_mmlu/Management": 0.718, "helm_mmlu/Marketing": 0.944, "helm_mmlu/Medical Genetics": 0.89, "helm_mmlu/Miscellaneous": 0.939, "helm_mmlu/Moral Scenarios": 0.815, "helm_mmlu/Nutrition": 0.856, "helm_mmlu/Prehistory": 0.898, "helm_mmlu/Public Relations": 0.791, "helm_mmlu/Security Studies": 0.69, "helm_mmlu/Sociology": 0.786, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.731, "helm_mmlu/Mean win rate": 0.567 } }, { "id": "google/gemini-2.0-flash-lite-preview-02-05", "name": "Gemini 2.0 Flash Lite 02-05 preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.642, "helm_capabilities/MMLU-Pro": 0.72, "helm_capabilities/GPQA": 0.5, "helm_capabilities/IFEval": 0.824, "helm_capabilities/WildBench": 0.79, "helm_capabilities/Omni-MATH": 0.374 } }, { "id": "google/gemini-2.5-flash", "name": "Gemini 2.5 Flash", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.9145, "global-mmlu-lite/Culturally Sensitive": 0.9, "global-mmlu-lite/Culturally Agnostic": 0.9291, "global-mmlu-lite/Arabic": 0.9125, "global-mmlu-lite/English": 0.9325, "global-mmlu-lite/Bengali": 0.91, "global-mmlu-lite/German": 0.9025, "global-mmlu-lite/French": 0.91, "global-mmlu-lite/Hindi": 0.925, "global-mmlu-lite/Indonesian": 0.9075, "global-mmlu-lite/Italian": 0.9225, "global-mmlu-lite/Japanese": 0.9125, "global-mmlu-lite/Korean": 0.915, "global-mmlu-lite/Portuguese": 0.9125, "global-mmlu-lite/Spanish": 0.9175, "global-mmlu-lite/Swahili": 0.915, "global-mmlu-lite/Yoruba": 0.9075, "global-mmlu-lite/Chinese": 0.915, "global-mmlu-lite/Burmese": 0.915, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.028169014084507043, "livecodebenchpro/Easy Problems": 0.38028169014084506, "reward-bench/Score": 0.7767, "reward-bench/Factuality": 0.674, "reward-bench/Precise IF": 0.575, "reward-bench/Math": 0.852, "reward-bench/Safety": 0.909, "reward-bench/Focus": 0.841, "reward-bench/Ties": 0.809, "terminal-bench-2.0/terminal-bench-2.0": 16.9 } }, { "id": "google/gemini-2.5-flash-lite", "name": "Gemini 2.5 Flash-Lite", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.591, "helm_capabilities/MMLU-Pro": 0.537, "helm_capabilities/GPQA": 0.309, "helm_capabilities/IFEval": 0.81, "helm_capabilities/WildBench": 0.818, "helm_capabilities/Omni-MATH": 0.48 } }, { "id": "google/gemini-2.5-flash-preview-04-17", "name": "Gemini 2.5 Flash 04-17 preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.626, "helm_capabilities/MMLU-Pro": 0.639, "helm_capabilities/GPQA": 0.39, "helm_capabilities/IFEval": 0.898, "helm_capabilities/WildBench": 0.817, "helm_capabilities/Omni-MATH": 0.384, "reward-bench/Score": 0.7721, "reward-bench/Factuality": 0.6574, "reward-bench/Precise IF": 0.5531, "reward-bench/Math": 0.8115, "reward-bench/Safety": 0.9094, "reward-bench/Focus": 0.8672, "reward-bench/Ties": 0.8341 } }, { "id": "google/gemini-2.5-flash-preview-05-20", "name": "gemini-2.5-flash-preview-05-20", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.9092, "global-mmlu-lite/Culturally Sensitive": 0.8925, "global-mmlu-lite/Culturally Agnostic": 0.9259, "global-mmlu-lite/Arabic": 0.905, "global-mmlu-lite/English": 0.9225, "global-mmlu-lite/Bengali": 0.91, "global-mmlu-lite/German": 0.905, "global-mmlu-lite/French": 0.925, "global-mmlu-lite/Hindi": 0.9125, "global-mmlu-lite/Indonesian": 0.9075, "global-mmlu-lite/Italian": 0.89, "global-mmlu-lite/Japanese": 0.9125, "global-mmlu-lite/Korean": 0.9075, "global-mmlu-lite/Portuguese": 0.915, "global-mmlu-lite/Spanish": 0.915, "global-mmlu-lite/Swahili": 0.905, "global-mmlu-lite/Yoruba": 0.8825, "global-mmlu-lite/Chinese": 0.93, "global-mmlu-lite/Burmese": 0.9025 } }, { "id": "google/gemini-2.5-pro", "name": "Gemini 2.5 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.9323, "global-mmlu-lite/Culturally Sensitive": 0.9241, "global-mmlu-lite/Culturally Agnostic": 0.9406, "global-mmlu-lite/Arabic": 0.9475, "global-mmlu-lite/English": 0.9275, "global-mmlu-lite/Bengali": 0.9275, "global-mmlu-lite/German": 0.93, "global-mmlu-lite/French": 0.9425, "global-mmlu-lite/Hindi": 0.9275, "global-mmlu-lite/Indonesian": 0.925, "global-mmlu-lite/Italian": 0.935, "global-mmlu-lite/Japanese": 0.9375, "global-mmlu-lite/Korean": 0.9275, "global-mmlu-lite/Portuguese": 0.93, "global-mmlu-lite/Spanish": 0.94, "global-mmlu-lite/Swahili": 0.9375, "global-mmlu-lite/Yoruba": 0.925, "global-mmlu-lite/Chinese": 0.9275, "global-mmlu-lite/Burmese": 0.93, "livecodebenchpro/Hard Problems": 0.014084507042253521, "livecodebenchpro/Medium Problems": 0.2112676056338028, "livecodebenchpro/Easy Problems": 0.7183098591549296, "reward-bench/Score": 0.7948, "reward-bench/Factuality": 0.755, "reward-bench/Precise IF": 0.619, "reward-bench/Math": 0.898, "reward-bench/Safety": 0.881, "reward-bench/Focus": 0.805, "reward-bench/Ties": 0.811, "terminal-bench-2.0/terminal-bench-2.0": 19.6 } }, { "id": "google/gemini-2.5-pro-preview-03-25", "name": "Gemini 2.5 Pro 03-25 preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.745, "helm_capabilities/MMLU-Pro": 0.863, "helm_capabilities/GPQA": 0.749, "helm_capabilities/IFEval": 0.84, "helm_capabilities/WildBench": 0.857, "helm_capabilities/Omni-MATH": 0.416 } }, { "id": "google/gemini-2.5-pro-preview-05-06", "name": "google/gemini-2.5-pro-preview-05-06", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6775, "reward-bench/Factuality": 0.6532, "reward-bench/Precise IF": 0.4688, "reward-bench/Math": 0.5342, "reward-bench/Safety": 0.8806, "reward-bench/Focus": 0.8308, "reward-bench/Ties": 0.6973 } }, { "id": "google/gemini-3-flash", "name": "Gemini 3 Flash", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 47.4 } }, { "id": "google/gemini-3-pro", "name": "Gemini 3 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 56.9 } }, { "id": "google/gemini-3-pro-preview", "name": "gemini-3-pro-preview", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "appworld_test_normal/appworld/test_normal": 0.505, "browsecompplus/browsecompplus": 0.48, "global-mmlu-lite/Global MMLU Lite": 0.9453, "global-mmlu-lite/Culturally Sensitive": 0.9397, "global-mmlu-lite/Culturally Agnostic": 0.9509, "global-mmlu-lite/Arabic": 0.9475, "global-mmlu-lite/English": 0.9425, "global-mmlu-lite/Bengali": 0.9425, "global-mmlu-lite/German": 0.94, "global-mmlu-lite/French": 0.9575, "global-mmlu-lite/Hindi": 0.9425, "global-mmlu-lite/Indonesian": 0.955, "global-mmlu-lite/Italian": 0.955, "global-mmlu-lite/Japanese": 0.94, "global-mmlu-lite/Korean": 0.94, "global-mmlu-lite/Portuguese": 0.9425, "global-mmlu-lite/Spanish": 0.9475, "global-mmlu-lite/Swahili": 0.94, "global-mmlu-lite/Yoruba": 0.9425, "global-mmlu-lite/Chinese": 0.9475, "global-mmlu-lite/Burmese": 0.9425, "swe-bench/swe-bench": 0.71, "tau-bench-2_airline/tau-bench-2/airline": 0.62, "tau-bench-2_retail/tau-bench-2/retail": 0.7576, "tau-bench-2_telecom/tau-bench-2/telecom": 0.73 } }, { "id": "google/gemini-3-pro-preview-fc", "name": "Gemini-3-Pro-Preview (FC)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 7.0, "bfcl/bfcl.overall.overall_accuracy": 68.14, "bfcl/bfcl.overall.total_cost_usd": 224.69, "bfcl/bfcl.overall.latency_mean_s": 15.87, "bfcl/bfcl.overall.latency_std_s": 41.41, "bfcl/bfcl.overall.latency_p95_s": 58.48, "bfcl/bfcl.non_live.ast_accuracy": 85.75, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 82.5, "bfcl/bfcl.live.live_accuracy": 81.72, "bfcl/bfcl.live.live_simple_ast_accuracy": 87.6, "bfcl/bfcl.live.live_multiple_ast_accuracy": 80.44, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 63.12, "bfcl/bfcl.multi_turn.base_accuracy": 69.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 63.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 56.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 64.0, "bfcl/bfcl.web_search.accuracy": 68.5, "bfcl/bfcl.web_search.base_accuracy": 63.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 74.0, "bfcl/bfcl.memory.accuracy": 54.84, "bfcl/bfcl.memory.kv_accuracy": 50.32, "bfcl/bfcl.memory.vector_accuracy": 63.23, "bfcl/bfcl.memory.recursive_summarization_accuracy": 50.97, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 77.85 } }, { "id": "google/gemini-3-pro-preview-prompt", "name": "Gemini-3-Pro-Preview (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 3.0, "bfcl/bfcl.overall.overall_accuracy": 72.51, "bfcl/bfcl.overall.total_cost_usd": 298.47, "bfcl/bfcl.overall.latency_mean_s": 12.08, "bfcl/bfcl.overall.latency_std_s": 21.3, "bfcl/bfcl.overall.latency_p95_s": 32.73, "bfcl/bfcl.non_live.ast_accuracy": 90.65, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 96.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.0, "bfcl/bfcl.live.live_accuracy": 83.12, "bfcl/bfcl.live.live_simple_ast_accuracy": 87.6, "bfcl/bfcl.live.live_multiple_ast_accuracy": 81.77, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 87.5, "bfcl/bfcl.multi_turn.accuracy": 60.75, "bfcl/bfcl.multi_turn.base_accuracy": 64.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 60.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 54.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 64.0, "bfcl/bfcl.web_search.accuracy": 80.0, "bfcl/bfcl.web_search.base_accuracy": 78.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 82.0, "bfcl/bfcl.memory.accuracy": 61.72, "bfcl/bfcl.memory.kv_accuracy": 59.35, "bfcl/bfcl.memory.vector_accuracy": 62.58, "bfcl/bfcl.memory.recursive_summarization_accuracy": 63.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 85.59, "bfcl/bfcl.format_sensitivity.max_delta": 8.5, "bfcl/bfcl.format_sensitivity.stddev": 1.7 } }, { "id": "google/gemini-3.1-pro", "name": "Gemini 3.1 Pro", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 74.8 } }, { "id": "google/gemma-1.1-2b-it", "name": "gemma-1.1-2b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3067, "hfopenllm_v2/BBH": 0.3185, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.1484 } }, { "id": "google/gemma-1.1-7b-it", "name": "gemma-1.1-7b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5039, "hfopenllm_v2/BBH": 0.3935, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.423, "hfopenllm_v2/MMLU-PRO": 0.2584 } }, { "id": "google/gemma-2-27b", "name": "Gemma 2 27B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.757, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.77, "helm_mmlu/College Physics": 0.5, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.667, "helm_mmlu/Global Facts": 0.43, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.849, "helm_mmlu/Professional Psychology": 0.84, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.829, "helm_mmlu/Business Ethics": 0.78, "helm_mmlu/Clinical Knowledge": 0.808, "helm_mmlu/Conceptual Physics": 0.834, "helm_mmlu/Electrical Engineering": 0.738, "helm_mmlu/Elementary Mathematics": 0.558, "helm_mmlu/Formal Logic": 0.516, "helm_mmlu/High School World History": 0.89, "helm_mmlu/Human Sexuality": 0.84, "helm_mmlu/International Law": 0.843, "helm_mmlu/Logical Fallacies": 0.865, "helm_mmlu/Machine Learning": 0.625, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.87, "helm_mmlu/Miscellaneous": 0.885, "helm_mmlu/Moral Scenarios": 0.394, "helm_mmlu/Nutrition": 0.824, "helm_mmlu/Prehistory": 0.877, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.808, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.56, "helm_mmlu/World Religions": 0.924, "helm_mmlu/Mean win rate": 0.05, "hfopenllm_v2/IFEval": 0.2475, "hfopenllm_v2/BBH": 0.5643, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4396, "hfopenllm_v2/MMLU-PRO": 0.4371 } }, { "id": "google/gemma-2-27b-it", "name": "Gemma 2 Instruct 27B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.675, "helm_lite/NarrativeQA": 0.79, "helm_lite/NaturalQuestions (closed-book)": 0.353, "helm_lite/OpenbookQA": 0.918, "helm_lite/MMLU": 0.664, "helm_lite/MATH": 0.746, "helm_lite/GSM8K": 0.812, "helm_lite/LegalBench": 0.7, "helm_lite/MedQA": 0.684, "helm_lite/WMT 2014": 0.214, "hfopenllm_v2/IFEval": 0.7978, "hfopenllm_v2/BBH": 0.6451, "hfopenllm_v2/MATH Level 5": 0.2387, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4033, "hfopenllm_v2/MMLU-PRO": 0.4451, "reward-bench/Score": 0.809, "reward-bench/Chat": 0.9483, "reward-bench/Chat Hard": 0.591, "reward-bench/Safety": 0.8635, "reward-bench/Reasoning": 0.833 } }, { "id": "google/gemma-2-2b", "name": "gemma-2-2b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1993, "hfopenllm_v2/BBH": 0.3656, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.218 } }, { "id": "google/gemma-2-2b-it", "name": "gemma-2-2b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5668, "hfopenllm_v2/BBH": 0.4199, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3929, "hfopenllm_v2/MMLU-PRO": 0.255 } }, { "id": "google/gemma-2-2b-jpn-it", "name": "gemma-2-2b-jpn-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5288, "hfopenllm_v2/BBH": 0.4178, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3728, "hfopenllm_v2/MMLU-PRO": 0.2467 } }, { "id": "google/gemma-2-9b", "name": "Gemma 2 9B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.721, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.704, "helm_mmlu/College Physics": 0.5, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.579, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.833, "helm_mmlu/Philosophy": 0.772, "helm_mmlu/Professional Psychology": 0.788, "helm_mmlu/Us Foreign Policy": 0.9, "helm_mmlu/Astronomy": 0.789, "helm_mmlu/Business Ethics": 0.77, "helm_mmlu/Clinical Knowledge": 0.777, "helm_mmlu/Conceptual Physics": 0.732, "helm_mmlu/Electrical Engineering": 0.724, "helm_mmlu/Elementary Mathematics": 0.577, "helm_mmlu/Formal Logic": 0.492, "helm_mmlu/High School World History": 0.865, "helm_mmlu/Human Sexuality": 0.809, "helm_mmlu/International Law": 0.835, "helm_mmlu/Logical Fallacies": 0.816, "helm_mmlu/Machine Learning": 0.509, "helm_mmlu/Management": 0.874, "helm_mmlu/Marketing": 0.919, "helm_mmlu/Medical Genetics": 0.84, "helm_mmlu/Miscellaneous": 0.844, "helm_mmlu/Moral Scenarios": 0.295, "helm_mmlu/Nutrition": 0.775, "helm_mmlu/Prehistory": 0.812, "helm_mmlu/Public Relations": 0.736, "helm_mmlu/Security Studies": 0.78, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.53, "helm_mmlu/World Religions": 0.86, "helm_mmlu/Mean win rate": 0.265, "hfopenllm_v2/IFEval": 0.204, "hfopenllm_v2/BBH": 0.5377, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4461, "hfopenllm_v2/MMLU-PRO": 0.4103 } }, { "id": "google/gemma-2-9b-it", "name": "Gemma 2 Instruct 9B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.562, "helm_lite/NarrativeQA": 0.768, "helm_lite/NaturalQuestions (closed-book)": 0.328, "helm_lite/OpenbookQA": 0.91, "helm_lite/MMLU": 0.645, "helm_lite/MATH": 0.724, "helm_lite/GSM8K": 0.762, "helm_lite/LegalBench": 0.639, "helm_lite/MedQA": 0.63, "helm_lite/WMT 2014": 0.201, "hfopenllm_v2/IFEval": 0.7436, "hfopenllm_v2/BBH": 0.599, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3875, "la_leaderboard/la_leaderboard": 33.62 } }, { "id": "google/gemma-2b", "name": "gemma-2b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2038, "hfopenllm_v2/BBH": 0.3366, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3978, "hfopenllm_v2/MMLU-PRO": 0.1366 } }, { "id": "google/gemma-2b-it", "name": "gemma-2b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.269, "hfopenllm_v2/BBH": 0.3151, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1353 } }, { "id": "google/gemma-3-12b-it-prompt", "name": "Gemma-3-12b-it (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 66.0, "bfcl/bfcl.overall.overall_accuracy": 30.43, "bfcl/bfcl.overall.total_cost_usd": 10.77, "bfcl/bfcl.overall.latency_mean_s": 11.1, "bfcl/bfcl.overall.latency_std_s": 17.17, "bfcl/bfcl.overall.latency_p95_s": 34.66, "bfcl/bfcl.non_live.ast_accuracy": 79.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 56.5, "bfcl/bfcl.live.live_accuracy": 74.24, "bfcl/bfcl.live.live_simple_ast_accuracy": 85.66, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.89, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 45.83, "bfcl/bfcl.multi_turn.accuracy": 5.75, "bfcl/bfcl.multi_turn.base_accuracy": 6.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 7.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 5.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 4.0, "bfcl/bfcl.web_search.accuracy": 4.0, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 4.0, "bfcl/bfcl.memory.accuracy": 27.53, "bfcl/bfcl.memory.kv_accuracy": 8.39, "bfcl/bfcl.memory.vector_accuracy": 25.16, "bfcl/bfcl.memory.recursive_summarization_accuracy": 49.03, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 70.29, "bfcl/bfcl.format_sensitivity.max_delta": 67.5, "bfcl/bfcl.format_sensitivity.stddev": 22.41 } }, { "id": "google/gemma-3-1b-it-prompt", "name": "Gemma-3-1b-it (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 109.0, "bfcl/bfcl.overall.overall_accuracy": 7.17, "bfcl/bfcl.overall.total_cost_usd": 3.4, "bfcl/bfcl.overall.latency_mean_s": 3.98, "bfcl/bfcl.overall.latency_std_s": 9.8, "bfcl/bfcl.overall.latency_p95_s": 12.06, "bfcl/bfcl.non_live.ast_accuracy": 20.21, "bfcl/bfcl.non_live.simple_ast_accuracy": 43.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 36.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 1.5, "bfcl/bfcl.live.live_accuracy": 11.84, "bfcl/bfcl.live.live_simple_ast_accuracy": 36.43, "bfcl/bfcl.live.live_multiple_ast_accuracy": 6.27, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 0.0, "bfcl/bfcl.multi_turn.base_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 3.23, "bfcl/bfcl.memory.kv_accuracy": 3.87, "bfcl/bfcl.memory.vector_accuracy": 3.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 37.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 33.18, "bfcl/bfcl.format_sensitivity.max_delta": 25.5, "bfcl/bfcl.format_sensitivity.stddev": 9.76 } }, { "id": "google/gemma-3-27b-it", "name": "gemma-3-27b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.763, "global-mmlu-lite/Culturally Sensitive": 0.7528, "global-mmlu-lite/Culturally Agnostic": 0.7733, "global-mmlu-lite/Arabic": 0.78, "global-mmlu-lite/English": 0.7337, "global-mmlu-lite/Bengali": 0.75, "global-mmlu-lite/German": 0.775, "global-mmlu-lite/French": 0.7481, "global-mmlu-lite/Hindi": 0.7335, "global-mmlu-lite/Indonesian": 0.7563, "global-mmlu-lite/Italian": 0.75, "global-mmlu-lite/Japanese": 0.7925, "global-mmlu-lite/Korean": 0.798, "global-mmlu-lite/Portuguese": 0.7481, "global-mmlu-lite/Spanish": 0.7494, "global-mmlu-lite/Swahili": 0.785, "global-mmlu-lite/Yoruba": 0.7444, "global-mmlu-lite/Chinese": 0.7925, "global-mmlu-lite/Burmese": 0.7719 } }, { "id": "google/gemma-3-27b-it-prompt", "name": "Gemma-3-27b-it (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 69.0, "bfcl/bfcl.overall.overall_accuracy": 29.47, "bfcl/bfcl.overall.total_cost_usd": 11.82, "bfcl/bfcl.overall.latency_mean_s": 10.88, "bfcl/bfcl.overall.latency_std_s": 19.67, "bfcl/bfcl.overall.latency_p95_s": 55.5, "bfcl/bfcl.non_live.ast_accuracy": 87.17, "bfcl/bfcl.non_live.simple_ast_accuracy": 77.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 74.54, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 72.46, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 45.83, "bfcl/bfcl.multi_turn.accuracy": 10.75, "bfcl/bfcl.multi_turn.base_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 8.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 14.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 13.55, "bfcl/bfcl.memory.kv_accuracy": 1.94, "bfcl/bfcl.memory.vector_accuracy": 3.23, "bfcl/bfcl.memory.recursive_summarization_accuracy": 35.48, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 73.67, "bfcl/bfcl.format_sensitivity.max_delta": 34.0, "bfcl/bfcl.format_sensitivity.stddev": 8.06 } }, { "id": "google/gemma-3-4b-it", "name": "gemma-3-4b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.6511, "global-mmlu-lite/Culturally Sensitive": 0.6116, "global-mmlu-lite/Culturally Agnostic": 0.6906, "global-mmlu-lite/Arabic": 0.6525, "global-mmlu-lite/English": 0.67, "global-mmlu-lite/Bengali": 0.68, "global-mmlu-lite/German": 0.6525, "global-mmlu-lite/French": 0.6575, "global-mmlu-lite/Hindi": 0.6475, "global-mmlu-lite/Indonesian": 0.6775, "global-mmlu-lite/Italian": 0.6675, "global-mmlu-lite/Japanese": 0.6325, "global-mmlu-lite/Korean": 0.66, "global-mmlu-lite/Portuguese": 0.68, "global-mmlu-lite/Spanish": 0.6725, "global-mmlu-lite/Swahili": 0.6075, "global-mmlu-lite/Yoruba": 0.5825, "global-mmlu-lite/Chinese": 0.6475, "global-mmlu-lite/Burmese": 0.63 } }, { "id": "google/gemma-3-4b-it-prompt", "name": "Gemma-3-4b-it (Prompt)", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 101.0, "bfcl/bfcl.overall.overall_accuracy": 19.62, "bfcl/bfcl.overall.total_cost_usd": 4.14, "bfcl/bfcl.overall.latency_mean_s": 4.69, "bfcl/bfcl.overall.latency_std_s": 9.53, "bfcl/bfcl.overall.latency_p95_s": 11.42, "bfcl/bfcl.non_live.ast_accuracy": 61.12, "bfcl/bfcl.non_live.simple_ast_accuracy": 64.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 56.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 36.0, "bfcl/bfcl.live.live_accuracy": 60.84, "bfcl/bfcl.live.live_simple_ast_accuracy": 70.93, "bfcl/bfcl.live.live_multiple_ast_accuracy": 59.35, "bfcl/bfcl.live.live_parallel_ast_accuracy": 25.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 41.67, "bfcl/bfcl.multi_turn.accuracy": 0.38, "bfcl/bfcl.multi_turn.base_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.5, "bfcl/bfcl.web_search.accuracy": 1.0, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 8.6, "bfcl/bfcl.memory.kv_accuracy": 9.68, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 6.45, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 53.94, "bfcl/bfcl.format_sensitivity.max_delta": 69.5, "bfcl/bfcl.format_sensitivity.stddev": 23.67 } }, { "id": "google/gemma-7b", "name": "Gemma 7B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.336, "helm_lite/NarrativeQA": 0.752, "helm_lite/NaturalQuestions (closed-book)": 0.336, "helm_lite/OpenbookQA": 0.808, "helm_lite/MMLU": 0.571, "helm_lite/MATH": 0.5, "helm_lite/GSM8K": 0.559, "helm_lite/LegalBench": 0.581, "helm_lite/MedQA": 0.513, "helm_lite/WMT 2014": 0.187, "helm_mmlu/MMLU All Subjects": 0.661, "helm_mmlu/Abstract Algebra": 0.28, "helm_mmlu/Anatomy": 0.563, "helm_mmlu/College Physics": 0.412, "helm_mmlu/Computer Security": 0.75, "helm_mmlu/Econometrics": 0.474, "helm_mmlu/Global Facts": 0.42, "helm_mmlu/Jurisprudence": 0.769, "helm_mmlu/Philosophy": 0.727, "helm_mmlu/Professional Psychology": 0.712, "helm_mmlu/Us Foreign Policy": 0.87, "helm_mmlu/Astronomy": 0.717, "helm_mmlu/Business Ethics": 0.65, "helm_mmlu/Clinical Knowledge": 0.698, "helm_mmlu/Conceptual Physics": 0.621, "helm_mmlu/Electrical Engineering": 0.628, "helm_mmlu/Elementary Mathematics": 0.516, "helm_mmlu/Formal Logic": 0.508, "helm_mmlu/High School World History": 0.857, "helm_mmlu/Human Sexuality": 0.733, "helm_mmlu/International Law": 0.835, "helm_mmlu/Logical Fallacies": 0.742, "helm_mmlu/Machine Learning": 0.554, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.885, "helm_mmlu/Medical Genetics": 0.7, "helm_mmlu/Miscellaneous": 0.838, "helm_mmlu/Moral Scenarios": 0.377, "helm_mmlu/Nutrition": 0.778, "helm_mmlu/Prehistory": 0.756, "helm_mmlu/Public Relations": 0.682, "helm_mmlu/Security Studies": 0.735, "helm_mmlu/Sociology": 0.841, "helm_mmlu/Virology": 0.548, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.824, "hfopenllm_v2/IFEval": 0.2659, "hfopenllm_v2/BBH": 0.4362, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4062, "hfopenllm_v2/MMLU-PRO": 0.2948 } }, { "id": "google/gemma-7b-it", "name": "gemma-7b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3868, "hfopenllm_v2/BBH": 0.3646, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4274, "hfopenllm_v2/MMLU-PRO": 0.1695 } }, { "id": "google/mt5-base", "name": "mt5-base", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1645, "hfopenllm_v2/BBH": 0.2883, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3672, "hfopenllm_v2/MMLU-PRO": 0.107 } }, { "id": "google/mt5-small", "name": "mt5-small", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1718, "hfopenllm_v2/BBH": 0.2766, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3857, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "google/mt5-xl", "name": "mt5-xl", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.196, "hfopenllm_v2/BBH": 0.3047, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3795, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "google/mt5-xxl", "name": "mt5-xxl", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2358, "hfopenllm_v2/BBH": 0.2959, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3689, "hfopenllm_v2/MMLU-PRO": 0.1089 } }, { "id": "google/Palmyra-X-43B", "name": "Palmyra X 43B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.732, "helm_classic/MMLU": 0.609, "helm_classic/BoolQ": 0.896, "helm_classic/NarrativeQA": 0.742, "helm_classic/NaturalQuestions (open-book)": -1.0, "helm_classic/QuAC": 0.473, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.616, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.049, "helm_classic/XSUM": 0.149, "helm_classic/IMDB": 0.935, "helm_classic/CivilComments": 0.008, "helm_classic/RAFT": 0.701 } }, { "id": "google/recurrentgemma-2b", "name": "recurrentgemma-2b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3017, "hfopenllm_v2/BBH": 0.3197, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3446, "hfopenllm_v2/MMLU-PRO": 0.1176 } }, { "id": "google/recurrentgemma-2b-it", "name": "recurrentgemma-2b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2949, "hfopenllm_v2/BBH": 0.333, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1402 } }, { "id": "google/recurrentgemma-9b", "name": "recurrentgemma-9b", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3116, "hfopenllm_v2/BBH": 0.3956, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3803, "hfopenllm_v2/MMLU-PRO": 0.2605 } }, { "id": "google/recurrentgemma-9b-it", "name": "recurrentgemma-9b-it", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.501, "hfopenllm_v2/BBH": 0.4367, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4379, "hfopenllm_v2/MMLU-PRO": 0.2843 } }, { "id": "google/switch-base-8", "name": "switch-base-8", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1585, "hfopenllm_v2/BBH": 0.2876, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3517, "hfopenllm_v2/MMLU-PRO": 0.1098 } }, { "id": "google/T5-11B", "name": "T5 11B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.131, "helm_classic/MMLU": 0.29, "helm_classic/BoolQ": 0.761, "helm_classic/NarrativeQA": 0.086, "helm_classic/NaturalQuestions (open-book)": 0.477, "helm_classic/QuAC": 0.116, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.133, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.043, "helm_classic/XSUM": 0.015, "helm_classic/IMDB": 0.379, "helm_classic/CivilComments": 0.509, "helm_classic/RAFT": 0.37 } }, { "id": "google/text-bison@001", "name": "PaLM-2 Bison", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.526, "helm_lite/NarrativeQA": 0.718, "helm_lite/NaturalQuestions (closed-book)": 0.39, "helm_lite/OpenbookQA": 0.878, "helm_lite/MMLU": 0.608, "helm_lite/MATH": 0.421, "helm_lite/GSM8K": 0.61, "helm_lite/LegalBench": 0.645, "helm_lite/MedQA": 0.547, "helm_lite/WMT 2014": 0.241, "helm_mmlu/MMLU All Subjects": 0.692, "helm_mmlu/Abstract Algebra": 0.39, "helm_mmlu/Anatomy": 0.644, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.74, "helm_mmlu/Econometrics": 0.518, "helm_mmlu/Global Facts": 0.38, "helm_mmlu/Jurisprudence": 0.769, "helm_mmlu/Philosophy": 0.736, "helm_mmlu/Professional Psychology": 0.761, "helm_mmlu/Us Foreign Policy": 0.87, "helm_mmlu/Astronomy": 0.803, "helm_mmlu/Business Ethics": 0.76, "helm_mmlu/Clinical Knowledge": 0.725, "helm_mmlu/Conceptual Physics": 0.694, "helm_mmlu/Electrical Engineering": 0.69, "helm_mmlu/Elementary Mathematics": 0.487, "helm_mmlu/Formal Logic": 0.5, "helm_mmlu/High School World History": 0.869, "helm_mmlu/Human Sexuality": 0.84, "helm_mmlu/International Law": 0.835, "helm_mmlu/Logical Fallacies": 0.853, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.893, "helm_mmlu/Marketing": 0.893, "helm_mmlu/Medical Genetics": 0.75, "helm_mmlu/Miscellaneous": 0.866, "helm_mmlu/Moral Scenarios": 0.369, "helm_mmlu/Nutrition": 0.709, "helm_mmlu/Prehistory": 0.812, "helm_mmlu/Public Relations": 0.691, "helm_mmlu/Security Studies": 0.812, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.494, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.192 } }, { "id": "google/text-unicorn@001", "name": "PaLM-2 Unicorn", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.644, "helm_lite/NarrativeQA": 0.583, "helm_lite/NaturalQuestions (closed-book)": 0.435, "helm_lite/OpenbookQA": 0.938, "helm_lite/MMLU": 0.702, "helm_lite/MATH": 0.674, "helm_lite/GSM8K": 0.831, "helm_lite/LegalBench": 0.677, "helm_lite/MedQA": 0.684, "helm_lite/WMT 2014": 0.26, "helm_mmlu/MMLU All Subjects": 0.786, "helm_mmlu/Abstract Algebra": 0.51, "helm_mmlu/Anatomy": 0.733, "helm_mmlu/College Physics": 0.549, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.649, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.836, "helm_mmlu/Professional Psychology": 0.858, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.862, "helm_mmlu/Business Ethics": 0.83, "helm_mmlu/Clinical Knowledge": 0.804, "helm_mmlu/Conceptual Physics": 0.809, "helm_mmlu/Electrical Engineering": 0.772, "helm_mmlu/Elementary Mathematics": 0.661, "helm_mmlu/Formal Logic": 0.659, "helm_mmlu/High School World History": 0.911, "helm_mmlu/Human Sexuality": 0.924, "helm_mmlu/International Law": 0.909, "helm_mmlu/Logical Fallacies": 0.877, "helm_mmlu/Machine Learning": 0.625, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.83, "helm_mmlu/Miscellaneous": 0.894, "helm_mmlu/Moral Scenarios": 0.562, "helm_mmlu/Nutrition": 0.856, "helm_mmlu/Prehistory": 0.87, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.829, "helm_mmlu/Sociology": 0.91, "helm_mmlu/Virology": 0.572, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.142 } }, { "id": "google/UL2-20B", "name": "UL2 20B", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.167, "helm_classic/MMLU": 0.291, "helm_classic/BoolQ": 0.746, "helm_classic/NarrativeQA": 0.083, "helm_classic/NaturalQuestions (open-book)": 0.349, "helm_classic/QuAC": 0.144, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.193, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.03, "helm_classic/XSUM": 0.058, "helm_classic/IMDB": 0.337, "helm_classic/CivilComments": 0.521, "helm_classic/RAFT": 0.404 } }, { "id": "google/umt5-base", "name": "umt5-base", "developer": "Google", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1746, "hfopenllm_v2/BBH": 0.2788, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1078 } }, { "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct", "name": "gemma2-9b-cpt-sahabatai-v1-instruct", "developer": "GoToCompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6551, "hfopenllm_v2/BBH": 0.5955, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4779, "hfopenllm_v2/MMLU-PRO": 0.4264 } }, { "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct", "name": "llama3-8b-cpt-sahabatai-v1-instruct", "developer": "GoToCompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5238, "hfopenllm_v2/BBH": 0.4951, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.4488, "hfopenllm_v2/MMLU-PRO": 0.3453 } }, { "id": "goulue5/merging_LLM", "name": "merging_LLM", "developer": "goulue5", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3233, "hfopenllm_v2/BBH": 0.4216, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4333, "hfopenllm_v2/MMLU-PRO": 0.2958 } }, { "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k", "name": "Llama-3-8B-Instruct-Gradient-1048k", "developer": "gradientai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4456, "hfopenllm_v2/BBH": 0.4346, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.294 } }, { "id": "GreenNode/GreenNode-small-9B-it", "name": "GreenNode-small-9B-it", "developer": "GreenNode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7436, "hfopenllm_v2/BBH": 0.5994, "hfopenllm_v2/MATH Level 5": 0.1745, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4204, "hfopenllm_v2/MMLU-PRO": 0.3927 } }, { "id": "grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", "name": "DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4797, "hfopenllm_v2/BBH": 0.5269, "hfopenllm_v2/MATH Level 5": 0.2221, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4408, "hfopenllm_v2/MMLU-PRO": 0.3957 } }, { "id": "grimjim/Gigantes-v1-gemma2-9b-it", "name": "Gigantes-v1-gemma2-9b-it", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6925, "hfopenllm_v2/BBH": 0.5978, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4555, "hfopenllm_v2/MMLU-PRO": 0.4225 } }, { "id": "grimjim/Gigantes-v2-gemma2-9b-it", "name": "Gigantes-v2-gemma2-9b-it", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7351, "hfopenllm_v2/BBH": 0.5987, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4259 } }, { "id": "grimjim/Gigantes-v3-gemma2-9b-it", "name": "Gigantes-v3-gemma2-9b-it", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6976, "hfopenllm_v2/BBH": 0.5984, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4608, "hfopenllm_v2/MMLU-PRO": 0.4226 } }, { "id": "grimjim/HuatuoSkywork-o1-Llama-3.1-8B", "name": "HuatuoSkywork-o1-Llama-3.1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3961, "hfopenllm_v2/BBH": 0.4886, "hfopenllm_v2/MATH Level 5": 0.3882, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3839, "hfopenllm_v2/MMLU-PRO": 0.3095 } }, { "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", "name": "Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6806, "hfopenllm_v2/BBH": 0.5022, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3885, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", "name": "Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4271, "hfopenllm_v2/BBH": 0.4962, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.3625 } }, { "id": "grimjim/llama-3-Nephilim-v1-8B", "name": "llama-3-Nephilim-v1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4277, "hfopenllm_v2/BBH": 0.5132, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.3796 } }, { "id": "grimjim/llama-3-Nephilim-v2-8B", "name": "llama-3-Nephilim-v2-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3922, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3895, "hfopenllm_v2/MMLU-PRO": 0.3641 } }, { "id": "grimjim/llama-3-Nephilim-v2.1-8B", "name": "llama-3-Nephilim-v2.1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3895, "hfopenllm_v2/BBH": 0.5095, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3935, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "grimjim/llama-3-Nephilim-v3-8B", "name": "llama-3-Nephilim-v3-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4174, "hfopenllm_v2/BBH": 0.5013, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3989, "hfopenllm_v2/MMLU-PRO": 0.3612 } }, { "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter", "name": "Llama-3.1-8B-Instruct-abliterated_via_adapter", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.487, "hfopenllm_v2/BBH": 0.5105, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.401, "hfopenllm_v2/MMLU-PRO": 0.3651 } }, { "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct", "name": "Llama-3.1-Bonsaikraft-8B-Instruct", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.425, "hfopenllm_v2/BBH": 0.5287, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4235, "hfopenllm_v2/MMLU-PRO": 0.3764 } }, { "id": "grimjim/Llama-Nephilim-Metamorphosis-v2-8B", "name": "Llama-Nephilim-Metamorphosis-v2-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4545, "hfopenllm_v2/BBH": 0.5013, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.3809 } }, { "id": "grimjim/Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", "name": "Llama3.1-SuperNovaLite-HuatuoSkywork-o1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4366, "hfopenllm_v2/BBH": 0.5287, "hfopenllm_v2/MATH Level 5": 0.3006, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.3999, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "grimjim/Magnolia-v1-Gemma2-8k-9B", "name": "Magnolia-v1-Gemma2-8k-9B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3531, "hfopenllm_v2/BBH": 0.5589, "hfopenllm_v2/MATH Level 5": 0.1684, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4645, "hfopenllm_v2/MMLU-PRO": 0.4242 } }, { "id": "grimjim/Magnolia-v2-12B", "name": "Magnolia-v2-12B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3506, "hfopenllm_v2/BBH": 0.529, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3601 } }, { "id": "grimjim/Magnolia-v2-Gemma2-8k-9B", "name": "Magnolia-v2-Gemma2-8k-9B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7384, "hfopenllm_v2/BBH": 0.6016, "hfopenllm_v2/MATH Level 5": 0.2281, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.4488, "hfopenllm_v2/MMLU-PRO": 0.4332 } }, { "id": "grimjim/Magnolia-v3-12B", "name": "Magnolia-v3-12B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3965, "hfopenllm_v2/BBH": 0.5327, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "grimjim/Magnolia-v3-Gemma2-8k-9B", "name": "Magnolia-v3-Gemma2-8k-9B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.6015, "hfopenllm_v2/MATH Level 5": 0.2319, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4488, "hfopenllm_v2/MMLU-PRO": 0.4337 } }, { "id": "grimjim/Magnolia-v4-12B", "name": "Magnolia-v4-12B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3418, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3672 } }, { "id": "grimjim/Magnolia-v5a-12B", "name": "Magnolia-v5a-12B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4114, "hfopenllm_v2/BBH": 0.5312, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.3601 } }, { "id": "grimjim/Magot-v1-Gemma2-8k-9B", "name": "Magot-v1-Gemma2-8k-9B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2997, "hfopenllm_v2/BBH": 0.6019, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4488, "hfopenllm_v2/MMLU-PRO": 0.4337 } }, { "id": "grimjim/Magot-v2-Gemma2-8k-9B", "name": "Magot-v2-Gemma2-8k-9B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7347, "hfopenllm_v2/BBH": 0.5897, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4344, "hfopenllm_v2/MMLU-PRO": 0.4223 } }, { "id": "grimjim/SauerHuatuoSkywork-o1-Llama-3.1-8B", "name": "SauerHuatuoSkywork-o1-Llama-3.1-8B", "developer": "grimjim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5219, "hfopenllm_v2/BBH": 0.5222, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4527, "hfopenllm_v2/MMLU-PRO": 0.3991 } }, { "id": "GritLM/GritLM-7B-KTO", "name": "GritLM-7B-KTO", "developer": "GritLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.531, "hfopenllm_v2/BBH": 0.4853, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.371, "hfopenllm_v2/MMLU-PRO": 0.268 } }, { "id": "GritLM/GritLM-8x7B-KTO", "name": "GritLM-8x7B-KTO", "developer": "GritLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5714, "hfopenllm_v2/BBH": 0.582, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.3648 } }, { "id": "Groq/Llama-3-Groq-8B-Tool-Use", "name": "Llama-3-Groq-8B-Tool-Use", "developer": "Groq", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6098, "hfopenllm_v2/BBH": 0.4863, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.3399 } }, { "id": "Gryphe/Pantheon-RP-1.0-8b-Llama-3", "name": "Pantheon-RP-1.0-8b-Llama-3", "developer": "Gryphe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3933, "hfopenllm_v2/BBH": 0.4539, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3832, "hfopenllm_v2/MMLU-PRO": 0.3067 } }, { "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo", "name": "Pantheon-RP-1.5-12b-Nemo", "developer": "Gryphe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4763, "hfopenllm_v2/BBH": 0.5196, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.442, "hfopenllm_v2/MMLU-PRO": 0.3302 } }, { "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo", "name": "Pantheon-RP-1.6-12b-Nemo", "developer": "Gryphe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4481, "hfopenllm_v2/BBH": 0.5204, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4288, "hfopenllm_v2/MMLU-PRO": 0.3311 } }, { "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO", "name": "Pantheon-RP-1.6-12b-Nemo-KTO", "developer": "Gryphe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4636, "hfopenllm_v2/BBH": 0.5277, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4248, "hfopenllm_v2/MMLU-PRO": 0.3382 } }, { "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small", "name": "Pantheon-RP-Pure-1.6.2-22b-Small", "developer": "Gryphe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6931, "hfopenllm_v2/BBH": 0.5305, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.3765, "hfopenllm_v2/MMLU-PRO": 0.3942 } }, { "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall", "name": "Nature-Reason-1.2-reallysmall", "developer": "GuilhermeNaturaUmana", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4985, "hfopenllm_v2/BBH": 0.5645, "hfopenllm_v2/MATH Level 5": 0.2576, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4373, "hfopenllm_v2/MMLU-PRO": 0.4429 } }, { "id": "Gunulhona/Gemma-Ko-Merge", "name": "Gemma-Ko-Merge", "developer": "Gunulhona", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6416, "hfopenllm_v2/BBH": 0.5813, "hfopenllm_v2/MATH Level 5": 0.1881, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4047, "hfopenllm_v2/MMLU-PRO": 0.3879 } }, { "id": "Gunulhona/Gemma-Ko-Merge-PEFT", "name": "Gemma-Ko-Merge-PEFT", "developer": "Gunulhona", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.288, "hfopenllm_v2/BBH": 0.5154, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.3817 } }, { "id": "gupta-tanish/llama-7b-dpo-baseline", "name": "llama-7b-dpo-baseline", "developer": "gupta-tanish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2693, "hfopenllm_v2/BBH": 0.3897, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4456, "hfopenllm_v2/MMLU-PRO": 0.2028 } }, { "id": "gz987/qwen2.5-7b-cabs-v0.1", "name": "qwen2.5-7b-cabs-v0.1", "developer": "gz987", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7506, "hfopenllm_v2/BBH": 0.5482, "hfopenllm_v2/MATH Level 5": 0.4796, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4376, "hfopenllm_v2/MMLU-PRO": 0.4406 } }, { "id": "gz987/qwen2.5-7b-cabs-v0.2", "name": "qwen2.5-7b-cabs-v0.2", "developer": "gz987", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7418, "hfopenllm_v2/BBH": 0.5516, "hfopenllm_v2/MATH Level 5": 0.4902, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4429, "hfopenllm_v2/MMLU-PRO": 0.4397 } }, { "id": "gz987/qwen2.5-7b-cabs-v0.3", "name": "qwen2.5-7b-cabs-v0.3", "developer": "gz987", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.757, "hfopenllm_v2/BBH": 0.5494, "hfopenllm_v2/MATH Level 5": 0.4932, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.4402 } }, { "id": "gz987/qwen2.5-7b-cabs-v0.4", "name": "qwen2.5-7b-cabs-v0.4", "developer": "gz987", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7583, "hfopenllm_v2/BBH": 0.5524, "hfopenllm_v2/MATH Level 5": 0.4849, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.4396 } }, { "id": "h2oai/h2o-danube-1.8b-chat", "name": "h2o-danube-1.8b-chat", "developer": "h2oai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2199, "hfopenllm_v2/BBH": 0.322, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3989, "hfopenllm_v2/MMLU-PRO": 0.1314 } }, { "id": "h2oai/h2o-danube3-4b-base", "name": "h2o-danube3-4b-base", "developer": "h2oai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2338, "hfopenllm_v2/BBH": 0.3599, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.2109 } }, { "id": "h2oai/h2o-danube3-4b-chat", "name": "h2o-danube3-4b-chat", "developer": "h2oai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3629, "hfopenllm_v2/BBH": 0.3466, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.2228 } }, { "id": "h2oai/h2o-danube3-500m-chat", "name": "h2o-danube3-500m-chat", "developer": "h2oai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2208, "hfopenllm_v2/BBH": 0.3035, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2307, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "h2oai/h2o-danube3.1-4b-chat", "name": "h2o-danube3.1-4b-chat", "developer": "h2oai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5021, "hfopenllm_v2/BBH": 0.3608, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4102, "hfopenllm_v2/MMLU-PRO": 0.2719 } }, { "id": "haoranxu/ALMA-13B-R", "name": "ALMA-13B-R", "developer": "haoranxu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0039, "hfopenllm_v2/BBH": 0.3457, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3528, "hfopenllm_v2/MMLU-PRO": 0.1817 } }, { "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO", "name": "Llama-3-Instruct-8B-CPO-SimPO", "developer": "haoranxu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7046, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3567, "hfopenllm_v2/MMLU-PRO": 0.3686 } }, { "id": "haoranxu/Llama-3-Instruct-8B-SimPO", "name": "Llama-3-Instruct-8B-SimPO", "developer": "haoranxu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7347, "hfopenllm_v2/BBH": 0.4979, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.3733 } }, { "id": "HarbingerX/Zeitgeist-3b-V1", "name": "Zeitgeist-3b-V1", "developer": "HarbingerX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6712, "hfopenllm_v2/BBH": 0.4441, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.3009 } }, { "id": "HarbingerX/Zeitgeist-3b-V1.2", "name": "Zeitgeist-3b-V1.2", "developer": "HarbingerX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6754, "hfopenllm_v2/BBH": 0.4441, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.3056 } }, { "id": "Hastagaras/L3.2-JametMini-3B-MK.III", "name": "L3.2-JametMini-3B-MK.III", "developer": "Hastagaras", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6183, "hfopenllm_v2/BBH": 0.4539, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3686, "hfopenllm_v2/MMLU-PRO": 0.2983 } }, { "id": "Hastagaras/Llama-3.1-Jamet-8B-MK.I", "name": "Llama-3.1-Jamet-8B-MK.I", "developer": "Hastagaras", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7338, "hfopenllm_v2/BBH": 0.5049, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3726, "hfopenllm_v2/MMLU-PRO": 0.3482 } }, { "id": "Hastagaras/Zabuza-8B-Llama-3.1", "name": "Zabuza-8B-Llama-3.1", "developer": "Hastagaras", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6265, "hfopenllm_v2/BBH": 0.4539, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.2923 } }, { "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc", "name": "qwen2.5-1.5b-sft-raft-grpo-hra-doc", "developer": "hatemmahmoud", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4196, "hfopenllm_v2/BBH": 0.427, "hfopenllm_v2/MATH Level 5": 0.2175, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.361, "hfopenllm_v2/MMLU-PRO": 0.2776 } }, { "id": "HelpingAI/Cipher-20B", "name": "Cipher-20B", "developer": "HelpingAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5378, "hfopenllm_v2/BBH": 0.6032, "hfopenllm_v2/MATH Level 5": 0.1994, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4003, "hfopenllm_v2/MMLU-PRO": 0.3744 } }, { "id": "HelpingAI/Dhanishtha-Large", "name": "Dhanishtha-Large", "developer": "HelpingAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2457, "hfopenllm_v2/BBH": 0.4604, "hfopenllm_v2/MATH Level 5": 0.3852, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.2755 } }, { "id": "HelpingAI/Priya-10B", "name": "Priya-10B", "developer": "HelpingAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4043, "hfopenllm_v2/BBH": 0.4441, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.2493 } }, { "id": "HelpingAI/Priya-3B", "name": "Priya-3B", "developer": "HelpingAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4526, "hfopenllm_v2/BBH": 0.3961, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.2339 } }, { "id": "hendrydong/Mistral-RM-for-RAFT-GSHF-v0", "name": "hendrydong/Mistral-RM-for-RAFT-GSHF-v0", "developer": "hendrydong", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7847, "reward-bench/Factuality": 0.5779, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.85, "reward-bench/Focus": 0.6747, "reward-bench/Ties": 0.5988, "reward-bench/Chat": 0.9832, "reward-bench/Chat Hard": 0.5789, "reward-bench/Reasoning": 0.7434, "reward-bench/Prior Sets (0.5 weight)": 0.7508 } }, { "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B", "name": "DeepSeek-R1-Qwen-Coder-8B", "developer": "HeraiHench", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1869, "hfopenllm_v2/BBH": 0.2913, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "HeraiHench/Double-Down-Qwen-Math-7B", "name": "Double-Down-Qwen-Math-7B", "developer": "HeraiHench", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.167, "hfopenllm_v2/BBH": 0.2845, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3737, "hfopenllm_v2/MMLU-PRO": 0.1112 } }, { "id": "HeraiHench/Marge-Qwen-Math-7B", "name": "Marge-Qwen-Math-7B", "developer": "HeraiHench", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1262, "hfopenllm_v2/BBH": 0.3069, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3939, "hfopenllm_v2/MMLU-PRO": 0.1056 } }, { "id": "HeraiHench/Phi-4-slerp-ReasoningRP-14B", "name": "Phi-4-slerp-ReasoningRP-14B", "developer": "HeraiHench", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1575, "hfopenllm_v2/BBH": 0.4196, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3116, "hfopenllm_v2/MMLU-PRO": 0.19 } }, { "id": "HFXM/RAMO-Llama3.1-8B", "name": "HFXM/RAMO-Llama3.1-8B", "developer": "HFXM", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6917, "reward-bench/Factuality": 0.6547, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.5628, "reward-bench/Safety": 0.9756, "reward-bench/Focus": 0.9071, "reward-bench/Ties": 0.6752 } }, { "id": "HiroseKoichi/Llama-Salad-4x8B-V3", "name": "Llama-Salad-4x8B-V3", "developer": "HiroseKoichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6654, "hfopenllm_v2/BBH": 0.5245, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.3518 } }, { "id": "HoangHa/Pensez-Llama3.1-8B", "name": "Pensez-Llama3.1-8B", "developer": "HoangHa", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3887, "hfopenllm_v2/BBH": 0.4669, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3597, "hfopenllm_v2/MMLU-PRO": 0.3126 } }, { "id": "hon9kon9ize/CantoneseLLMChat-v0.5", "name": "CantoneseLLMChat-v0.5", "developer": "hon9kon9ize", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3231, "hfopenllm_v2/BBH": 0.4345, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4706, "hfopenllm_v2/MMLU-PRO": 0.2504 } }, { "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B", "name": "CantoneseLLMChat-v1.0-7B", "developer": "hon9kon9ize", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4455, "hfopenllm_v2/BBH": 0.4866, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.3883, "hfopenllm_v2/MMLU-PRO": 0.3785 } }, { "id": "hongbai12/li-0.4-pre", "name": "li-0.4-pre", "developer": "hongbai12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.52, "hfopenllm_v2/BBH": 0.6298, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4513, "hfopenllm_v2/MMLU-PRO": 0.5015 } }, { "id": "hotmailuser/Deepseek-qwen-modelstock-2B", "name": "Deepseek-qwen-modelstock-2B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2149, "hfopenllm_v2/BBH": 0.3549, "hfopenllm_v2/MATH Level 5": 0.3399, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1911 } }, { "id": "hotmailuser/Falcon3Slerp1-10B", "name": "Falcon3Slerp1-10B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5694, "hfopenllm_v2/BBH": 0.617, "hfopenllm_v2/MATH Level 5": 0.2598, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4318, "hfopenllm_v2/MMLU-PRO": 0.4402 } }, { "id": "hotmailuser/Falcon3Slerp2-10B", "name": "Falcon3Slerp2-10B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6118, "hfopenllm_v2/BBH": 0.6164, "hfopenllm_v2/MATH Level 5": 0.2319, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4096, "hfopenllm_v2/MMLU-PRO": 0.4369 } }, { "id": "hotmailuser/Falcon3Slerp4-10B", "name": "Falcon3Slerp4-10B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6072, "hfopenllm_v2/BBH": 0.6114, "hfopenllm_v2/MATH Level 5": 0.2289, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "hotmailuser/FalconSlerp-3B", "name": "FalconSlerp-3B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5695, "hfopenllm_v2/BBH": 0.4624, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3989, "hfopenllm_v2/MMLU-PRO": 0.2968 } }, { "id": "hotmailuser/FalconSlerp1-7B", "name": "FalconSlerp1-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5395, "hfopenllm_v2/BBH": 0.5355, "hfopenllm_v2/MATH Level 5": 0.2379, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4452, "hfopenllm_v2/MMLU-PRO": 0.4129 } }, { "id": "hotmailuser/FalconSlerp2-7B", "name": "FalconSlerp2-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.616, "hfopenllm_v2/BBH": 0.5538, "hfopenllm_v2/MATH Level 5": 0.2983, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.4141 } }, { "id": "hotmailuser/FalconSlerp3-10B", "name": "FalconSlerp3-10B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6002, "hfopenllm_v2/BBH": 0.606, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.4323 } }, { "id": "hotmailuser/FalconSlerp3-7B", "name": "FalconSlerp3-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6096, "hfopenllm_v2/BBH": 0.5533, "hfopenllm_v2/MATH Level 5": 0.3157, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4507, "hfopenllm_v2/MMLU-PRO": 0.4127 } }, { "id": "hotmailuser/FalconSlerp4-7B", "name": "FalconSlerp4-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6285, "hfopenllm_v2/BBH": 0.5524, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4585, "hfopenllm_v2/MMLU-PRO": 0.4032 } }, { "id": "hotmailuser/FalconSlerp6-7B", "name": "FalconSlerp6-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6027, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4492, "hfopenllm_v2/MMLU-PRO": 0.3995 } }, { "id": "hotmailuser/Gemma2atlas-27B", "name": "Gemma2atlas-27B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7214, "hfopenllm_v2/BBH": 0.6545, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4445, "hfopenllm_v2/MMLU-PRO": 0.475 } }, { "id": "hotmailuser/Gemma2Crono-27B", "name": "Gemma2Crono-27B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7086, "hfopenllm_v2/BBH": 0.6505, "hfopenllm_v2/MATH Level 5": 0.2424, "hfopenllm_v2/GPQA": 0.3708, "hfopenllm_v2/MUSR": 0.4567, "hfopenllm_v2/MMLU-PRO": 0.4633 } }, { "id": "hotmailuser/Gemma2magnum-27b", "name": "Gemma2magnum-27b", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5051, "hfopenllm_v2/BBH": 0.62, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4723, "hfopenllm_v2/MMLU-PRO": 0.4596 } }, { "id": "hotmailuser/Gemma2SimPO-27B", "name": "Gemma2SimPO-27B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7222, "hfopenllm_v2/BBH": 0.6413, "hfopenllm_v2/MATH Level 5": 0.2817, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4447, "hfopenllm_v2/MMLU-PRO": 0.4642 } }, { "id": "hotmailuser/Llama-Hermes-slerp-8B", "name": "Llama-Hermes-slerp-8B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.339, "hfopenllm_v2/BBH": 0.531, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4078, "hfopenllm_v2/MMLU-PRO": 0.3331 } }, { "id": "hotmailuser/Llama-Hermes-slerp2-8B", "name": "Llama-Hermes-slerp2-8B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3728, "hfopenllm_v2/BBH": 0.5265, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4248, "hfopenllm_v2/MMLU-PRO": 0.3379 } }, { "id": "hotmailuser/LlamaStock-8B", "name": "LlamaStock-8B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.425, "hfopenllm_v2/BBH": 0.5329, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4129, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "hotmailuser/Mistral-modelstock-24B", "name": "Mistral-modelstock-24B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3424, "hfopenllm_v2/BBH": 0.6452, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.4102, "hfopenllm_v2/MUSR": 0.459, "hfopenllm_v2/MMLU-PRO": 0.507 } }, { "id": "hotmailuser/Mistral-modelstock2-24B", "name": "Mistral-modelstock2-24B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4318, "hfopenllm_v2/BBH": 0.6689, "hfopenllm_v2/MATH Level 5": 0.2402, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.4616, "hfopenllm_v2/MMLU-PRO": 0.5318 } }, { "id": "hotmailuser/Phi4-Slerp4-14B", "name": "Phi4-Slerp4-14B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0629, "hfopenllm_v2/BBH": 0.6731, "hfopenllm_v2/MATH Level 5": 0.3474, "hfopenllm_v2/GPQA": 0.3968, "hfopenllm_v2/MUSR": 0.5097, "hfopenllm_v2/MMLU-PRO": 0.5278 } }, { "id": "hotmailuser/Qwen2.5-HomerSlerp-7B", "name": "Qwen2.5-HomerSlerp-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4488, "hfopenllm_v2/BBH": 0.5633, "hfopenllm_v2/MATH Level 5": 0.3316, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4383, "hfopenllm_v2/MMLU-PRO": 0.4549 } }, { "id": "hotmailuser/QwenModelStock-1.8B", "name": "QwenModelStock-1.8B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3263, "hfopenllm_v2/BBH": 0.4188, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4359, "hfopenllm_v2/MMLU-PRO": 0.2959 } }, { "id": "hotmailuser/QwenSlerp-14B", "name": "QwenSlerp-14B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7025, "hfopenllm_v2/BBH": 0.6491, "hfopenllm_v2/MATH Level 5": 0.3837, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4634, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "hotmailuser/QwenSlerp-3B", "name": "QwenSlerp-3B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4334, "hfopenllm_v2/BBH": 0.4892, "hfopenllm_v2/MATH Level 5": 0.2749, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3693 } }, { "id": "hotmailuser/QwenSlerp-7B", "name": "QwenSlerp-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4673, "hfopenllm_v2/BBH": 0.5636, "hfopenllm_v2/MATH Level 5": 0.3444, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4409, "hfopenllm_v2/MMLU-PRO": 0.4509 } }, { "id": "hotmailuser/QwenSlerp2-14B", "name": "QwenSlerp2-14B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7037, "hfopenllm_v2/BBH": 0.6493, "hfopenllm_v2/MATH Level 5": 0.3965, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4807, "hfopenllm_v2/MMLU-PRO": 0.5379 } }, { "id": "hotmailuser/QwenSlerp2-3B", "name": "QwenSlerp2-3B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.428, "hfopenllm_v2/BBH": 0.4802, "hfopenllm_v2/MATH Level 5": 0.2606, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4252, "hfopenllm_v2/MMLU-PRO": 0.3742 } }, { "id": "hotmailuser/QwenSlerp3-14B", "name": "QwenSlerp3-14B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6632, "hfopenllm_v2/BBH": 0.6267, "hfopenllm_v2/MATH Level 5": 0.4305, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.5263 } }, { "id": "hotmailuser/QwenSparse-7B", "name": "QwenSparse-7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1086, "hfopenllm_v2/BBH": 0.2896, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3562, "hfopenllm_v2/MMLU-PRO": 0.1122 } }, { "id": "hotmailuser/QwenStock-0.5B", "name": "QwenStock-0.5B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2049, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "hotmailuser/QwenStock-1.7B", "name": "QwenStock-1.7B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3214, "hfopenllm_v2/BBH": 0.4188, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4412, "hfopenllm_v2/MMLU-PRO": 0.2955 } }, { "id": "hotmailuser/QwenStock1-14B", "name": "QwenStock1-14B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6693, "hfopenllm_v2/BBH": 0.6502, "hfopenllm_v2/MATH Level 5": 0.3701, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5416 } }, { "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B", "name": "RombosBeagle-v2beta-MGS-32B", "developer": "hotmailuser", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5157, "hfopenllm_v2/BBH": 0.7037, "hfopenllm_v2/MATH Level 5": 0.4992, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.5021, "hfopenllm_v2/MMLU-PRO": 0.5908 } }, { "id": "HPAI-BSC/Llama3-Aloe-8B-Alpha", "name": "Llama3-Aloe-8B-Alpha", "developer": "HPAI-BSC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5081, "hfopenllm_v2/BBH": 0.4831, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3673, "hfopenllm_v2/MMLU-PRO": 0.3295 } }, { "id": "HPAI-BSC/Llama3.1-Aloe-Beta-8B", "name": "Llama3.1-Aloe-Beta-8B", "developer": "HPAI-BSC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7253, "hfopenllm_v2/BBH": 0.5093, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3835, "hfopenllm_v2/MMLU-PRO": 0.358 } }, { "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B", "name": "Qwen2.5-Aloe-Beta-7B", "developer": "HPAI-BSC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4554, "hfopenllm_v2/BBH": 0.5049, "hfopenllm_v2/MATH Level 5": 0.3542, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.4354 } }, { "id": "huawei-noah-ustc/toolace-2-8b-fc", "name": "ToolACE-2-8B (FC)", "developer": "huawei-noah-ustc", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 40.0, "bfcl/bfcl.overall.overall_accuracy": 42.44, "bfcl/bfcl.overall.total_cost_usd": 24.43, "bfcl/bfcl.overall.latency_mean_s": 15.95, "bfcl/bfcl.overall.latency_std_s": 40.06, "bfcl/bfcl.overall.latency_p95_s": 65.26, "bfcl/bfcl.non_live.ast_accuracy": 87.1, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.42, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.0, "bfcl/bfcl.live.live_accuracy": 77.42, "bfcl/bfcl.live.live_simple_ast_accuracy": 71.32, "bfcl/bfcl.live.live_multiple_ast_accuracy": 79.39, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 38.38, "bfcl/bfcl.multi_turn.base_accuracy": 49.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 28.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 30.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 46.0, "bfcl/bfcl.web_search.accuracy": 8.5, "bfcl/bfcl.web_search.base_accuracy": 13.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 4.0, "bfcl/bfcl.memory.accuracy": 18.49, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 16.13, "bfcl/bfcl.memory.recursive_summarization_accuracy": 33.55, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 90.79, "bfcl/bfcl.format_sensitivity.max_delta": 81.5, "bfcl/bfcl.format_sensitivity.stddev": 27.92 } }, { "id": "HuggingFaceH4/starchat2-15b-v0.1", "name": "HuggingFaceH4/starchat2-15b-v0.1", "developer": "HuggingFaceH4", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7322, "reward-bench/Chat": 0.9385, "reward-bench/Chat Hard": 0.5548, "reward-bench/Safety": 0.7095, "reward-bench/Reasoning": 0.8159, "reward-bench/Prior Sets (0.5 weight)": 0.5525 } }, { "id": "HuggingFaceH4/zephyr-7b-alpha", "name": "HuggingFaceH4/zephyr-7b-alpha", "developer": "HuggingFaceH4", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5191, "hfopenllm_v2/BBH": 0.4583, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.395, "hfopenllm_v2/MMLU-PRO": 0.2795, "reward-bench/Score": 0.7392, "reward-bench/Chat": 0.9162, "reward-bench/Chat Hard": 0.625, "reward-bench/Safety": 0.7662, "reward-bench/Reasoning": 0.7514, "reward-bench/Prior Sets (0.5 weight)": 0.5353 } }, { "id": "HuggingFaceH4/zephyr-7b-beta", "name": "HuggingFaceH4/zephyr-7b-beta", "developer": "HuggingFaceH4", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.495, "hfopenllm_v2/BBH": 0.4316, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3925, "hfopenllm_v2/MMLU-PRO": 0.2781, "reward-bench/Score": 0.7281, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.6272, "reward-bench/Safety": 0.6568, "reward-bench/Reasoning": 0.7789, "reward-bench/Prior Sets (0.5 weight)": 0.5216 } }, { "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1", "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1", "developer": "HuggingFaceH4", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3364, "hfopenllm_v2/BBH": 0.4624, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.2847, "reward-bench/Score": 0.6758, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.4956, "reward-bench/Safety": 0.5824, "reward-bench/Reasoning": 0.7463, "reward-bench/Prior Sets (0.5 weight)": 0.5171 } }, { "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", "name": "zephyr-orpo-141b-A35b-v0.1", "developer": "HuggingFaceH4", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6511, "hfopenllm_v2/BBH": 0.629, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4465, "hfopenllm_v2/MMLU-PRO": 0.4586 } }, { "id": "HuggingFaceTB/SmolLM-1.7B", "name": "SmolLM-1.7B", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2362, "hfopenllm_v2/BBH": 0.3181, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3421, "hfopenllm_v2/MMLU-PRO": 0.1148 } }, { "id": "HuggingFaceTB/SmolLM-1.7B-Instruct", "name": "SmolLM-1.7B-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2348, "hfopenllm_v2/BBH": 0.2885, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "HuggingFaceTB/SmolLM-135M", "name": "SmolLM-135M", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2125, "hfopenllm_v2/BBH": 0.3046, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4366, "hfopenllm_v2/MMLU-PRO": 0.1122 } }, { "id": "HuggingFaceTB/SmolLM-135M-Instruct", "name": "SmolLM-135M-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1214, "hfopenllm_v2/BBH": 0.3015, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3635, "hfopenllm_v2/MMLU-PRO": 0.1176 } }, { "id": "HuggingFaceTB/SmolLM-360M", "name": "SmolLM-360M", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2134, "hfopenllm_v2/BBH": 0.3065, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4018, "hfopenllm_v2/MMLU-PRO": 0.1124 } }, { "id": "HuggingFaceTB/SmolLM-360M-Instruct", "name": "SmolLM-360M-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1952, "hfopenllm_v2/BBH": 0.2885, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3472, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "HuggingFaceTB/SmolLM2-1.7B", "name": "SmolLM2-1.7B", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.244, "hfopenllm_v2/BBH": 0.3453, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3485, "hfopenllm_v2/MMLU-PRO": 0.2138 } }, { "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct", "name": "SmolLM2-1.7B-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5368, "hfopenllm_v2/BBH": 0.3599, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3421, "hfopenllm_v2/MMLU-PRO": 0.2054 } }, { "id": "HuggingFaceTB/SmolLM2-135M", "name": "SmolLM2-135M", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1818, "hfopenllm_v2/BBH": 0.3044, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.4112, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "HuggingFaceTB/SmolLM2-135M-Instruct", "name": "SmolLM2-135M-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0593, "hfopenllm_v2/BBH": 0.3135, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2341, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.1092 } }, { "id": "HuggingFaceTB/SmolLM2-360M", "name": "SmolLM2-360M", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2115, "hfopenllm_v2/BBH": 0.3233, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3954, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "HuggingFaceTB/SmolLM2-360M-Instruct", "name": "SmolLM2-360M-Instruct", "developer": "HuggingFaceTB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.083, "hfopenllm_v2/BBH": 0.3053, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3423, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "huggyllama/llama-13b", "name": "llama-13b", "developer": "huggyllama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2411, "hfopenllm_v2/BBH": 0.3988, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3462, "hfopenllm_v2/MMLU-PRO": 0.1952 } }, { "id": "huggyllama/llama-65b", "name": "llama-65b", "developer": "huggyllama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2526, "hfopenllm_v2/BBH": 0.4703, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3595, "hfopenllm_v2/MMLU-PRO": 0.3078 } }, { "id": "huggyllama/llama-7b", "name": "llama-7b", "developer": "huggyllama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2501, "hfopenllm_v2/BBH": 0.3277, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3354, "hfopenllm_v2/MMLU-PRO": 0.1313 } }, { "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", "name": "DeepSeek-R1-Distill-Qwen-14B-abliterated-v2", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4211, "hfopenllm_v2/BBH": 0.3487, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4701, "hfopenllm_v2/MMLU-PRO": 0.1915 } }, { "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2", "name": "Qwen2.5-14B-Instruct-abliterated-v2", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8328, "hfopenllm_v2/BBH": 0.6324, "hfopenllm_v2/MATH Level 5": 0.5302, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.422, "hfopenllm_v2/MMLU-PRO": 0.4962 } }, { "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated", "name": "Qwen2.5-72B-Instruct-abliterated", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8593, "hfopenllm_v2/BBH": 0.719, "hfopenllm_v2/MATH Level 5": 0.6012, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4233, "hfopenllm_v2/MMLU-PRO": 0.5537 } }, { "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated", "name": "Qwen2.5-7B-Instruct-abliterated", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7546, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.4577, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.3967, "hfopenllm_v2/MMLU-PRO": 0.418 } }, { "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2", "name": "Qwen2.5-7B-Instruct-abliterated-v2", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7606, "hfopenllm_v2/BBH": 0.5377, "hfopenllm_v2/MATH Level 5": 0.4637, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.4208 } }, { "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030", "name": "QwQ-32B-Coder-Fusion-7030", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3865, "hfopenllm_v2/BBH": 0.6178, "hfopenllm_v2/MATH Level 5": 0.2795, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.4368 } }, { "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020", "name": "QwQ-32B-Coder-Fusion-8020", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6021, "hfopenllm_v2/BBH": 0.6665, "hfopenllm_v2/MATH Level 5": 0.4592, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.5367 } }, { "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010", "name": "QwQ-32B-Coder-Fusion-9010", "developer": "huihui-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5778, "hfopenllm_v2/BBH": 0.6727, "hfopenllm_v2/MATH Level 5": 0.5317, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.56 } }, { "id": "HumanLLMs/Humanish-LLama3-8B-Instruct", "name": "Humanish-LLama3-8B-Instruct", "developer": "HumanLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6498, "hfopenllm_v2/BBH": 0.4968, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3582, "hfopenllm_v2/MMLU-PRO": 0.3702 } }, { "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407", "name": "Humanish-Mistral-Nemo-Instruct-2407", "developer": "HumanLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5451, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3968, "hfopenllm_v2/MMLU-PRO": 0.3521 } }, { "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct", "name": "Humanish-Qwen2.5-7B-Instruct", "developer": "HumanLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7284, "hfopenllm_v2/BBH": 0.5364, "hfopenllm_v2/MATH Level 5": 0.5, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.4398 } }, { "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3", "name": "wide_3b_orpo_stage1.1-ss1-orpo3", "developer": "huu-ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1505, "hfopenllm_v2/BBH": 0.2937, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3618, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "ibivibiv/colossus_120b", "name": "colossus_120b", "developer": "ibivibiv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4276, "hfopenllm_v2/BBH": 0.6061, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4733, "hfopenllm_v2/MMLU-PRO": 0.3961 } }, { "id": "ibivibiv/multimaster-7b-v6", "name": "multimaster-7b-v6", "developer": "ibivibiv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4473, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4396, "hfopenllm_v2/MMLU-PRO": 0.3095 } }, { "id": "ibm-granite/granite-3.0-1b-a400m-base", "name": "granite-3.0-1b-a400m-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2404, "hfopenllm_v2/BBH": 0.3221, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.1152 } }, { "id": "ibm-granite/granite-3.0-1b-a400m-instruct", "name": "granite-3.0-1b-a400m-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3332, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3623, "hfopenllm_v2/MMLU-PRO": 0.1244 } }, { "id": "ibm-granite/granite-3.0-2b-base", "name": "granite-3.0-2b-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3874, "hfopenllm_v2/BBH": 0.4047, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.2381 } }, { "id": "ibm-granite/granite-3.0-2b-instruct", "name": "granite-3.0-2b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.514, "hfopenllm_v2/BBH": 0.4412, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3515, "hfopenllm_v2/MMLU-PRO": 0.2814 } }, { "id": "ibm-granite/granite-3.0-3b-a800m-base", "name": "granite-3.0-3b-a800m-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2732, "hfopenllm_v2/BBH": 0.3667, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1891 } }, { "id": "ibm-granite/granite-3.0-3b-a800m-instruct", "name": "granite-3.0-3b-a800m-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4298, "hfopenllm_v2/BBH": 0.3753, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.2152 } }, { "id": "ibm-granite/granite-3.0-8b-base", "name": "granite-3.0-8b-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4583, "hfopenllm_v2/BBH": 0.4944, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.3313 } }, { "id": "ibm-granite/granite-3.0-8b-instruct", "name": "granite-3.0-8b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.531, "hfopenllm_v2/BBH": 0.5192, "hfopenllm_v2/MATH Level 5": 0.142, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.3901, "hfopenllm_v2/MMLU-PRO": 0.3457 } }, { "id": "ibm-granite/granite-3.1-1b-a400m-base", "name": "granite-3.1-1b-a400m-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2519, "hfopenllm_v2/BBH": 0.3299, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "ibm-granite/granite-3.1-1b-a400m-instruct", "name": "granite-3.1-1b-a400m-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4686, "hfopenllm_v2/BBH": 0.328, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1217 } }, { "id": "ibm-granite/granite-3.1-2b-base", "name": "granite-3.1-2b-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3522, "hfopenllm_v2/BBH": 0.4047, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3486, "hfopenllm_v2/MMLU-PRO": 0.2251 } }, { "id": "ibm-granite/granite-3.1-2b-instruct", "name": "granite-3.1-2b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6286, "hfopenllm_v2/BBH": 0.4409, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3605, "hfopenllm_v2/MMLU-PRO": 0.2819 } }, { "id": "ibm-granite/granite-3.1-3b-a800m-base", "name": "granite-3.1-3b-a800m-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2996, "hfopenllm_v2/BBH": 0.3628, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1793 } }, { "id": "ibm-granite/granite-3.1-3b-a800m-instruct", "name": "granite-3.1-3b-a800m-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5516, "hfopenllm_v2/BBH": 0.4009, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3486, "hfopenllm_v2/MMLU-PRO": 0.2148 } }, { "id": "ibm-granite/granite-3.1-8b-base", "name": "granite-3.1-8b-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4221, "hfopenllm_v2/BBH": 0.4777, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.3232 } }, { "id": "ibm-granite/granite-3.1-8b-instruct", "name": "granite-3.1-8b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.5364, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4707, "hfopenllm_v2/MMLU-PRO": 0.3537 } }, { "id": "ibm-granite/granite-3.2-2b-instruct", "name": "granite-3.2-2b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6152, "hfopenllm_v2/BBH": 0.4387, "hfopenllm_v2/MATH Level 5": 0.1443, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3646, "hfopenllm_v2/MMLU-PRO": 0.2783 } }, { "id": "ibm-granite/granite-3.2-8b-instruct", "name": "granite-3.2-8b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7275, "hfopenllm_v2/BBH": 0.5402, "hfopenllm_v2/MATH Level 5": 0.2379, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4562, "hfopenllm_v2/MMLU-PRO": 0.3512 } }, { "id": "ibm-granite/granite-7b-base", "name": "granite-7b-base", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2414, "hfopenllm_v2/BBH": 0.348, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.1834 } }, { "id": "ibm-granite/granite-7b-instruct", "name": "granite-7b-instruct", "developer": "ibm-granite", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2972, "hfopenllm_v2/BBH": 0.3723, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.2286 } }, { "id": "ibm/granite-20b-functioncalling-fc", "name": "Granite-20b-FunctionCalling (FC)", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 93.0, "bfcl/bfcl.overall.overall_accuracy": 23.23, "bfcl/bfcl.overall.total_cost_usd": 5.23, "bfcl/bfcl.overall.latency_mean_s": 3.2, "bfcl/bfcl.overall.latency_std_s": 3.43, "bfcl/bfcl.overall.latency_p95_s": 9.97, "bfcl/bfcl.non_live.ast_accuracy": 82.35, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 83.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 81.5, "bfcl/bfcl.live.live_accuracy": 58.7, "bfcl/bfcl.live.live_simple_ast_accuracy": 67.83, "bfcl/bfcl.live.live_multiple_ast_accuracy": 56.7, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 5.38, "bfcl/bfcl.multi_turn.base_accuracy": 9.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 3.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 6.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 3.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 0.0, "bfcl/bfcl.memory.kv_accuracy": 0.0, "bfcl/bfcl.memory.vector_accuracy": 0.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 0.0, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 75.13 } }, { "id": "ibm/granite-3-1-8b-instruct-fc", "name": "Granite-3.1-8B-Instruct (FC)", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 81.0, "bfcl/bfcl.overall.overall_accuracy": 27.1, "bfcl/bfcl.overall.total_cost_usd": 9.32, "bfcl/bfcl.overall.latency_mean_s": 13.23, "bfcl/bfcl.overall.latency_std_s": 31.28, "bfcl/bfcl.overall.latency_p95_s": 65.19, "bfcl/bfcl.non_live.ast_accuracy": 78.33, "bfcl/bfcl.non_live.simple_ast_accuracy": 67.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 70.0, "bfcl/bfcl.live.live_accuracy": 60.33, "bfcl/bfcl.live.live_simple_ast_accuracy": 58.53, "bfcl/bfcl.live.live_multiple_ast_accuracy": 61.82, "bfcl/bfcl.live.live_parallel_ast_accuracy": 18.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 41.67, "bfcl/bfcl.multi_turn.accuracy": 7.5, "bfcl/bfcl.multi_turn.base_accuracy": 11.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 2.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 7.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 9.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 14.41, "bfcl/bfcl.memory.kv_accuracy": 9.68, "bfcl/bfcl.memory.vector_accuracy": 7.1, "bfcl/bfcl.memory.recursive_summarization_accuracy": 26.45, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.98 } }, { "id": "ibm/granite-3-2-8b-instruct-fc", "name": "Granite-3.2-8B-Instruct (FC)", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 83.0, "bfcl/bfcl.overall.overall_accuracy": 26.87, "bfcl/bfcl.overall.total_cost_usd": 25.02, "bfcl/bfcl.overall.latency_mean_s": 36.13, "bfcl/bfcl.overall.latency_std_s": 81.76, "bfcl/bfcl.overall.latency_p95_s": 216.28, "bfcl/bfcl.non_live.ast_accuracy": 79.77, "bfcl/bfcl.non_live.simple_ast_accuracy": 69.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 72.5, "bfcl/bfcl.live.live_accuracy": 60.33, "bfcl/bfcl.live.live_simple_ast_accuracy": 60.47, "bfcl/bfcl.live.live_multiple_ast_accuracy": 61.16, "bfcl/bfcl.live.live_parallel_ast_accuracy": 25.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 45.83, "bfcl/bfcl.multi_turn.accuracy": 7.38, "bfcl/bfcl.multi_turn.base_accuracy": 9.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 3.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 8.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 9.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 12.47, "bfcl/bfcl.memory.kv_accuracy": 6.45, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 21.29, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 80.53 } }, { "id": "ibm/granite-3.3-8b-instruct", "name": "IBM Granite 3.3 8B Instruct", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.463, "helm_capabilities/MMLU-Pro": 0.343, "helm_capabilities/GPQA": 0.325, "helm_capabilities/IFEval": 0.729, "helm_capabilities/WildBench": 0.741, "helm_capabilities/Omni-MATH": 0.176 } }, { "id": "ibm/granite-4-0-350m-fc", "name": "Granite-4.0-350m (FC)", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 103.0, "bfcl/bfcl.overall.overall_accuracy": 18.98, "bfcl/bfcl.overall.total_cost_usd": 1.44, "bfcl/bfcl.overall.latency_mean_s": 1.74, "bfcl/bfcl.overall.latency_std_s": 4.85, "bfcl/bfcl.overall.latency_p95_s": 3.44, "bfcl/bfcl.non_live.ast_accuracy": 67.92, "bfcl/bfcl.non_live.simple_ast_accuracy": 61.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 84.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 70.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 55.5, "bfcl/bfcl.live.live_accuracy": 46.11, "bfcl/bfcl.live.live_simple_ast_accuracy": 61.24, "bfcl/bfcl.live.live_multiple_ast_accuracy": 42.36, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 33.33, "bfcl/bfcl.multi_turn.accuracy": 2.5, "bfcl/bfcl.multi_turn.base_accuracy": 5.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 3.23, "bfcl/bfcl.memory.kv_accuracy": 1.94, "bfcl/bfcl.memory.vector_accuracy": 1.29, "bfcl/bfcl.memory.recursive_summarization_accuracy": 6.45, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 60.84 } }, { "id": "ibm/granite-4.0-h-small", "name": "granite-4.0-h-small", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.7503, "global-mmlu-lite/Culturally Sensitive": 0.7182, "global-mmlu-lite/Culturally Agnostic": 0.7826, "global-mmlu-lite/Arabic": 0.7613, "global-mmlu-lite/English": 0.77, "global-mmlu-lite/Bengali": 0.7613, "global-mmlu-lite/German": 0.755, "global-mmlu-lite/French": 0.7594, "global-mmlu-lite/Hindi": 0.7575, "global-mmlu-lite/Indonesian": 0.7614, "global-mmlu-lite/Italian": 0.7525, "global-mmlu-lite/Japanese": 0.7406, "global-mmlu-lite/Korean": 0.7525, "global-mmlu-lite/Portuguese": 0.757, "global-mmlu-lite/Spanish": 0.7638, "global-mmlu-lite/Swahili": 0.7318, "global-mmlu-lite/Yoruba": 0.6921, "global-mmlu-lite/Chinese": 0.7475, "global-mmlu-lite/Burmese": 0.7419 } }, { "id": "ibm/merlinite-7b", "name": "merlinite-7b", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2499, "hfopenllm_v2/BBH": 0.5007, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4412, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "ibm/PowerLM-3b", "name": "PowerLM-3b", "developer": "ibm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3321, "hfopenllm_v2/BBH": 0.3679, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3563, "hfopenllm_v2/MMLU-PRO": 0.2016 } }, { "id": "icefog72/Ice0.15-02.10-RP", "name": "Ice0.15-02.10-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5343, "hfopenllm_v2/BBH": 0.4976, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.3066 } }, { "id": "icefog72/Ice0.16-02.10-RP", "name": "Ice0.16-02.10-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5069, "hfopenllm_v2/BBH": 0.4946, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4334, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "icefog72/Ice0.17-03.10-RP", "name": "Ice0.17-03.10-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5124, "hfopenllm_v2/BBH": 0.5007, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4334, "hfopenllm_v2/MMLU-PRO": 0.3085 } }, { "id": "icefog72/Ice0.27-06.11-RP", "name": "Ice0.27-06.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4918, "hfopenllm_v2/BBH": 0.5112, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.3154 } }, { "id": "icefog72/Ice0.29-06.11-RP", "name": "Ice0.29-06.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4861, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "icefog72/Ice0.31-08.11-RP", "name": "Ice0.31-08.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5146, "hfopenllm_v2/BBH": 0.5032, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3131 } }, { "id": "icefog72/Ice0.32-10.11-RP", "name": "Ice0.32-10.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4915, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4382, "hfopenllm_v2/MMLU-PRO": 0.31 } }, { "id": "icefog72/Ice0.34b-14.11-RP", "name": "Ice0.34b-14.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4762, "hfopenllm_v2/BBH": 0.5067, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.442, "hfopenllm_v2/MMLU-PRO": 0.3125 } }, { "id": "icefog72/Ice0.34n-14.11-RP", "name": "Ice0.34n-14.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4787, "hfopenllm_v2/BBH": 0.5091, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.438, "hfopenllm_v2/MMLU-PRO": 0.3124 } }, { "id": "icefog72/Ice0.37-18.11-RP", "name": "Ice0.37-18.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4972, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.3143 } }, { "id": "icefog72/Ice0.38-19.11-RP", "name": "Ice0.38-19.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4403, "hfopenllm_v2/BBH": 0.5101, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4367, "hfopenllm_v2/MMLU-PRO": 0.314 } }, { "id": "icefog72/Ice0.39-19.11-RP", "name": "Ice0.39-19.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4757, "hfopenllm_v2/BBH": 0.5093, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4341, "hfopenllm_v2/MMLU-PRO": 0.3127 } }, { "id": "icefog72/Ice0.40-20.11-RP", "name": "Ice0.40-20.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4763, "hfopenllm_v2/BBH": 0.5093, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4446, "hfopenllm_v2/MMLU-PRO": 0.3099 } }, { "id": "icefog72/Ice0.41-22.11-RP", "name": "Ice0.41-22.11-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.462, "hfopenllm_v2/BBH": 0.4723, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.456, "hfopenllm_v2/MMLU-PRO": 0.2618 } }, { "id": "icefog72/Ice0.50-16.01-RP", "name": "Ice0.50-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4385, "hfopenllm_v2/BBH": 0.498, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4381, "hfopenllm_v2/MMLU-PRO": 0.3069 } }, { "id": "icefog72/Ice0.50.1-16.01-RP", "name": "Ice0.50.1-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4829, "hfopenllm_v2/BBH": 0.5107, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4327, "hfopenllm_v2/MMLU-PRO": 0.3132 } }, { "id": "icefog72/Ice0.51-16.01-RP", "name": "Ice0.51-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4431, "hfopenllm_v2/BBH": 0.5044, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4437, "hfopenllm_v2/MMLU-PRO": 0.306 } }, { "id": "icefog72/Ice0.51.1-16.01-RP", "name": "Ice0.51.1-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4573, "hfopenllm_v2/BBH": 0.5121, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4394, "hfopenllm_v2/MMLU-PRO": 0.3104 } }, { "id": "icefog72/Ice0.52-16.01-RP", "name": "Ice0.52-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4503, "hfopenllm_v2/BBH": 0.5047, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4396, "hfopenllm_v2/MMLU-PRO": 0.308 } }, { "id": "icefog72/Ice0.52.1-16.01-RP", "name": "Ice0.52.1-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4549, "hfopenllm_v2/BBH": 0.5106, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4394, "hfopenllm_v2/MMLU-PRO": 0.3105 } }, { "id": "icefog72/Ice0.53-16.01-RP", "name": "Ice0.53-16.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4741, "hfopenllm_v2/BBH": 0.5102, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4327, "hfopenllm_v2/MMLU-PRO": 0.313 } }, { "id": "icefog72/Ice0.54-17.01-RP", "name": "Ice0.54-17.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4379, "hfopenllm_v2/BBH": 0.4853, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4874, "hfopenllm_v2/MMLU-PRO": 0.2326 } }, { "id": "icefog72/Ice0.55-17.01-RP", "name": "Ice0.55-17.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4961, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4725, "hfopenllm_v2/MMLU-PRO": 0.2658 } }, { "id": "icefog72/Ice0.57-17.01-RP", "name": "Ice0.57-17.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5152, "hfopenllm_v2/BBH": 0.5064, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4686, "hfopenllm_v2/MMLU-PRO": 0.2651 } }, { "id": "icefog72/Ice0.60-18.01-RP", "name": "Ice0.60-18.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5374, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.467, "hfopenllm_v2/MMLU-PRO": 0.2837 } }, { "id": "icefog72/Ice0.60.1-18.01-RP", "name": "Ice0.60.1-18.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5188, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4498, "hfopenllm_v2/MMLU-PRO": 0.2914 } }, { "id": "icefog72/Ice0.61-18.01-RP", "name": "Ice0.61-18.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5441, "hfopenllm_v2/BBH": 0.5105, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4697, "hfopenllm_v2/MMLU-PRO": 0.2709 } }, { "id": "icefog72/Ice0.62-18.01-RP", "name": "Ice0.62-18.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5367, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4538, "hfopenllm_v2/MMLU-PRO": 0.2877 } }, { "id": "icefog72/Ice0.62.1-24.01-RP", "name": "Ice0.62.1-24.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5182, "hfopenllm_v2/BBH": 0.5109, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4551, "hfopenllm_v2/MMLU-PRO": 0.2871 } }, { "id": "icefog72/Ice0.64-24.01-RP", "name": "Ice0.64-24.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5441, "hfopenllm_v2/BBH": 0.506, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.462, "hfopenllm_v2/MMLU-PRO": 0.2933 } }, { "id": "icefog72/Ice0.64.1-24.01-RP", "name": "Ice0.64.1-24.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5447, "hfopenllm_v2/BBH": 0.506, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.462, "hfopenllm_v2/MMLU-PRO": 0.2933 } }, { "id": "icefog72/Ice0.65-25.01-RP", "name": "Ice0.65-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5029, "hfopenllm_v2/BBH": 0.5096, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.434, "hfopenllm_v2/MMLU-PRO": 0.2997 } }, { "id": "icefog72/Ice0.66-25.01-RP", "name": "Ice0.66-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5325, "hfopenllm_v2/BBH": 0.5129, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4434, "hfopenllm_v2/MMLU-PRO": 0.3039 } }, { "id": "icefog72/Ice0.67-25.01-RP", "name": "Ice0.67-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5361, "hfopenllm_v2/BBH": 0.5113, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.3097 } }, { "id": "icefog72/Ice0.68-25.01-RP", "name": "Ice0.68-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5514, "hfopenllm_v2/BBH": 0.513, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4446, "hfopenllm_v2/MMLU-PRO": 0.3012 } }, { "id": "icefog72/Ice0.69-25.01-RP", "name": "Ice0.69-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5438, "hfopenllm_v2/BBH": 0.5098, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4486, "hfopenllm_v2/MMLU-PRO": 0.2965 } }, { "id": "icefog72/Ice0.7-29.09-RP", "name": "Ice0.7-29.09-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5176, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4238, "hfopenllm_v2/MMLU-PRO": 0.3127 } }, { "id": "icefog72/Ice0.70-25.01-RP", "name": "Ice0.70-25.01-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5498, "hfopenllm_v2/BBH": 0.5136, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4512, "hfopenllm_v2/MMLU-PRO": 0.2996 } }, { "id": "icefog72/Ice0.70.1-01.02-RP", "name": "Ice0.70.1-01.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.507, "hfopenllm_v2/BBH": 0.506, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4599, "hfopenllm_v2/MMLU-PRO": 0.2749 } }, { "id": "icefog72/Ice0.73-01.02-RP", "name": "Ice0.73-01.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5292, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4664, "hfopenllm_v2/MMLU-PRO": 0.2702 } }, { "id": "icefog72/Ice0.74-02.02-RP", "name": "Ice0.74-02.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2935, "hfopenllm_v2/BBH": 0.4646, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.2143 } }, { "id": "icefog72/Ice0.76-02.02-RP", "name": "Ice0.76-02.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4529, "hfopenllm_v2/BBH": 0.5086, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4362, "hfopenllm_v2/MMLU-PRO": 0.2652 } }, { "id": "icefog72/Ice0.77-02.02-RP", "name": "Ice0.77-02.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.531, "hfopenllm_v2/BBH": 0.5109, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4765, "hfopenllm_v2/MMLU-PRO": 0.2999 } }, { "id": "icefog72/Ice0.78-02.02-RP", "name": "Ice0.78-02.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4053, "hfopenllm_v2/BBH": 0.5002, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4686, "hfopenllm_v2/MMLU-PRO": 0.2955 } }, { "id": "icefog72/Ice0.80-03.02-RP", "name": "Ice0.80-03.02-RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5516, "hfopenllm_v2/BBH": 0.5098, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4923, "hfopenllm_v2/MMLU-PRO": 0.2912 } }, { "id": "icefog72/IceCocoaRP-7b", "name": "IceCocoaRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4962, "hfopenllm_v2/BBH": 0.4938, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3098 } }, { "id": "icefog72/IceCoffeeRP-7b", "name": "IceCoffeeRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4959, "hfopenllm_v2/BBH": 0.4889, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.416, "hfopenllm_v2/MMLU-PRO": 0.2975 } }, { "id": "icefog72/IceDrinkByFrankensteinV3RP", "name": "IceDrinkByFrankensteinV3RP", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4975, "hfopenllm_v2/BBH": 0.4833, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.2927 } }, { "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock", "name": "IceDrinkNameGoesHereRP-7b-Model_Stock", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4968, "hfopenllm_v2/BBH": 0.4658, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4067, "hfopenllm_v2/MMLU-PRO": 0.2817 } }, { "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock", "name": "IceDrinkNameNotFoundRP-7b-Model_Stock", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.513, "hfopenllm_v2/BBH": 0.5026, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.3064 } }, { "id": "icefog72/IceDrunkCherryRP-7b", "name": "IceDrunkCherryRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4898, "hfopenllm_v2/BBH": 0.4847, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4292, "hfopenllm_v2/MMLU-PRO": 0.3009 } }, { "id": "icefog72/IceDrunkenCherryRP-7b", "name": "IceDrunkenCherryRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4763, "hfopenllm_v2/BBH": 0.5093, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4446, "hfopenllm_v2/MMLU-PRO": 0.3099 } }, { "id": "icefog72/IceEspressoRPv2-7b", "name": "IceEspressoRPv2-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4977, "hfopenllm_v2/BBH": 0.5055, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4331, "hfopenllm_v2/MMLU-PRO": 0.3061 } }, { "id": "icefog72/IceLemonTeaRP-32k-7b", "name": "IceLemonTeaRP-32k-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5212, "hfopenllm_v2/BBH": 0.4997, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "icefog72/IceMartiniRP-7b", "name": "IceMartiniRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5045, "hfopenllm_v2/BBH": 0.4972, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4345, "hfopenllm_v2/MMLU-PRO": 0.3073 } }, { "id": "icefog72/IceNalyvkaRP-7b", "name": "IceNalyvkaRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5498, "hfopenllm_v2/BBH": 0.5136, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4512, "hfopenllm_v2/MMLU-PRO": 0.2996 } }, { "id": "icefog72/IceSakeRP-7b", "name": "IceSakeRP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5228, "hfopenllm_v2/BBH": 0.5119, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.413, "hfopenllm_v2/MMLU-PRO": 0.3177 } }, { "id": "icefog72/IceSakeV4RP-7b", "name": "IceSakeV4RP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4634, "hfopenllm_v2/BBH": 0.493, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4082, "hfopenllm_v2/MMLU-PRO": 0.3103 } }, { "id": "icefog72/IceSakeV6RP-7b", "name": "IceSakeV6RP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5033, "hfopenllm_v2/BBH": 0.4976, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.42, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "icefog72/IceSakeV8RP-7b", "name": "IceSakeV8RP-7b", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6086, "hfopenllm_v2/BBH": 0.4885, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3993, "hfopenllm_v2/MMLU-PRO": 0.301 } }, { "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3", "name": "IceTea21EnergyDrinkRPV13-DPOv3", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5263, "hfopenllm_v2/BBH": 0.502, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.3056 } }, { "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5", "name": "IceTea21EnergyDrinkRPV13-DPOv3.5", "developer": "icefog72", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4871, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.2498 } }, { "id": "IDEA-CCNL/Ziya-LLaMA-13B-v1", "name": "Ziya-LLaMA-13B-v1", "developer": "IDEA-CCNL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1697, "hfopenllm_v2/BBH": 0.2877, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3751, "hfopenllm_v2/MMLU-PRO": 0.1101 } }, { "id": "IDEA-CCNL/Ziya-LLaMA-7B-Reward", "name": "IDEA-CCNL/Ziya-LLaMA-7B-Reward", "developer": "IDEA-CCNL", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6378, "reward-bench/Chat": 0.8687, "reward-bench/Chat Hard": 0.4605, "reward-bench/Safety": 0.6405, "reward-bench/Reasoning": 0.5775, "reward-bench/Prior Sets (0.5 weight)": 0.6461 } }, { "id": "ifable/gemma-2-Ifable-9B", "name": "gemma-2-Ifable-9B", "developer": "ifable", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2984, "hfopenllm_v2/BBH": 0.5866, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4053, "hfopenllm_v2/MMLU-PRO": 0.4226 } }, { "id": "iFaz/llama31_8B_en_emo_v4", "name": "llama31_8B_en_emo_v4", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3043, "hfopenllm_v2/BBH": 0.4916, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3643, "hfopenllm_v2/MMLU-PRO": 0.3049 } }, { "id": "iFaz/llama32_1B_en_emo_v1", "name": "llama32_1B_en_emo_v1", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4408, "hfopenllm_v2/BBH": 0.338, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3489, "hfopenllm_v2/MMLU-PRO": 0.1761 } }, { "id": "iFaz/llama32_3B_en_emo_1000_stp", "name": "llama32_3B_en_emo_1000_stp", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7295, "hfopenllm_v2/BBH": 0.4522, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.3123 } }, { "id": "iFaz/llama32_3B_en_emo_2000_stp", "name": "llama32_3B_en_emo_2000_stp", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7369, "hfopenllm_v2/BBH": 0.4535, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.3098 } }, { "id": "iFaz/llama32_3B_en_emo_300_stp", "name": "llama32_3B_en_emo_300_stp", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7256, "hfopenllm_v2/BBH": 0.4505, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.3148 } }, { "id": "iFaz/llama32_3B_en_emo_5000_stp", "name": "llama32_3B_en_emo_5000_stp", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.71, "hfopenllm_v2/BBH": 0.4568, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3446, "hfopenllm_v2/MMLU-PRO": 0.3067 } }, { "id": "iFaz/llama32_3B_en_emo_v2", "name": "llama32_3B_en_emo_v2", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5454, "hfopenllm_v2/BBH": 0.4284, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3482, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "iFaz/llama32_3B_en_emo_v3", "name": "llama32_3B_en_emo_v3", "developer": "iFaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5759, "hfopenllm_v2/BBH": 0.4301, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3553, "hfopenllm_v2/MMLU-PRO": 0.271 } }, { "id": "ilsp/Llama-Krikri-8B-Instruct", "name": "Llama-Krikri-8B-Instruct", "developer": "ilsp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6079, "hfopenllm_v2/BBH": 0.5047, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.3313 } }, { "id": "IlyaGusev/gemma-2-2b-it-abliterated", "name": "gemma-2-2b-it-abliterated", "developer": "IlyaGusev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.4119, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3782, "hfopenllm_v2/MMLU-PRO": 0.2538 } }, { "id": "IlyaGusev/gemma-2-9b-it-abliterated", "name": "gemma-2-9b-it-abliterated", "developer": "IlyaGusev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7473, "hfopenllm_v2/BBH": 0.5906, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.3915 } }, { "id": "Infinirc/Infinirc-Llama3-8B-2G-Release-v1.0", "name": "Infinirc-Llama3-8B-2G-Release-v1.0", "developer": "Infinirc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2024, "hfopenllm_v2/BBH": 0.4351, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4609, "hfopenllm_v2/MMLU-PRO": 0.216 } }, { "id": "inflatebot/MN-12B-Mag-Mell-R1", "name": "MN-12B-Mag-Mell-R1", "developer": "inflatebot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4613, "hfopenllm_v2/BBH": 0.5304, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4002, "hfopenllm_v2/MMLU-PRO": 0.3438 } }, { "id": "infly/INF-ORM-Llama3.1-70B", "name": "infly/INF-ORM-Llama3.1-70B", "developer": "infly", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7648, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.9101, "reward-bench/Safety": 0.9644, "reward-bench/Reasoning": 0.9912, "reward-bench/Factuality": 0.7411, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.6995, "reward-bench/Focus": 0.903, "reward-bench/Ties": 0.8622 } }, { "id": "informatiker/Qwen2-7B-Instruct-abliterated", "name": "Qwen2-7B-Instruct-abliterated", "developer": "informatiker", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5822, "hfopenllm_v2/BBH": 0.5534, "hfopenllm_v2/MATH Level 5": 0.2636, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3888, "hfopenllm_v2/MMLU-PRO": 0.3873 } }, { "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0", "name": "BgGPT-Gemma-2-27B-IT-v1.0", "developer": "INSAIT-Institute", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", "name": "Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model", "developer": "insightfactory", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4588, "hfopenllm_v2/BBH": 0.4146, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3499, "hfopenllm_v2/MMLU-PRO": 0.296 } }, { "id": "instruction-pretrain/InstructLM-500M", "name": "InstructLM-500M", "developer": "instruction-pretrain", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1028, "hfopenllm_v2/BBH": 0.2941, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3528, "hfopenllm_v2/MMLU-PRO": 0.1141 } }, { "id": "Intel/neural-chat-7b-v3", "name": "neural-chat-7b-v3", "developer": "Intel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2778, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.5055, "hfopenllm_v2/MMLU-PRO": 0.2699 } }, { "id": "Intel/neural-chat-7b-v3-1", "name": "neural-chat-7b-v3-1", "developer": "Intel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4687, "hfopenllm_v2/BBH": 0.5052, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4979, "hfopenllm_v2/MMLU-PRO": 0.2678 } }, { "id": "Intel/neural-chat-7b-v3-2", "name": "neural-chat-7b-v3-2", "developer": "Intel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4988, "hfopenllm_v2/BBH": 0.5032, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4895, "hfopenllm_v2/MMLU-PRO": 0.2667 } }, { "id": "Intel/neural-chat-7b-v3-3", "name": "neural-chat-7b-v3-3", "developer": "Intel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4763, "hfopenllm_v2/BBH": 0.4877, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.486, "hfopenllm_v2/MMLU-PRO": 0.2625 } }, { "id": "internlm/internlm2-1_8b", "name": "internlm2-1_8b", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2198, "hfopenllm_v2/BBH": 0.388, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3813, "hfopenllm_v2/MMLU-PRO": 0.1588 } }, { "id": "internlm/internlm2-1_8b-reward", "name": "internlm/internlm2-1_8b-reward", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.3902, "reward-bench/Chat": 0.9358, "reward-bench/Chat Hard": 0.6623, "reward-bench/Safety": 0.4711, "reward-bench/Reasoning": 0.8724, "reward-bench/Factuality": 0.2758, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.4426, "reward-bench/Focus": 0.596, "reward-bench/Ties": 0.1934 } }, { "id": "internlm/internlm2-20b-reward", "name": "internlm/internlm2-20b-reward", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9016, "reward-bench/Factuality": 0.5558, "reward-bench/Precise IF": 0.3625, "reward-bench/Math": 0.5738, "reward-bench/Safety": 0.8946, "reward-bench/Focus": 0.7253, "reward-bench/Ties": 0.5483, "reward-bench/Chat": 0.9888, "reward-bench/Chat Hard": 0.7654, "reward-bench/Reasoning": 0.9576 } }, { "id": "internlm/internlm2-7b", "name": "internlm2-7b", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.228, "hfopenllm_v2/BBH": 0.5825, "hfopenllm_v2/MATH Level 5": 0.0857, "hfopenllm_v2/GPQA": 0.3367, "hfopenllm_v2/MUSR": 0.44, "hfopenllm_v2/MMLU-PRO": 0.19 } }, { "id": "internlm/internlm2-7b-reward", "name": "internlm/internlm2-7b-reward", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5335, "reward-bench/Chat": 0.9916, "reward-bench/Chat Hard": 0.6952, "reward-bench/Safety": 0.5956, "reward-bench/Reasoning": 0.9453, "reward-bench/Factuality": 0.4211, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.5628, "reward-bench/Focus": 0.7051, "reward-bench/Ties": 0.5164 } }, { "id": "internlm/internlm2-chat-1_8b", "name": "internlm2-chat-1_8b", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2387, "hfopenllm_v2/BBH": 0.4452, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.1839 } }, { "id": "internlm/internlm2_5-1_8b-chat", "name": "internlm2_5-1_8b-chat", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3849, "hfopenllm_v2/BBH": 0.4489, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3594, "hfopenllm_v2/MMLU-PRO": 0.1299 } }, { "id": "internlm/internlm2_5-20b-chat", "name": "internlm2_5-20b-chat", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.701, "hfopenllm_v2/BBH": 0.7474, "hfopenllm_v2/MATH Level 5": 0.4079, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.3998 } }, { "id": "internlm/internlm2_5-7b-chat", "name": "internlm2_5-7b-chat", "developer": "internlm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5539, "hfopenllm_v2/BBH": 0.7073, "hfopenllm_v2/MATH Level 5": 0.253, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4594, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "intervitens/mini-magnum-12b-v1.1", "name": "mini-magnum-12b-v1.1", "developer": "intervitens", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5156, "hfopenllm_v2/BBH": 0.5062, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.3291 } }, { "id": "IntervitensInc/internlm2_5-20b-llamafied", "name": "internlm2_5-20b-llamafied", "developer": "IntervitensInc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.341, "hfopenllm_v2/BBH": 0.7478, "hfopenllm_v2/MATH Level 5": 0.1715, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4475, "hfopenllm_v2/MMLU-PRO": 0.4051 } }, { "id": "inumulaisk/eval_model", "name": "eval_model", "developer": "inumulaisk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1931, "hfopenllm_v2/BBH": 0.3512, "hfopenllm_v2/MATH Level 5": 0.2976, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1664 } }, { "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", "name": "Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp", "developer": "invalid-coder", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4555, "hfopenllm_v2/BBH": 0.5158, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3992, "hfopenllm_v2/MMLU-PRO": 0.3146 } }, { "id": "Invalid-Null/PeiYangMe-0.5", "name": "PeiYangMe-0.5", "developer": "Invalid-Null", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1409, "hfopenllm_v2/BBH": 0.2791, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.1109 } }, { "id": "Invalid-Null/PeiYangMe-0.7", "name": "PeiYangMe-0.7", "developer": "Invalid-Null", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1491, "hfopenllm_v2/BBH": 0.3028, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2332, "hfopenllm_v2/MUSR": 0.3857, "hfopenllm_v2/MMLU-PRO": 0.1101 } }, { "id": "invisietch/EtherealRainbow-v0.2-8B", "name": "EtherealRainbow-v0.2-8B", "developer": "invisietch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3903, "hfopenllm_v2/BBH": 0.5102, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3827, "hfopenllm_v2/MMLU-PRO": 0.3653 } }, { "id": "invisietch/EtherealRainbow-v0.3-8B", "name": "EtherealRainbow-v0.3-8B", "developer": "invisietch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3682, "hfopenllm_v2/BBH": 0.5097, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3904, "hfopenllm_v2/MMLU-PRO": 0.3626 } }, { "id": "invisietch/MiS-Firefly-v0.2-22B", "name": "MiS-Firefly-v0.2-22B", "developer": "invisietch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5371, "hfopenllm_v2/BBH": 0.5514, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4694, "hfopenllm_v2/MMLU-PRO": 0.362 } }, { "id": "invisietch/Nimbus-Miqu-v0.1-70B", "name": "Nimbus-Miqu-v0.1-70B", "developer": "invisietch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4647, "hfopenllm_v2/BBH": 0.601, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4133, "hfopenllm_v2/MMLU-PRO": 0.3853 } }, { "id": "irahulpandey/mistralai-7B-slerp-v0.1", "name": "mistralai-7B-slerp-v0.1", "developer": "irahulpandey", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4966, "hfopenllm_v2/BBH": 0.5011, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.455, "hfopenllm_v2/MMLU-PRO": 0.2951 } }, { "id": "iRyanBell/ARC1", "name": "ARC1", "developer": "iRyanBell", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4411, "hfopenllm_v2/BBH": 0.4903, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3991, "hfopenllm_v2/MMLU-PRO": 0.3371 } }, { "id": "iRyanBell/ARC1-II", "name": "ARC1-II", "developer": "iRyanBell", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1708, "hfopenllm_v2/BBH": 0.3382, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4913, "hfopenllm_v2/MMLU-PRO": 0.1686 } }, { "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated", "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated", "developer": "Isaak-Carter", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7317, "hfopenllm_v2/BBH": 0.5396, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4087, "hfopenllm_v2/MMLU-PRO": 0.4276 } }, { "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", "name": "Josiefied-Qwen2.5-7B-Instruct-abliterated-v2", "developer": "Isaak-Carter", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7841, "hfopenllm_v2/BBH": 0.5311, "hfopenllm_v2/MATH Level 5": 0.4721, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.4128 } }, { "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4", "name": "JOSIEv4o-8b-stage1-v4", "developer": "Isaak-Carter", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2477, "hfopenllm_v2/BBH": 0.4758, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3641, "hfopenllm_v2/MMLU-PRO": 0.3292 } }, { "id": "J-LAB/Thynk_orpo", "name": "Thynk_orpo", "developer": "J-LAB", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2102, "hfopenllm_v2/BBH": 0.4463, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4515, "hfopenllm_v2/MMLU-PRO": 0.3231 } }, { "id": "JackFram/llama-160m", "name": "llama-160m", "developer": "JackFram", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1791, "hfopenllm_v2/BBH": 0.2888, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3792, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "JackFram/llama-68m", "name": "llama-68m", "developer": "JackFram", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1726, "hfopenllm_v2/BBH": 0.2936, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.391, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "Jacoby746/Casual-Magnum-34B", "name": "Casual-Magnum-34B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.193, "hfopenllm_v2/BBH": 0.6032, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.4078, "hfopenllm_v2/MMLU-PRO": 0.5184 } }, { "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B", "name": "Inf-Silent-Kunoichi-v0.1-2x7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.388, "hfopenllm_v2/BBH": 0.5185, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.3271 } }, { "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B", "name": "Inf-Silent-Kunoichi-v0.2-2x7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3636, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.3272 } }, { "id": "Jacoby746/Proto-Athena-4x7B", "name": "Proto-Athena-4x7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3703, "hfopenllm_v2/BBH": 0.5107, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4348, "hfopenllm_v2/MMLU-PRO": 0.3206 } }, { "id": "Jacoby746/Proto-Athena-v0.2-4x7B", "name": "Proto-Athena-v0.2-4x7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3752, "hfopenllm_v2/BBH": 0.5068, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4213, "hfopenllm_v2/MMLU-PRO": 0.3197 } }, { "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B", "name": "Proto-Harpy-Blazing-Light-v0.1-2x7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4905, "hfopenllm_v2/BBH": 0.5187, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.3301 } }, { "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B", "name": "Proto-Harpy-Spark-v0.1-7B", "developer": "Jacoby746", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4333, "hfopenllm_v2/BBH": 0.4736, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3069 } }, { "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", "name": "pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model", "developer": "jaredjoss", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1572, "hfopenllm_v2/BBH": 0.2863, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3607, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B", "name": "Auro-Kosmos-EVAA-v2-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4778, "hfopenllm_v2/BBH": 0.5447, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.425, "hfopenllm_v2/MMLU-PRO": 0.3858 } }, { "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B", "name": "Auro-Kosmos-EVAA-v2.1-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4666, "hfopenllm_v2/BBH": 0.5444, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3826 } }, { "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B", "name": "Auro-Kosmos-EVAA-v2.2-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4268, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3798 } }, { "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B", "name": "Auro-Kosmos-EVAA-v2.3-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4271, "hfopenllm_v2/BBH": 0.5441, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.3784 } }, { "id": "jaspionjader/bbb-1", "name": "bbb-1", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4864, "hfopenllm_v2/BBH": 0.5376, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3897 } }, { "id": "jaspionjader/bbb-2", "name": "bbb-2", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4077, "hfopenllm_v2/BBH": 0.5067, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.3635 } }, { "id": "jaspionjader/bbb-3", "name": "bbb-3", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4168, "hfopenllm_v2/BBH": 0.5158, "hfopenllm_v2/MATH Level 5": 0.1405, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4265, "hfopenllm_v2/MMLU-PRO": 0.3856 } }, { "id": "jaspionjader/bbb-4", "name": "bbb-4", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4768, "hfopenllm_v2/BBH": 0.5212, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4092, "hfopenllm_v2/MMLU-PRO": 0.3773 } }, { "id": "jaspionjader/bbb-5", "name": "bbb-5", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4703, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3834 } }, { "id": "jaspionjader/bbb-6", "name": "bbb-6", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.488, "hfopenllm_v2/BBH": 0.5211, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.3871 } }, { "id": "jaspionjader/bbb-7", "name": "bbb-7", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4828, "hfopenllm_v2/BBH": 0.5211, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4038, "hfopenllm_v2/MMLU-PRO": 0.386 } }, { "id": "jaspionjader/bh-1", "name": "bh-1", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4284, "hfopenllm_v2/BBH": 0.589, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4441, "hfopenllm_v2/MMLU-PRO": 0.3449 } }, { "id": "jaspionjader/bh-10", "name": "bh-10", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4618, "hfopenllm_v2/BBH": 0.5856, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3708 } }, { "id": "jaspionjader/bh-11", "name": "bh-11", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4575, "hfopenllm_v2/BBH": 0.5851, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "jaspionjader/bh-12", "name": "bh-12", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4734, "hfopenllm_v2/BBH": 0.5802, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4145, "hfopenllm_v2/MMLU-PRO": 0.3737 } }, { "id": "jaspionjader/bh-13", "name": "bh-13", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4698, "hfopenllm_v2/BBH": 0.5778, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4159, "hfopenllm_v2/MMLU-PRO": 0.373 } }, { "id": "jaspionjader/bh-15", "name": "bh-15", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4745, "hfopenllm_v2/BBH": 0.5819, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "jaspionjader/bh-16", "name": "bh-16", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4731, "hfopenllm_v2/BBH": 0.5783, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4159, "hfopenllm_v2/MMLU-PRO": 0.3776 } }, { "id": "jaspionjader/bh-17", "name": "bh-17", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4722, "hfopenllm_v2/BBH": 0.5776, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3757 } }, { "id": "jaspionjader/bh-18", "name": "bh-18", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4725, "hfopenllm_v2/BBH": 0.5824, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3757 } }, { "id": "jaspionjader/bh-19", "name": "bh-19", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4584, "hfopenllm_v2/BBH": 0.5766, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3775 } }, { "id": "jaspionjader/bh-2", "name": "bh-2", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4579, "hfopenllm_v2/BBH": 0.5937, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3695 } }, { "id": "jaspionjader/bh-20", "name": "bh-20", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4727, "hfopenllm_v2/BBH": 0.575, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3768 } }, { "id": "jaspionjader/bh-21", "name": "bh-21", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.47, "hfopenllm_v2/BBH": 0.5738, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3776 } }, { "id": "jaspionjader/bh-22", "name": "bh-22", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.46, "hfopenllm_v2/BBH": 0.5793, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.3764 } }, { "id": "jaspionjader/bh-23", "name": "bh-23", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4658, "hfopenllm_v2/BBH": 0.57, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4197, "hfopenllm_v2/MMLU-PRO": 0.3796 } }, { "id": "jaspionjader/bh-24", "name": "bh-24", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4715, "hfopenllm_v2/BBH": 0.5717, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3809 } }, { "id": "jaspionjader/bh-25", "name": "bh-25", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4752, "hfopenllm_v2/BBH": 0.5706, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "jaspionjader/bh-26", "name": "bh-26", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4691, "hfopenllm_v2/BBH": 0.5735, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3772 } }, { "id": "jaspionjader/bh-27", "name": "bh-27", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4819, "hfopenllm_v2/BBH": 0.5714, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.3799 } }, { "id": "jaspionjader/bh-28", "name": "bh-28", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4785, "hfopenllm_v2/BBH": 0.5703, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4131, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "jaspionjader/bh-29", "name": "bh-29", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4688, "hfopenllm_v2/BBH": 0.567, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.3819 } }, { "id": "jaspionjader/bh-3", "name": "bh-3", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4664, "hfopenllm_v2/BBH": 0.5891, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3702 } }, { "id": "jaspionjader/bh-30", "name": "bh-30", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4666, "hfopenllm_v2/BBH": 0.5706, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4144, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "jaspionjader/bh-31", "name": "bh-31", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4727, "hfopenllm_v2/BBH": 0.5665, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4104, "hfopenllm_v2/MMLU-PRO": 0.382 } }, { "id": "jaspionjader/bh-32", "name": "bh-32", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4636, "hfopenllm_v2/BBH": 0.5662, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4157, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "jaspionjader/bh-33", "name": "bh-33", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4685, "hfopenllm_v2/BBH": 0.5653, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4157, "hfopenllm_v2/MMLU-PRO": 0.3808 } }, { "id": "jaspionjader/bh-34", "name": "bh-34", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4624, "hfopenllm_v2/BBH": 0.5681, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3804 } }, { "id": "jaspionjader/bh-35", "name": "bh-35", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4721, "hfopenllm_v2/BBH": 0.564, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4183, "hfopenllm_v2/MMLU-PRO": 0.383 } }, { "id": "jaspionjader/bh-36", "name": "bh-36", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4666, "hfopenllm_v2/BBH": 0.5664, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4196, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "jaspionjader/bh-37", "name": "bh-37", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.488, "hfopenllm_v2/BBH": 0.5625, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4156, "hfopenllm_v2/MMLU-PRO": 0.3828 } }, { "id": "jaspionjader/bh-38", "name": "bh-38", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4618, "hfopenllm_v2/BBH": 0.5658, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4117, "hfopenllm_v2/MMLU-PRO": 0.3811 } }, { "id": "jaspionjader/bh-39", "name": "bh-39", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4576, "hfopenllm_v2/BBH": 0.5633, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4262, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "jaspionjader/bh-4", "name": "bh-4", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4673, "hfopenllm_v2/BBH": 0.5892, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3705 } }, { "id": "jaspionjader/bh-40", "name": "bh-40", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4536, "hfopenllm_v2/BBH": 0.5634, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.3835 } }, { "id": "jaspionjader/bh-41", "name": "bh-41", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.474, "hfopenllm_v2/BBH": 0.5614, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4183, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "jaspionjader/bh-42", "name": "bh-42", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.466, "hfopenllm_v2/BBH": 0.5646, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.421, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "jaspionjader/bh-43", "name": "bh-43", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.46, "hfopenllm_v2/BBH": 0.5635, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4156, "hfopenllm_v2/MMLU-PRO": 0.382 } }, { "id": "jaspionjader/bh-44", "name": "bh-44", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4706, "hfopenllm_v2/BBH": 0.5643, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4249, "hfopenllm_v2/MMLU-PRO": 0.3834 } }, { "id": "jaspionjader/bh-46", "name": "bh-46", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4727, "hfopenllm_v2/BBH": 0.5632, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4262, "hfopenllm_v2/MMLU-PRO": 0.3822 } }, { "id": "jaspionjader/bh-47", "name": "bh-47", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4652, "hfopenllm_v2/BBH": 0.5546, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4156, "hfopenllm_v2/MMLU-PRO": 0.3855 } }, { "id": "jaspionjader/bh-48", "name": "bh-48", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4688, "hfopenllm_v2/BBH": 0.5541, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4209, "hfopenllm_v2/MMLU-PRO": 0.386 } }, { "id": "jaspionjader/bh-49", "name": "bh-49", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4725, "hfopenllm_v2/BBH": 0.554, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4129, "hfopenllm_v2/MMLU-PRO": 0.3808 } }, { "id": "jaspionjader/bh-5", "name": "bh-5", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4652, "hfopenllm_v2/BBH": 0.5882, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3702 } }, { "id": "jaspionjader/bh-50", "name": "bh-50", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4725, "hfopenllm_v2/BBH": 0.5553, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4169, "hfopenllm_v2/MMLU-PRO": 0.3842 } }, { "id": "jaspionjader/bh-51", "name": "bh-51", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.463, "hfopenllm_v2/BBH": 0.5557, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4168, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "jaspionjader/bh-52", "name": "bh-52", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4536, "hfopenllm_v2/BBH": 0.5444, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4169, "hfopenllm_v2/MMLU-PRO": 0.3843 } }, { "id": "jaspionjader/bh-53", "name": "bh-53", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.478, "hfopenllm_v2/BBH": 0.5494, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4196, "hfopenllm_v2/MMLU-PRO": 0.3858 } }, { "id": "jaspionjader/bh-54", "name": "bh-54", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4841, "hfopenllm_v2/BBH": 0.5548, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "jaspionjader/bh-55", "name": "bh-55", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4709, "hfopenllm_v2/BBH": 0.555, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4222, "hfopenllm_v2/MMLU-PRO": 0.3846 } }, { "id": "jaspionjader/bh-56", "name": "bh-56", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.46, "hfopenllm_v2/BBH": 0.5447, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4116, "hfopenllm_v2/MMLU-PRO": 0.3844 } }, { "id": "jaspionjader/bh-57", "name": "bh-57", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4405, "hfopenllm_v2/BBH": 0.5425, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.421, "hfopenllm_v2/MMLU-PRO": 0.3896 } }, { "id": "jaspionjader/bh-58", "name": "bh-58", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.463, "hfopenllm_v2/BBH": 0.5446, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4183, "hfopenllm_v2/MMLU-PRO": 0.3896 } }, { "id": "jaspionjader/bh-59", "name": "bh-59", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4341, "hfopenllm_v2/BBH": 0.5512, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.3838 } }, { "id": "jaspionjader/bh-6", "name": "bh-6", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4621, "hfopenllm_v2/BBH": 0.5891, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3698 } }, { "id": "jaspionjader/bh-60", "name": "bh-60", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4207, "hfopenllm_v2/BBH": 0.5369, "hfopenllm_v2/MATH Level 5": 0.1579, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4289, "hfopenllm_v2/MMLU-PRO": 0.3689 } }, { "id": "jaspionjader/bh-61", "name": "bh-61", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4247, "hfopenllm_v2/BBH": 0.5271, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "jaspionjader/bh-62", "name": "bh-62", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.415, "hfopenllm_v2/BBH": 0.5379, "hfopenllm_v2/MATH Level 5": 0.1624, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4289, "hfopenllm_v2/MMLU-PRO": 0.3719 } }, { "id": "jaspionjader/bh-63", "name": "bh-63", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4308, "hfopenllm_v2/BBH": 0.4917, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4313, "hfopenllm_v2/MMLU-PRO": 0.3248 } }, { "id": "jaspionjader/bh-64", "name": "bh-64", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.414, "hfopenllm_v2/BBH": 0.536, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4355, "hfopenllm_v2/MMLU-PRO": 0.3693 } }, { "id": "jaspionjader/bh-7", "name": "bh-7", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4624, "hfopenllm_v2/BBH": 0.5861, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4119, "hfopenllm_v2/MMLU-PRO": 0.3715 } }, { "id": "jaspionjader/bh-8", "name": "bh-8", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4597, "hfopenllm_v2/BBH": 0.59, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4265, "hfopenllm_v2/MMLU-PRO": 0.372 } }, { "id": "jaspionjader/bh-9", "name": "bh-9", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4509, "hfopenllm_v2/BBH": 0.585, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3703 } }, { "id": "jaspionjader/dp-6-8b", "name": "dp-6-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4806, "hfopenllm_v2/BBH": 0.53, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4434, "hfopenllm_v2/MMLU-PRO": 0.3897 } }, { "id": "jaspionjader/dp-7-8b", "name": "dp-7-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4498, "hfopenllm_v2/BBH": 0.5291, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4407, "hfopenllm_v2/MMLU-PRO": 0.3934 } }, { "id": "jaspionjader/ek-6", "name": "ek-6", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4642, "hfopenllm_v2/BBH": 0.5219, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4144, "hfopenllm_v2/MMLU-PRO": 0.3861 } }, { "id": "jaspionjader/ek-7", "name": "ek-7", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4767, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3887 } }, { "id": "jaspionjader/f-1-8b", "name": "f-1-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4983, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4527, "hfopenllm_v2/MMLU-PRO": 0.3907 } }, { "id": "jaspionjader/f-2-8b", "name": "f-2-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4824, "hfopenllm_v2/BBH": 0.5294, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4501, "hfopenllm_v2/MMLU-PRO": 0.3962 } }, { "id": "jaspionjader/f-3-8b", "name": "f-3-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4803, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4421, "hfopenllm_v2/MMLU-PRO": 0.3954 } }, { "id": "jaspionjader/f-4-8b", "name": "f-4-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4797, "hfopenllm_v2/BBH": 0.5289, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4514, "hfopenllm_v2/MMLU-PRO": 0.3956 } }, { "id": "jaspionjader/f-5-8b", "name": "f-5-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5044, "hfopenllm_v2/BBH": 0.5313, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4461, "hfopenllm_v2/MMLU-PRO": 0.3949 } }, { "id": "jaspionjader/f-6-8b", "name": "f-6-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4846, "hfopenllm_v2/BBH": 0.5241, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4474, "hfopenllm_v2/MMLU-PRO": 0.3939 } }, { "id": "jaspionjader/f-7-8b", "name": "f-7-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4462, "hfopenllm_v2/BBH": 0.5277, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4315, "hfopenllm_v2/MMLU-PRO": 0.3936 } }, { "id": "jaspionjader/f-8-8b", "name": "f-8-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4739, "hfopenllm_v2/BBH": 0.5259, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.394 } }, { "id": "jaspionjader/f-9-8b", "name": "f-9-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4602, "hfopenllm_v2/BBH": 0.5292, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4461, "hfopenllm_v2/MMLU-PRO": 0.3944 } }, { "id": "jaspionjader/fct-14-8b", "name": "fct-14-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4129, "hfopenllm_v2/BBH": 0.5206, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3875 } }, { "id": "jaspionjader/fct-9-8b", "name": "fct-9-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4354, "hfopenllm_v2/BBH": 0.5205, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.3932 } }, { "id": "jaspionjader/fr-1-8b", "name": "fr-1-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4211, "hfopenllm_v2/BBH": 0.5142, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.361 } }, { "id": "jaspionjader/fr-10-8b", "name": "fr-10-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4402, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4119, "hfopenllm_v2/MMLU-PRO": 0.3863 } }, { "id": "jaspionjader/fr-3-8b", "name": "fr-3-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4326, "hfopenllm_v2/BBH": 0.5255, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3863 } }, { "id": "jaspionjader/gamma-Kosmos-EVAA-8B", "name": "gamma-Kosmos-EVAA-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.425, "hfopenllm_v2/BBH": 0.5253, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4412, "hfopenllm_v2/MMLU-PRO": 0.3776 } }, { "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B", "name": "gamma-Kosmos-EVAA-v2-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4233, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4344, "hfopenllm_v2/MMLU-PRO": 0.3756 } }, { "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B", "name": "gamma-Kosmos-EVAA-v3-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4333, "hfopenllm_v2/BBH": 0.5278, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4263, "hfopenllm_v2/MMLU-PRO": 0.3898 } }, { "id": "jaspionjader/knf-2-8b", "name": "knf-2-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.425, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3875 } }, { "id": "jaspionjader/knfp-2-8b", "name": "knfp-2-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5327, "hfopenllm_v2/BBH": 0.5305, "hfopenllm_v2/MATH Level 5": 0.1427, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3726 } }, { "id": "jaspionjader/knfp-3-8b", "name": "knfp-3-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4946, "hfopenllm_v2/BBH": 0.52, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3881 } }, { "id": "jaspionjader/Kosmos-Aurora_faustus-8B", "name": "Kosmos-Aurora_faustus-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4432, "hfopenllm_v2/BBH": 0.526, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4117, "hfopenllm_v2/MMLU-PRO": 0.3813 } }, { "id": "jaspionjader/Kosmos-Elusive-8b", "name": "Kosmos-Elusive-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4169, "hfopenllm_v2/BBH": 0.5339, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4078, "hfopenllm_v2/MMLU-PRO": 0.376 } }, { "id": "jaspionjader/Kosmos-Elusive-VENN-8B", "name": "Kosmos-Elusive-VENN-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4233, "hfopenllm_v2/BBH": 0.5356, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4157, "hfopenllm_v2/MMLU-PRO": 0.3797 } }, { "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B", "name": "Kosmos-Elusive-VENN-Asymmetric-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4542, "hfopenllm_v2/BBH": 0.5313, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3842 } }, { "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B", "name": "Kosmos-Elusive-VENN-Aurora_faustus-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4335, "hfopenllm_v2/BBH": 0.5304, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.3795 } }, { "id": "jaspionjader/Kosmos-EVAA-8B", "name": "Kosmos-EVAA-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4405, "hfopenllm_v2/BBH": 0.5312, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.3818 } }, { "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B", "name": "Kosmos-EVAA-Franken-Immersive-v39-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4378, "hfopenllm_v2/BBH": 0.519, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.39 } }, { "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B", "name": "Kosmos-EVAA-Franken-v38-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4356, "hfopenllm_v2/BBH": 0.523, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4212, "hfopenllm_v2/MMLU-PRO": 0.389 } }, { "id": "jaspionjader/Kosmos-EVAA-Fusion-8B", "name": "Kosmos-EVAA-Fusion-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4418, "hfopenllm_v2/BBH": 0.5406, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.386 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-8B", "name": "Kosmos-EVAA-gamma-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4572, "hfopenllm_v2/BBH": 0.5322, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.3901 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B", "name": "Kosmos-EVAA-gamma-alt-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4542, "hfopenllm_v2/BBH": 0.5298, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4292, "hfopenllm_v2/MMLU-PRO": 0.3896 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B", "name": "Kosmos-EVAA-gamma-light-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4581, "hfopenllm_v2/BBH": 0.5376, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.3943 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B", "name": "Kosmos-EVAA-gamma-light-alt-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4454, "hfopenllm_v2/BBH": 0.5327, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4305, "hfopenllm_v2/MMLU-PRO": 0.3923 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B", "name": "Kosmos-EVAA-gamma-ultra-light-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4563, "hfopenllm_v2/BBH": 0.5316, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4197, "hfopenllm_v2/MMLU-PRO": 0.3915 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B", "name": "Kosmos-EVAA-gamma-v13-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4429, "hfopenllm_v2/BBH": 0.5359, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B", "name": "Kosmos-EVAA-gamma-v14-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.438, "hfopenllm_v2/BBH": 0.5363, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3931 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B", "name": "Kosmos-EVAA-gamma-v15-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4654, "hfopenllm_v2/BBH": 0.5343, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.3941 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B", "name": "Kosmos-EVAA-gamma-v16-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4557, "hfopenllm_v2/BBH": 0.5344, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.3917 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B", "name": "Kosmos-EVAA-gamma-v17-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4462, "hfopenllm_v2/BBH": 0.5347, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.3923 } }, { "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B", "name": "Kosmos-EVAA-gamma-v18-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4341, "hfopenllm_v2/BBH": 0.5339, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3905 } }, { "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B", "name": "Kosmos-EVAA-immersive-sof-v44-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4408, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4144, "hfopenllm_v2/MMLU-PRO": 0.3888 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-8B", "name": "Kosmos-EVAA-PRP-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3405, "hfopenllm_v2/BBH": 0.5196, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4301, "hfopenllm_v2/MMLU-PRO": 0.3647 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B", "name": "Kosmos-EVAA-PRP-light-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3824, "hfopenllm_v2/BBH": 0.5271, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4249, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B", "name": "Kosmos-EVAA-PRP-v23-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4041, "hfopenllm_v2/BBH": 0.529, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4368, "hfopenllm_v2/MMLU-PRO": 0.3706 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B", "name": "Kosmos-EVAA-PRP-v24-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4259, "hfopenllm_v2/BBH": 0.5276, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.3779 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B", "name": "Kosmos-EVAA-PRP-v25-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4421, "hfopenllm_v2/BBH": 0.5291, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4303, "hfopenllm_v2/MMLU-PRO": 0.3716 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B", "name": "Kosmos-EVAA-PRP-v26-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4414, "hfopenllm_v2/BBH": 0.5271, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.3793 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B", "name": "Kosmos-EVAA-PRP-v27-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4378, "hfopenllm_v2/BBH": 0.529, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.3755 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B", "name": "Kosmos-EVAA-PRP-v28-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4366, "hfopenllm_v2/BBH": 0.5295, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.433, "hfopenllm_v2/MMLU-PRO": 0.375 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B", "name": "Kosmos-EVAA-PRP-v29-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4487, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.3765 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B", "name": "Kosmos-EVAA-PRP-v30-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4295, "hfopenllm_v2/BBH": 0.5328, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4263, "hfopenllm_v2/MMLU-PRO": 0.3938 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B", "name": "Kosmos-EVAA-PRP-v31-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4399, "hfopenllm_v2/BBH": 0.5315, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3935 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B", "name": "Kosmos-EVAA-PRP-v32-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4487, "hfopenllm_v2/BBH": 0.5293, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B", "name": "Kosmos-EVAA-PRP-v33-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4302, "hfopenllm_v2/BBH": 0.5321, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.3909 } }, { "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B", "name": "Kosmos-EVAA-PRP-v34-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4563, "hfopenllm_v2/BBH": 0.5333, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.3927 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-8B", "name": "Kosmos-EVAA-TSN-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4721, "hfopenllm_v2/BBH": 0.5177, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4329, "hfopenllm_v2/MMLU-PRO": 0.3816 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B", "name": "Kosmos-EVAA-TSN-light-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4685, "hfopenllm_v2/BBH": 0.5235, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4289, "hfopenllm_v2/MMLU-PRO": 0.3806 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B", "name": "Kosmos-EVAA-TSN-v19-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4564, "hfopenllm_v2/BBH": 0.5316, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.379 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B", "name": "Kosmos-EVAA-TSN-v20-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4423, "hfopenllm_v2/BBH": 0.525, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.421, "hfopenllm_v2/MMLU-PRO": 0.3936 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B", "name": "Kosmos-EVAA-TSN-v21-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.467, "hfopenllm_v2/BBH": 0.5248, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.3816 } }, { "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B", "name": "Kosmos-EVAA-TSN-v22-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4673, "hfopenllm_v2/BBH": 0.5246, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4303, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "jaspionjader/Kosmos-EVAA-v10-8B", "name": "Kosmos-EVAA-v10-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4262, "hfopenllm_v2/BBH": 0.5376, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4224, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "jaspionjader/Kosmos-EVAA-v11-8B", "name": "Kosmos-EVAA-v11-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4426, "hfopenllm_v2/BBH": 0.5359, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.3836 } }, { "id": "jaspionjader/Kosmos-EVAA-v12-8B", "name": "Kosmos-EVAA-v12-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4378, "hfopenllm_v2/BBH": 0.5349, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3836 } }, { "id": "jaspionjader/Kosmos-EVAA-v2-8B", "name": "Kosmos-EVAA-v2-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4396, "hfopenllm_v2/BBH": 0.5341, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3826 } }, { "id": "jaspionjader/Kosmos-EVAA-v3-8B", "name": "Kosmos-EVAA-v3-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4411, "hfopenllm_v2/BBH": 0.5331, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4224, "hfopenllm_v2/MMLU-PRO": 0.3821 } }, { "id": "jaspionjader/Kosmos-EVAA-v4-8B", "name": "Kosmos-EVAA-v4-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4289, "hfopenllm_v2/BBH": 0.5337, "hfopenllm_v2/MATH Level 5": 0.1254, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4197, "hfopenllm_v2/MMLU-PRO": 0.3817 } }, { "id": "jaspionjader/Kosmos-EVAA-v5-8B", "name": "Kosmos-EVAA-v5-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.446, "hfopenllm_v2/BBH": 0.5345, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4224, "hfopenllm_v2/MMLU-PRO": 0.3821 } }, { "id": "jaspionjader/Kosmos-EVAA-v6-8B", "name": "Kosmos-EVAA-v6-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4396, "hfopenllm_v2/BBH": 0.538, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.3821 } }, { "id": "jaspionjader/Kosmos-EVAA-v7-8B", "name": "Kosmos-EVAA-v7-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4277, "hfopenllm_v2/BBH": 0.5335, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3836 } }, { "id": "jaspionjader/Kosmos-EVAA-v8-8B", "name": "Kosmos-EVAA-v8-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4383, "hfopenllm_v2/BBH": 0.5359, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.421, "hfopenllm_v2/MMLU-PRO": 0.3827 } }, { "id": "jaspionjader/Kosmos-EVAA-v9-8B", "name": "Kosmos-EVAA-v9-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4369, "hfopenllm_v2/BBH": 0.5361, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.382 } }, { "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B", "name": "Kosmos-EVAA-v9-TitanFusion-Mix-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4284, "hfopenllm_v2/BBH": 0.554, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.3836 } }, { "id": "jaspionjader/Kosmos-VENN-8B", "name": "Kosmos-VENN-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4332, "hfopenllm_v2/BBH": 0.5318, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3801 } }, { "id": "jaspionjader/kstc-1-8b", "name": "kstc-1-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4643, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4158, "hfopenllm_v2/MMLU-PRO": 0.3892 } }, { "id": "jaspionjader/kstc-11-8b", "name": "kstc-11-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4757, "hfopenllm_v2/BBH": 0.5189, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3879 } }, { "id": "jaspionjader/kstc-4-8b", "name": "kstc-4-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.477, "hfopenllm_v2/BBH": 0.5216, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3869 } }, { "id": "jaspionjader/kstc-5-8b", "name": "kstc-5-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4721, "hfopenllm_v2/BBH": 0.5211, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4224, "hfopenllm_v2/MMLU-PRO": 0.3892 } }, { "id": "jaspionjader/kstc-6-8b", "name": "kstc-6-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4944, "hfopenllm_v2/BBH": 0.5231, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3857 } }, { "id": "jaspionjader/kstc-8-8b", "name": "kstc-8-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.491, "hfopenllm_v2/BBH": 0.5239, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3889 } }, { "id": "jaspionjader/kstc-9-8b", "name": "kstc-9-8b", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4861, "hfopenllm_v2/BBH": 0.5238, "hfopenllm_v2/MATH Level 5": 0.136, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3872 } }, { "id": "jaspionjader/PRP-Kosmos-EVAA-8B", "name": "PRP-Kosmos-EVAA-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3633, "hfopenllm_v2/BBH": 0.5237, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.425, "hfopenllm_v2/MMLU-PRO": 0.3766 } }, { "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B", "name": "PRP-Kosmos-EVAA-light-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4321, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4235, "hfopenllm_v2/MMLU-PRO": 0.3631 } }, { "id": "jaspionjader/slu-10", "name": "slu-10", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.436, "hfopenllm_v2/BBH": 0.5096, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.392, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "jaspionjader/slu-11", "name": "slu-11", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3725, "hfopenllm_v2/BBH": 0.489, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3919, "hfopenllm_v2/MMLU-PRO": 0.3382 } }, { "id": "jaspionjader/slu-13", "name": "slu-13", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4378, "hfopenllm_v2/BBH": 0.5097, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3814, "hfopenllm_v2/MMLU-PRO": 0.358 } }, { "id": "jaspionjader/slu-14", "name": "slu-14", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4107, "hfopenllm_v2/BBH": 0.5089, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.396, "hfopenllm_v2/MMLU-PRO": 0.3627 } }, { "id": "jaspionjader/slu-17", "name": "slu-17", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4217, "hfopenllm_v2/BBH": 0.5071, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3761, "hfopenllm_v2/MMLU-PRO": 0.3619 } }, { "id": "jaspionjader/slu-2", "name": "slu-2", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4016, "hfopenllm_v2/BBH": 0.5008, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3959, "hfopenllm_v2/MMLU-PRO": 0.3506 } }, { "id": "jaspionjader/slu-20", "name": "slu-20", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4393, "hfopenllm_v2/BBH": 0.5061, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3933, "hfopenllm_v2/MMLU-PRO": 0.3665 } }, { "id": "jaspionjader/slu-22", "name": "slu-22", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4321, "hfopenllm_v2/BBH": 0.5082, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.3893, "hfopenllm_v2/MMLU-PRO": 0.365 } }, { "id": "jaspionjader/slu-23", "name": "slu-23", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4478, "hfopenllm_v2/BBH": 0.5132, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4092, "hfopenllm_v2/MMLU-PRO": 0.3725 } }, { "id": "jaspionjader/slu-25", "name": "slu-25", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.45, "hfopenllm_v2/BBH": 0.5095, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "jaspionjader/slu-29", "name": "slu-29", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4431, "hfopenllm_v2/BBH": 0.5096, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3933, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "jaspionjader/slu-32", "name": "slu-32", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4516, "hfopenllm_v2/BBH": 0.5167, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.3766 } }, { "id": "jaspionjader/slu-33", "name": "slu-33", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4457, "hfopenllm_v2/BBH": 0.5081, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "jaspionjader/slu-34", "name": "slu-34", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4351, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.388, "hfopenllm_v2/MMLU-PRO": 0.372 } }, { "id": "jaspionjader/slu-35", "name": "slu-35", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4242, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.3676 } }, { "id": "jaspionjader/slu-36", "name": "slu-36", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4518, "hfopenllm_v2/BBH": 0.5087, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3933, "hfopenllm_v2/MMLU-PRO": 0.3711 } }, { "id": "jaspionjader/slu-37", "name": "slu-37", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4534, "hfopenllm_v2/BBH": 0.51, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.3695 } }, { "id": "jaspionjader/slu-6", "name": "slu-6", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4117, "hfopenllm_v2/BBH": 0.5099, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4066, "hfopenllm_v2/MMLU-PRO": 0.3611 } }, { "id": "jaspionjader/slu-mix-1", "name": "slu-mix-1", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4569, "hfopenllm_v2/BBH": 0.524, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/sof-1", "name": "sof-1", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4314, "hfopenllm_v2/BBH": 0.501, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4082, "hfopenllm_v2/MMLU-PRO": 0.3674 } }, { "id": "jaspionjader/sof-10", "name": "sof-10", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4648, "hfopenllm_v2/BBH": 0.5197, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.3874 } }, { "id": "jaspionjader/sof-3", "name": "sof-3", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4637, "hfopenllm_v2/BBH": 0.5206, "hfopenllm_v2/MATH Level 5": 0.1276, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4131, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "jaspionjader/sof-6", "name": "sof-6", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4354, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3844 } }, { "id": "jaspionjader/test-10", "name": "test-10", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4578, "hfopenllm_v2/BBH": 0.5316, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.3936 } }, { "id": "jaspionjader/test-11", "name": "test-11", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4541, "hfopenllm_v2/BBH": 0.535, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.3939 } }, { "id": "jaspionjader/test-12", "name": "test-12", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4368, "hfopenllm_v2/BBH": 0.5347, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.425, "hfopenllm_v2/MMLU-PRO": 0.3935 } }, { "id": "jaspionjader/test-13", "name": "test-13", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4581, "hfopenllm_v2/BBH": 0.5318, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.3935 } }, { "id": "jaspionjader/test-14", "name": "test-14", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4444, "hfopenllm_v2/BBH": 0.5323, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/test-15", "name": "test-15", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4365, "hfopenllm_v2/BBH": 0.5328, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/test-16", "name": "test-16", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4599, "hfopenllm_v2/BBH": 0.533, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4225, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/test-17", "name": "test-17", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4267, "hfopenllm_v2/BBH": 0.5329, "hfopenllm_v2/MATH Level 5": 0.1103, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.3929 } }, { "id": "jaspionjader/test-18", "name": "test-18", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4392, "hfopenllm_v2/BBH": 0.5317, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "jaspionjader/test-19", "name": "test-19", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4401, "hfopenllm_v2/BBH": 0.5319, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.3929 } }, { "id": "jaspionjader/test-20", "name": "test-20", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4529, "hfopenllm_v2/BBH": 0.5327, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.392 } }, { "id": "jaspionjader/TSN-Kosmos-EVAA-8B", "name": "TSN-Kosmos-EVAA-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4903, "hfopenllm_v2/BBH": 0.5347, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B", "name": "TSN-Kosmos-EVAA-v2-8B", "developer": "jaspionjader", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4667, "hfopenllm_v2/BBH": 0.5343, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3762 } }, { "id": "jayasuryajsk/Qwen2.5-3B-reasoner", "name": "Qwen2.5-3B-reasoner", "developer": "jayasuryajsk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.416, "hfopenllm_v2/BBH": 0.4651, "hfopenllm_v2/MATH Level 5": 0.2085, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.3482 } }, { "id": "JayHyeon/Qwen-0.5B-DPO-1epoch", "name": "Qwen-0.5B-DPO-1epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2647, "hfopenllm_v2/BBH": 0.3191, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3352, "hfopenllm_v2/MMLU-PRO": 0.1558 } }, { "id": "JayHyeon/Qwen-0.5B-DPO-5epoch", "name": "Qwen-0.5B-DPO-5epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.257, "hfopenllm_v2/BBH": 0.3112, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.1533 } }, { "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch", "name": "Qwen-0.5B-eDPO-1epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2623, "hfopenllm_v2/BBH": 0.3181, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3327, "hfopenllm_v2/MMLU-PRO": 0.1553 } }, { "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch", "name": "Qwen-0.5B-eDPO-5epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2477, "hfopenllm_v2/BBH": 0.3096, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3326, "hfopenllm_v2/MMLU-PRO": 0.1523 } }, { "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch", "name": "Qwen-0.5B-IRPO-1epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2589, "hfopenllm_v2/BBH": 0.3164, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3286, "hfopenllm_v2/MMLU-PRO": 0.15 } }, { "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch", "name": "Qwen-0.5B-IRPO-5epoch", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2487, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3287, "hfopenllm_v2/MMLU-PRO": 0.1507 } }, { "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT", "name": "Qwen2.5-0.5B-Instruct-SFT", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2768, "hfopenllm_v2/BBH": 0.3254, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.152 } }, { "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", "name": "Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2469, "hfopenllm_v2/BBH": 0.326, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", "name": "Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2606, "hfopenllm_v2/BBH": 0.3308, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1626 } }, { "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", "name": "Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2529, "hfopenllm_v2/BBH": 0.3262, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1576 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT", "name": "Qwen2.5-0.5B-SFT", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1964, "hfopenllm_v2/BBH": 0.3121, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.1673 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4", "name": "Qwen2.5-0.5B-SFT-1e-4", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.202, "hfopenllm_v2/BBH": 0.3017, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3446, "hfopenllm_v2/MMLU-PRO": 0.1619 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep", "name": "Qwen2.5-0.5B-SFT-1e-4-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.214, "hfopenllm_v2/BBH": 0.3172, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3473, "hfopenllm_v2/MMLU-PRO": 0.1537 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep", "name": "Qwen2.5-0.5B-SFT-1e-4-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2257, "hfopenllm_v2/BBH": 0.3064, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.1532 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep", "name": "Qwen2.5-0.5B-SFT-1e-4-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1987, "hfopenllm_v2/BBH": 0.3104, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3407, "hfopenllm_v2/MMLU-PRO": 0.1558 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5", "name": "Qwen2.5-0.5B-SFT-1e-5", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1986, "hfopenllm_v2/BBH": 0.314, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.1698 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep", "name": "Qwen2.5-0.5B-SFT-1e-5-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1971, "hfopenllm_v2/BBH": 0.3225, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1651 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep", "name": "Qwen2.5-0.5B-SFT-1e-5-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2241, "hfopenllm_v2/BBH": 0.3247, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3353, "hfopenllm_v2/MMLU-PRO": 0.1689 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep", "name": "Qwen2.5-0.5B-SFT-1e-5-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2292, "hfopenllm_v2/BBH": 0.3259, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.1688 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4", "name": "Qwen2.5-0.5B-SFT-2e-4", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2034, "hfopenllm_v2/BBH": 0.2936, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1413 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep", "name": "Qwen2.5-0.5B-SFT-2e-4-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1831, "hfopenllm_v2/BBH": 0.2984, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.1484 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep", "name": "Qwen2.5-0.5B-SFT-2e-4-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.199, "hfopenllm_v2/BBH": 0.311, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3449, "hfopenllm_v2/MMLU-PRO": 0.1416 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep", "name": "Qwen2.5-0.5B-SFT-2e-4-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1897, "hfopenllm_v2/BBH": 0.2936, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.1336 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5", "name": "Qwen2.5-0.5B-SFT-2e-5", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2068, "hfopenllm_v2/BBH": 0.3204, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1678 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2201, "hfopenllm_v2/BBH": 0.3217, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.171 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2542, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.158 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2451, "hfopenllm_v2/BBH": 0.316, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1561 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2557, "hfopenllm_v2/BBH": 0.3142, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2605, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1577 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2578, "hfopenllm_v2/BBH": 0.3173, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1583 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2335, "hfopenllm_v2/BBH": 0.3198, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3276, "hfopenllm_v2/MMLU-PRO": 0.1581 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2472, "hfopenllm_v2/BBH": 0.3226, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1538 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2474, "hfopenllm_v2/BBH": 0.3229, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1539 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2403, "hfopenllm_v2/BBH": 0.3245, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1573 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2368, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3355, "hfopenllm_v2/MMLU-PRO": 0.1516 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2372, "hfopenllm_v2/BBH": 0.3248, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.155 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2499, "hfopenllm_v2/BBH": 0.3181, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1574 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2381, "hfopenllm_v2/BBH": 0.3242, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1572 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2421, "hfopenllm_v2/BBH": 0.3225, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.1496 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2381, "hfopenllm_v2/BBH": 0.3265, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.1499 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2526, "hfopenllm_v2/BBH": 0.3177, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1572 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2457, "hfopenllm_v2/BBH": 0.316, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1572 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2442, "hfopenllm_v2/BBH": 0.3194, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2604, "hfopenllm_v2/BBH": 0.3178, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.249, "hfopenllm_v2/BBH": 0.3173, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1569 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2604, "hfopenllm_v2/BBH": 0.315, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1566 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2411, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2369, "hfopenllm_v2/BBH": 0.326, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3355, "hfopenllm_v2/MMLU-PRO": 0.157 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2262, "hfopenllm_v2/BBH": 0.3262, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.1541 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2508, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3355, "hfopenllm_v2/MMLU-PRO": 0.1555 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.239, "hfopenllm_v2/BBH": 0.3182, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.156 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2423, "hfopenllm_v2/BBH": 0.3154, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1548 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2493, "hfopenllm_v2/BBH": 0.319, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1561 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.255, "hfopenllm_v2/BBH": 0.3211, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1571 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2478, "hfopenllm_v2/BBH": 0.3198, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2475, "hfopenllm_v2/BBH": 0.3225, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1556 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.259, "hfopenllm_v2/BBH": 0.3185, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1586 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2323, "hfopenllm_v2/BBH": 0.3179, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1548 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2315, "hfopenllm_v2/BBH": 0.326, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3383, "hfopenllm_v2/MMLU-PRO": 0.1521 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2298, "hfopenllm_v2/BBH": 0.332, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3329, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2469, "hfopenllm_v2/BBH": 0.3179, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.252, "hfopenllm_v2/BBH": 0.3168, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1576 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2666, "hfopenllm_v2/BBH": 0.3191, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2499, "hfopenllm_v2/BBH": 0.3178, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2417, "hfopenllm_v2/BBH": 0.3178, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2562, "hfopenllm_v2/BBH": 0.319, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1576 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2408, "hfopenllm_v2/BBH": 0.3165, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1557 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2481, "hfopenllm_v2/BBH": 0.3204, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1592 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2545, "hfopenllm_v2/BBH": 0.3186, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1561 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.252, "hfopenllm_v2/BBH": 0.3204, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1538 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2315, "hfopenllm_v2/BBH": 0.3213, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1582 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2515, "hfopenllm_v2/BBH": 0.3187, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1539 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2472, "hfopenllm_v2/BBH": 0.3213, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1588 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.246, "hfopenllm_v2/BBH": 0.3234, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1533 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2524, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1531 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2265, "hfopenllm_v2/BBH": 0.3252, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1568 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2302, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.15 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2524, "hfopenllm_v2/BBH": 0.3278, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1521 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2658, "hfopenllm_v2/BBH": 0.3175, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1575 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2487, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1595 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.256, "hfopenllm_v2/BBH": 0.3159, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2499, "hfopenllm_v2/BBH": 0.3156, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.1556 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2496, "hfopenllm_v2/BBH": 0.3177, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2515, "hfopenllm_v2/BBH": 0.3172, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1553 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep", "name": "Qwen2.5-0.5B-SFT-2e-5-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2281, "hfopenllm_v2/BBH": 0.324, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1746 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2348, "hfopenllm_v2/BBH": 0.3308, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3409, "hfopenllm_v2/MMLU-PRO": 0.1695 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2526, "hfopenllm_v2/BBH": 0.3238, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3528, "hfopenllm_v2/MMLU-PRO": 0.1574 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2481, "hfopenllm_v2/BBH": 0.3175, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1597 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2548, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3435, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2423, "hfopenllm_v2/BBH": 0.3219, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3515, "hfopenllm_v2/MMLU-PRO": 0.1563 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2493, "hfopenllm_v2/BBH": 0.3191, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1592 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", "name": "Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2478, "hfopenllm_v2/BBH": 0.3218, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3515, "hfopenllm_v2/MMLU-PRO": 0.1556 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5", "name": "Qwen2.5-0.5B-SFT-5e-5", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.201, "hfopenllm_v2/BBH": 0.3109, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3381, "hfopenllm_v2/MMLU-PRO": 0.1672 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep", "name": "Qwen2.5-0.5B-SFT-5e-5-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2175, "hfopenllm_v2/BBH": 0.318, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1627 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep", "name": "Qwen2.5-0.5B-SFT-5e-5-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2199, "hfopenllm_v2/BBH": 0.3297, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.1651 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep", "name": "Qwen2.5-0.5B-SFT-5e-5-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2077, "hfopenllm_v2/BBH": 0.3276, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3766, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5", "name": "Qwen2.5-0.5B-SFT-7e-5", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2093, "hfopenllm_v2/BBH": 0.3158, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.1622 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep", "name": "Qwen2.5-0.5B-SFT-7e-5-2ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2156, "hfopenllm_v2/BBH": 0.31, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep", "name": "Qwen2.5-0.5B-SFT-7e-5-3ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2381, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3554, "hfopenllm_v2/MMLU-PRO": 0.1522 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep", "name": "Qwen2.5-0.5B-SFT-7e-5-5ep", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.212, "hfopenllm_v2/BBH": 0.32, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.1628 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1", "name": "Qwen2.5-0.5B-SFT-DPO-1epoch_v1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2025, "hfopenllm_v2/BBH": 0.3268, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.133 } }, { "id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", "name": "Qwen2.5-0.5B-SFT-MDPO-1epoch_v1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1964, "hfopenllm_v2/BBH": 0.3293, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1337 } }, { "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", "name": "Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2393, "hfopenllm_v2/BBH": 0.3244, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1573 } }, { "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", "name": "Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2475, "hfopenllm_v2/BBH": 0.3209, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", "name": "Qwen_0.5-DPO_1e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2316, "hfopenllm_v2/BBH": 0.3258, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3221, "hfopenllm_v2/MMLU-PRO": 0.158 } }, { "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", "name": "Qwen_0.5-DPO_1e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.236, "hfopenllm_v2/BBH": 0.3225, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1596 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-6-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2337, "hfopenllm_v2/BBH": 0.3132, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.1533 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-6-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2569, "hfopenllm_v2/BBH": 0.3276, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3156, "hfopenllm_v2/MMLU-PRO": 0.1565 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.246, "hfopenllm_v2/BBH": 0.3267, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1543 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2529, "hfopenllm_v2/BBH": 0.3229, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3195, "hfopenllm_v2/MMLU-PRO": 0.1597 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-7-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2505, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3195, "hfopenllm_v2/MMLU-PRO": 0.1599 } }, { "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", "name": "Qwen_0.5-DPO_3e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2387, "hfopenllm_v2/BBH": 0.3258, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3169, "hfopenllm_v2/MMLU-PRO": 0.1589 } }, { "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", "name": "Qwen_0.5-DPO_5e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2532, "hfopenllm_v2/BBH": 0.3218, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1593 } }, { "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", "name": "Qwen_0.5-DPO_5e-7-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2456, "hfopenllm_v2/BBH": 0.3299, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3181, "hfopenllm_v2/MMLU-PRO": 0.1602 } }, { "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-DPO_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2423, "hfopenllm_v2/BBH": 0.3271, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3181, "hfopenllm_v2/MMLU-PRO": 0.1595 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", "name": "Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2532, "hfopenllm_v2/BBH": 0.314, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1566 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", "name": "Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.267, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2481, "hfopenllm_v2/BBH": 0.3261, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1565 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2383, "hfopenllm_v2/BBH": 0.3218, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1503 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2471, "hfopenllm_v2/BBH": 0.3224, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1533 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2447, "hfopenllm_v2/BBH": 0.3181, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1565 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2551, "hfopenllm_v2/BBH": 0.3194, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1567 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", "name": "Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2538, "hfopenllm_v2/BBH": 0.3153, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3261, "hfopenllm_v2/MMLU-PRO": 0.1583 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", "name": "Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2402, "hfopenllm_v2/BBH": 0.3168, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1568 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", "name": "Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2484, "hfopenllm_v2/BBH": 0.3211, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.1573 } }, { "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", "name": "Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2578, "hfopenllm_v2/BBH": 0.3203, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1583 } }, { "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", "name": "Qwen_0.5-IPO_5e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2574, "hfopenllm_v2/BBH": 0.3279, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3169, "hfopenllm_v2/MMLU-PRO": 0.1651 } }, { "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-IPO_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3072, "hfopenllm_v2/BBH": 0.3264, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3156, "hfopenllm_v2/MMLU-PRO": 0.1624 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", "name": "Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2551, "hfopenllm_v2/BBH": 0.3242, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3182, "hfopenllm_v2/MMLU-PRO": 0.1574 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", "name": "Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2636, "hfopenllm_v2/BBH": 0.3198, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1586 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", "name": "Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2323, "hfopenllm_v2/BBH": 0.3255, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3169, "hfopenllm_v2/MMLU-PRO": 0.1612 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", "name": "Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2414, "hfopenllm_v2/BBH": 0.3314, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1532 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", "name": "Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2678, "hfopenllm_v2/BBH": 0.3362, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1561 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", "name": "Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2561, "hfopenllm_v2/BBH": 0.3231, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1589 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", "name": "Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2639, "hfopenllm_v2/BBH": 0.3257, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", "name": "Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2518, "hfopenllm_v2/BBH": 0.3214, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3169, "hfopenllm_v2/MMLU-PRO": 0.1585 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", "name": "Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2438, "hfopenllm_v2/BBH": 0.3266, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1554 } }, { "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", "name": "Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2465, "hfopenllm_v2/BBH": 0.3246, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3182, "hfopenllm_v2/MMLU-PRO": 0.1563 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2506, "hfopenllm_v2/BBH": 0.3261, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1522 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2457, "hfopenllm_v2/BBH": 0.318, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1566 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2454, "hfopenllm_v2/BBH": 0.3216, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1544 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2342, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3302, "hfopenllm_v2/MMLU-PRO": 0.158 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.232, "hfopenllm_v2/BBH": 0.3234, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3369, "hfopenllm_v2/MMLU-PRO": 0.1543 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2418, "hfopenllm_v2/BBH": 0.3175, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.158 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2493, "hfopenllm_v2/BBH": 0.3197, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1571 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.252, "hfopenllm_v2/BBH": 0.3198, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1551 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.258, "hfopenllm_v2/BBH": 0.3248, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.1539 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.232, "hfopenllm_v2/BBH": 0.3265, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1537 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2488, "hfopenllm_v2/BBH": 0.3273, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1531 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2524, "hfopenllm_v2/BBH": 0.313, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1564 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2514, "hfopenllm_v2/BBH": 0.3221, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1538 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2457, "hfopenllm_v2/BBH": 0.318, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1572 } }, { "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2636, "hfopenllm_v2/BBH": 0.3181, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.1574 } }, { "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", "name": "Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2321, "hfopenllm_v2/BBH": 0.3278, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3022, "hfopenllm_v2/MMLU-PRO": 0.1496 } }, { "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", "name": "Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2542, "hfopenllm_v2/BBH": 0.3253, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3181, "hfopenllm_v2/MMLU-PRO": 0.1609 } }, { "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", "name": "Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2739, "hfopenllm_v2/BBH": 0.3245, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3089, "hfopenllm_v2/MMLU-PRO": 0.1597 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", "name": "Qwen_0.5-VDPO_3e-6-1ep_3vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2483, "hfopenllm_v2/BBH": 0.3174, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3328, "hfopenllm_v2/MMLU-PRO": 0.1558 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", "name": "Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2518, "hfopenllm_v2/BBH": 0.3218, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.1595 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", "name": "Qwen_0.5-VDPO_5e-7-1ep_10vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2536, "hfopenllm_v2/BBH": 0.3234, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3236, "hfopenllm_v2/MMLU-PRO": 0.1597 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", "name": "Qwen_0.5-VDPO_5e-7-1ep_1vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2448, "hfopenllm_v2/BBH": 0.324, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", "name": "Qwen_0.5-VDPO_5e-7-1ep_3vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2505, "hfopenllm_v2/BBH": 0.3227, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1589 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2472, "hfopenllm_v2/BBH": 0.3255, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3208, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", "name": "Qwen_0.5-VDPO_5e-7-3ep_1vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2417, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", "name": "Qwen_0.5-VDPO_5e-7-3ep_3vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2527, "hfopenllm_v2/BBH": 0.3235, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.158 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", "name": "Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2669, "hfopenllm_v2/BBH": 0.3314, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3168, "hfopenllm_v2/MMLU-PRO": 0.1634 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", "name": "Qwen_0.5-VIPO_5e-7-1ep_10vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2702, "hfopenllm_v2/BBH": 0.33, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3208, "hfopenllm_v2/MMLU-PRO": 0.1635 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", "name": "Qwen_0.5-VIPO_5e-7-1ep_1vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.248, "hfopenllm_v2/BBH": 0.3309, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3208, "hfopenllm_v2/MMLU-PRO": 0.1649 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", "name": "Qwen_0.5-VIPO_5e-7-1ep_30vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2622, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3221, "hfopenllm_v2/MMLU-PRO": 0.1634 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", "name": "Qwen_0.5-VIPO_5e-7-1ep_3vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2609, "hfopenllm_v2/BBH": 0.3298, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3168, "hfopenllm_v2/MMLU-PRO": 0.1651 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", "name": "Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.293, "hfopenllm_v2/BBH": 0.322, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3116, "hfopenllm_v2/MMLU-PRO": 0.1591 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", "name": "Qwen_0.5-VIPO_5e-7-3ep_10vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2881, "hfopenllm_v2/BBH": 0.3255, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3102, "hfopenllm_v2/MMLU-PRO": 0.1582 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", "name": "Qwen_0.5-VIPO_5e-7-3ep_1vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2887, "hfopenllm_v2/BBH": 0.3237, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3142, "hfopenllm_v2/MMLU-PRO": 0.1609 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", "name": "Qwen_0.5-VIPO_5e-7-3ep_30vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2905, "hfopenllm_v2/BBH": 0.3254, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3129, "hfopenllm_v2/MMLU-PRO": 0.1574 } }, { "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", "name": "Qwen_0.5-VIPO_5e-7-3ep_3vpo_const", "developer": "JayHyeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2905, "hfopenllm_v2/BBH": 0.3238, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3089, "hfopenllm_v2/MMLU-PRO": 0.1592 } }, { "id": "jeanmichela/o-distil-qwen", "name": "o-distil-qwen", "developer": "jeanmichela", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4482, "hfopenllm_v2/BBH": 0.59, "hfopenllm_v2/MATH Level 5": 0.565, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.534, "hfopenllm_v2/MMLU-PRO": 0.4658 } }, { "id": "jebcarter/psyonic-cetacean-20B", "name": "psyonic-cetacean-20B", "developer": "jebcarter", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2544, "hfopenllm_v2/BBH": 0.4907, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4661, "hfopenllm_v2/MMLU-PRO": 0.2886 } }, { "id": "jebish7/aya-expanse-8b", "name": "aya-expanse-8b", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3791, "hfopenllm_v2/BBH": 0.4969, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.3103 } }, { "id": "jebish7/gemma-2-2b-it", "name": "gemma-2-2b-it", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1272, "hfopenllm_v2/BBH": 0.4395, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.2715 } }, { "id": "jebish7/gemma-2-9b-it", "name": "gemma-2-9b-it", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1557, "hfopenllm_v2/BBH": 0.5949, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4554, "hfopenllm_v2/MMLU-PRO": 0.4143 } }, { "id": "jebish7/Llama-3-Nanda-10B-Chat", "name": "Llama-3-Nanda-10B-Chat", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2953, "hfopenllm_v2/BBH": 0.4959, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4356, "hfopenllm_v2/MMLU-PRO": 0.3157 } }, { "id": "jebish7/Llama-3.1-8B-Instruct", "name": "Llama-3.1-8B-Instruct", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5058, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Base", "name": "Nemotron-4-Mini-Hindi-4B-Base", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2285, "hfopenllm_v2/BBH": 0.3924, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4249, "hfopenllm_v2/MMLU-PRO": 0.2503 } }, { "id": "jebish7/Nemotron-4-Mini-Hindi-4B-Instruct", "name": "Nemotron-4-Mini-Hindi-4B-Instruct", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3345, "hfopenllm_v2/BBH": 0.4041, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4153, "hfopenllm_v2/MMLU-PRO": 0.2595 } }, { "id": "jebish7/Nemotron-Mini-4B-Instruct", "name": "Nemotron-Mini-4B-Instruct", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3709, "hfopenllm_v2/BBH": 0.4244, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4727, "hfopenllm_v2/MMLU-PRO": 0.2783 } }, { "id": "jebish7/qwen2.5-0.5B-IHA-Hin", "name": "qwen2.5-0.5B-IHA-Hin", "developer": "jebish7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1416, "hfopenllm_v2/BBH": 0.2989, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1094 } }, { "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1", "name": "jeffmeloy_Qwen2.5-7B-minperplexity-1", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3757, "hfopenllm_v2/BBH": 0.5582, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.4368 } }, { "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0", "name": "Qwen-7B-nerd-uncensored-v1.0", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6136, "hfopenllm_v2/BBH": 0.5421, "hfopenllm_v2/MATH Level 5": 0.287, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4793, "hfopenllm_v2/MMLU-PRO": 0.4363 } }, { "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2", "name": "Qwen2.5-7B-minperplexity-2", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5097, "hfopenllm_v2/BBH": 0.5524, "hfopenllm_v2/MATH Level 5": 0.3014, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4625, "hfopenllm_v2/MMLU-PRO": 0.4346 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9", "name": "Qwen2.5-7B-nerd-uncensored-v0.9", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6048, "hfopenllm_v2/BBH": 0.547, "hfopenllm_v2/MATH Level 5": 0.2946, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.4363 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0", "name": "Qwen2.5-7B-nerd-uncensored-v1.0", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7695, "hfopenllm_v2/BBH": 0.5418, "hfopenllm_v2/MATH Level 5": 0.4713, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4551, "hfopenllm_v2/MMLU-PRO": 0.4254 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1", "name": "Qwen2.5-7B-nerd-uncensored-v1.1", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6626, "hfopenllm_v2/BBH": 0.4864, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.385 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2", "name": "Qwen2.5-7B-nerd-uncensored-v1.2", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4965, "hfopenllm_v2/BBH": 0.4946, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.3969 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3", "name": "Qwen2.5-7B-nerd-uncensored-v1.3", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4995, "hfopenllm_v2/BBH": 0.5026, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.4016 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4", "name": "Qwen2.5-7B-nerd-uncensored-v1.4", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6079, "hfopenllm_v2/BBH": 0.5467, "hfopenllm_v2/MATH Level 5": 0.281, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4714, "hfopenllm_v2/MMLU-PRO": 0.4419 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5", "name": "Qwen2.5-7B-nerd-uncensored-v1.5", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.565, "hfopenllm_v2/BBH": 0.5523, "hfopenllm_v2/MATH Level 5": 0.2757, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4982, "hfopenllm_v2/MMLU-PRO": 0.4448 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7", "name": "Qwen2.5-7B-nerd-uncensored-v1.7", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4202, "hfopenllm_v2/BBH": 0.5392, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4848, "hfopenllm_v2/MMLU-PRO": 0.428 } }, { "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8", "name": "Qwen2.5-7B-nerd-uncensored-v1.8", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6256, "hfopenllm_v2/BBH": 0.5447, "hfopenllm_v2/MATH Level 5": 0.2704, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4767, "hfopenllm_v2/MMLU-PRO": 0.4343 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0", "name": "Qwen2.5-7B-olm-v1.0", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.566, "hfopenllm_v2/MATH Level 5": 0.2863, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.4566 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1", "name": "Qwen2.5-7B-olm-v1.1", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4329, "hfopenllm_v2/BBH": 0.5478, "hfopenllm_v2/MATH Level 5": 0.3829, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.4354 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2", "name": "Qwen2.5-7B-olm-v1.2", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4203, "hfopenllm_v2/BBH": 0.5533, "hfopenllm_v2/MATH Level 5": 0.2847, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4688, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3", "name": "Qwen2.5-7B-olm-v1.3", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4219, "hfopenllm_v2/BBH": 0.5532, "hfopenllm_v2/MATH Level 5": 0.3104, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4701, "hfopenllm_v2/MMLU-PRO": 0.447 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4", "name": "Qwen2.5-7B-olm-v1.4", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4545, "hfopenllm_v2/BBH": 0.5582, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4622, "hfopenllm_v2/MMLU-PRO": 0.4457 } }, { "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5", "name": "Qwen2.5-7B-olm-v1.5", "developer": "jeffmeloy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4547, "hfopenllm_v2/BBH": 0.5544, "hfopenllm_v2/MATH Level 5": 0.2817, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4539, "hfopenllm_v2/MMLU-PRO": 0.4399 } }, { "id": "jeonsworld/CarbonVillain-en-10.7B-v4", "name": "CarbonVillain-en-10.7B-v4", "developer": "jeonsworld", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4579, "hfopenllm_v2/BBH": 0.5168, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.3965, "hfopenllm_v2/MMLU-PRO": 0.3142 } }, { "id": "jiangxinyang-shanda/Homer-LLama3-8B", "name": "Homer-LLama3-8B", "developer": "jiangxinyang-shanda", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3992, "hfopenllm_v2/BBH": 0.5173, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4056, "hfopenllm_v2/MMLU-PRO": 0.3139 } }, { "id": "jieliu/Storm-7B", "name": "Storm-7B", "developer": "jieliu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3424, "hfopenllm_v2/BBH": 0.5187, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4429, "hfopenllm_v2/MMLU-PRO": 0.3119 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", "name": "llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6717, "hfopenllm_v2/BBH": 0.488, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4041, "hfopenllm_v2/MMLU-PRO": 0.3634 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", "name": "llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6556, "hfopenllm_v2/BBH": 0.4935, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4, "hfopenllm_v2/MMLU-PRO": 0.3658 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", "name": "llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6315, "hfopenllm_v2/BBH": 0.4916, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3935, "hfopenllm_v2/MMLU-PRO": 0.3611 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", "name": "llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6285, "hfopenllm_v2/BBH": 0.4986, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4014, "hfopenllm_v2/MMLU-PRO": 0.3545 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6678, "hfopenllm_v2/BBH": 0.494, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.3987, "hfopenllm_v2/MMLU-PRO": 0.3658 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", "name": "llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6605, "hfopenllm_v2/BBH": 0.4916, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", "name": "llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6492, "hfopenllm_v2/BBH": 0.4952, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3961, "hfopenllm_v2/MMLU-PRO": 0.3711 } }, { "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2", "name": "Llama-3-Instruct-8B-SimPO-v0.2", "developer": "Jimmy19991222", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.654, "hfopenllm_v2/BBH": 0.4984, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4013, "hfopenllm_v2/MMLU-PRO": 0.3686 } }, { "id": "jiviai/medX_v2", "name": "medX_v2", "developer": "jiviai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3743, "hfopenllm_v2/BBH": 0.4509, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.3498, "hfopenllm_v2/MMLU-PRO": 0.3428 } }, { "id": "jlzhou/Qwen2.5-3B-Infinity-Instruct-0625", "name": "Qwen2.5-3B-Infinity-Instruct-0625", "developer": "jlzhou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3558, "hfopenllm_v2/BBH": 0.4774, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.3199 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4271, "hfopenllm_v2/BBH": 0.5036, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4638, "hfopenllm_v2/MMLU-PRO": 0.3739 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.1-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4253, "hfopenllm_v2/BBH": 0.5019, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.415, "hfopenllm_v2/MMLU-PRO": 0.3724 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3377, "hfopenllm_v2/BBH": 0.4917, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.5018, "hfopenllm_v2/MMLU-PRO": 0.3533 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.3-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4274, "hfopenllm_v2/BBH": 0.5126, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4226, "hfopenllm_v2/MMLU-PRO": 0.3739 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3204, "hfopenllm_v2/BBH": 0.4884, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.5098, "hfopenllm_v2/MMLU-PRO": 0.3344 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.5-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4396, "hfopenllm_v2/BBH": 0.514, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4398, "hfopenllm_v2/MMLU-PRO": 0.3696 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2814, "hfopenllm_v2/BBH": 0.4854, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.5163, "hfopenllm_v2/MMLU-PRO": 0.3295 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.7-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4302, "hfopenllm_v2/BBH": 0.5157, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.3663 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.279, "hfopenllm_v2/BBH": 0.4861, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.515, "hfopenllm_v2/MMLU-PRO": 0.3305 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs-density-0.9-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4223, "hfopenllm_v2/BBH": 0.5154, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4384, "hfopenllm_v2/MMLU-PRO": 0.365 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4359, "hfopenllm_v2/BBH": 0.5041, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4532, "hfopenllm_v2/MMLU-PRO": 0.3762 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.1-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4202, "hfopenllm_v2/BBH": 0.5011, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.415, "hfopenllm_v2/MMLU-PRO": 0.3699 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3518, "hfopenllm_v2/BBH": 0.4999, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4871, "hfopenllm_v2/MMLU-PRO": 0.3611 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.3-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4204, "hfopenllm_v2/BBH": 0.5107, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.371 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3454, "hfopenllm_v2/BBH": 0.4984, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4911, "hfopenllm_v2/MMLU-PRO": 0.3531 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.5-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4092, "hfopenllm_v2/BBH": 0.5137, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4357, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2904, "hfopenllm_v2/BBH": 0.4967, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.349 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.7-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4199, "hfopenllm_v2/BBH": 0.5147, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4358, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.01", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2913, "hfopenllm_v2/BBH": 0.4918, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4977, "hfopenllm_v2/MMLU-PRO": 0.3454 } }, { "id": "johnsutor/Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", "name": "Llama-3-8B-Instruct_breadcrumbs_ties-density-0.9-gamma-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4162, "hfopenllm_v2/BBH": 0.5139, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4317, "hfopenllm_v2/MMLU-PRO": 0.3625 } }, { "id": "johnsutor/Llama-3-8B-Instruct_dare_linear", "name": "Llama-3-8B-Instruct_dare_linear", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2145, "hfopenllm_v2/BBH": 0.4283, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4979, "hfopenllm_v2/MMLU-PRO": 0.2414 } }, { "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.1", "name": "Llama-3-8B-Instruct_dare_ties-density-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1891, "hfopenllm_v2/BBH": 0.4119, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4658, "hfopenllm_v2/MMLU-PRO": 0.2265 } }, { "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.3", "name": "Llama-3-8B-Instruct_dare_ties-density-0.3", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2113, "hfopenllm_v2/BBH": 0.4559, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.5069, "hfopenllm_v2/MMLU-PRO": 0.304 } }, { "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.7", "name": "Llama-3-8B-Instruct_dare_ties-density-0.7", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2034, "hfopenllm_v2/BBH": 0.4723, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.511, "hfopenllm_v2/MMLU-PRO": 0.3148 } }, { "id": "johnsutor/Llama-3-8B-Instruct_dare_ties-density-0.9", "name": "Llama-3-8B-Instruct_dare_ties-density-0.9", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2161, "hfopenllm_v2/BBH": 0.4664, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.523, "hfopenllm_v2/MMLU-PRO": 0.3143 } }, { "id": "johnsutor/Llama-3-8B-Instruct_linear", "name": "Llama-3-8B-Instruct_linear", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4308, "hfopenllm_v2/BBH": 0.5031, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4097, "hfopenllm_v2/MMLU-PRO": 0.3712 } }, { "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.1", "name": "Llama-3-8B-Instruct_ties-density-0.1", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4116, "hfopenllm_v2/BBH": 0.5021, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.36 } }, { "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.3", "name": "Llama-3-8B-Instruct_ties-density-0.3", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3626, "hfopenllm_v2/BBH": 0.4906, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4025, "hfopenllm_v2/MMLU-PRO": 0.3321 } }, { "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.5", "name": "Llama-3-8B-Instruct_ties-density-0.5", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3797, "hfopenllm_v2/BBH": 0.4793, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.388, "hfopenllm_v2/MMLU-PRO": 0.3175 } }, { "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.7", "name": "Llama-3-8B-Instruct_ties-density-0.7", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3681, "hfopenllm_v2/BBH": 0.4738, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3881, "hfopenllm_v2/MMLU-PRO": 0.3152 } }, { "id": "johnsutor/Llama-3-8B-Instruct_ties-density-0.9", "name": "Llama-3-8B-Instruct_ties-density-0.9", "developer": "johnsutor", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3858, "hfopenllm_v2/BBH": 0.4735, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.388, "hfopenllm_v2/MMLU-PRO": 0.3182 } }, { "id": "jondurbin/bagel-dpo-34b-v0.5", "name": "jondurbin/bagel-dpo-34b-v0.5", "developer": "jondurbin", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7215, "reward-bench/Chat": 0.9385, "reward-bench/Chat Hard": 0.5504, "reward-bench/Safety": 0.6446, "reward-bench/Reasoning": 0.8889, "reward-bench/Prior Sets (0.5 weight)": 0.4487 } }, { "id": "Joseph717171/Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", "name": "Hermes-3-Llama-3.1-8B_TIES_with_Base_Embeds_Initialized_to_Special_Instruct_Toks_dtypeF32", "developer": "Joseph717171", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6185, "hfopenllm_v2/BBH": 0.5177, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4369, "hfopenllm_v2/MMLU-PRO": 0.3144 } }, { "id": "Joseph717171/Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", "name": "Llama-3.1-SuperNova-8B-Lite_TIES_with_Base", "developer": "Joseph717171", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8096, "hfopenllm_v2/BBH": 0.5147, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.411, "hfopenllm_v2/MMLU-PRO": 0.388 } }, { "id": "Josephgflowers/Cinder-Phi-2-V1-F16-gguf", "name": "Cinder-Phi-2-V1-F16-gguf", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2357, "hfopenllm_v2/BBH": 0.4397, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3435, "hfopenllm_v2/MMLU-PRO": 0.2161 } }, { "id": "Josephgflowers/Differential-Attention-Liquid-Metal-Tinyllama", "name": "Differential-Attention-Liquid-Metal-Tinyllama", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2227, "hfopenllm_v2/BBH": 0.2926, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.1214 } }, { "id": "Josephgflowers/TinyLlama-Cinder-Agent-v1", "name": "TinyLlama-Cinder-Agent-v1", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.267, "hfopenllm_v2/BBH": 0.3116, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1161 } }, { "id": "Josephgflowers/Tinyllama-r1", "name": "Tinyllama-r1", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2119, "hfopenllm_v2/BBH": 0.3015, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.1134 } }, { "id": "Josephgflowers/Tinyllama-STEM-Cinder-Agent-v1", "name": "Tinyllama-STEM-Cinder-Agent-v1", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2126, "hfopenllm_v2/BBH": 0.3084, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2349, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1086 } }, { "id": "Josephgflowers/TinyLlama-v1.1-Cinders-World", "name": "TinyLlama-v1.1-Cinders-World", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2469, "hfopenllm_v2/BBH": 0.2998, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.1198 } }, { "id": "Josephgflowers/TinyLlama_v1.1_math_code-world-test-1", "name": "TinyLlama_v1.1_math_code-world-test-1", "developer": "Josephgflowers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0078, "hfopenllm_v2/BBH": 0.3146, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2341, "hfopenllm_v2/MUSR": 0.3499, "hfopenllm_v2/MMLU-PRO": 0.1132 } }, { "id": "jpacifico/Chocolatine-14B-Instruct-4k-DPO", "name": "Chocolatine-14B-Instruct-4k-DPO", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4689, "hfopenllm_v2/BBH": 0.63, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4439, "hfopenllm_v2/MMLU-PRO": 0.4764 } }, { "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.2", "name": "Chocolatine-14B-Instruct-DPO-v1.2", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6852, "hfopenllm_v2/BBH": 0.6438, "hfopenllm_v2/MATH Level 5": 0.2092, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.4697 } }, { "id": "jpacifico/Chocolatine-14B-Instruct-DPO-v1.3", "name": "Chocolatine-14B-Instruct-DPO-v1.3", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.704, "hfopenllm_v2/BBH": 0.6846, "hfopenllm_v2/MATH Level 5": 0.5619, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4234, "hfopenllm_v2/MMLU-PRO": 0.5374 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-DPO-v2.0b1", "name": "Chocolatine-2-14B-Instruct-DPO-v2.0b1", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1033, "hfopenllm_v2/BBH": 0.6696, "hfopenllm_v2/MATH Level 5": 0.2757, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4467, "hfopenllm_v2/MMLU-PRO": 0.5124 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0", "name": "Chocolatine-2-14B-Instruct-v2.0", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0885, "hfopenllm_v2/BBH": 0.677, "hfopenllm_v2/MATH Level 5": 0.4804, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.5021, "hfopenllm_v2/MMLU-PRO": 0.5302 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.1", "name": "Chocolatine-2-14B-Instruct-v2.0.1", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0742, "hfopenllm_v2/BBH": 0.6736, "hfopenllm_v2/MATH Level 5": 0.4796, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.5008, "hfopenllm_v2/MMLU-PRO": 0.5299 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0.3", "name": "Chocolatine-2-14B-Instruct-v2.0.3", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7037, "hfopenllm_v2/BBH": 0.6548, "hfopenllm_v2/MATH Level 5": 0.4207, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4768, "hfopenllm_v2/MMLU-PRO": 0.5374 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b2", "name": "Chocolatine-2-14B-Instruct-v2.0b2", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7241, "hfopenllm_v2/BBH": 0.6476, "hfopenllm_v2/MATH Level 5": 0.395, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.5369 } }, { "id": "jpacifico/Chocolatine-2-14B-Instruct-v2.0b3", "name": "Chocolatine-2-14B-Instruct-v2.0b3", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7323, "hfopenllm_v2/BBH": 0.6469, "hfopenllm_v2/MATH Level 5": 0.4109, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5337 } }, { "id": "jpacifico/Chocolatine-3B-Instruct-DPO-Revised", "name": "Chocolatine-3B-Instruct-DPO-Revised", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5623, "hfopenllm_v2/BBH": 0.554, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4453, "hfopenllm_v2/MMLU-PRO": 0.3989 } }, { "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.0", "name": "Chocolatine-3B-Instruct-DPO-v1.0", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3737, "hfopenllm_v2/BBH": 0.5471, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4755, "hfopenllm_v2/MMLU-PRO": 0.3937 } }, { "id": "jpacifico/Chocolatine-3B-Instruct-DPO-v1.2", "name": "Chocolatine-3B-Instruct-DPO-v1.2", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5455, "hfopenllm_v2/BBH": 0.5487, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.3877 } }, { "id": "jpacifico/Distilucie-7B-Math-Instruct-DPO-v0.1", "name": "Distilucie-7B-Math-Instruct-DPO-v0.1", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3048, "hfopenllm_v2/BBH": 0.3835, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.1809 } }, { "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1", "name": "Lucie-7B-Instruct-DPO-v1.1", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3121, "hfopenllm_v2/BBH": 0.3781, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4016, "hfopenllm_v2/MMLU-PRO": 0.1838 } }, { "id": "jpacifico/Lucie-7B-Instruct-DPO-v1.1.3", "name": "Lucie-7B-Instruct-DPO-v1.1.3", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3045, "hfopenllm_v2/BBH": 0.3819, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.1764 } }, { "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.0", "name": "Lucie-7B-Instruct-Merged-Model_Stock-v1.0", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3234, "hfopenllm_v2/BBH": 0.3802, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3844, "hfopenllm_v2/MMLU-PRO": 0.1871 } }, { "id": "jpacifico/Lucie-7B-Instruct-Merged-Model_Stock-v1.1", "name": "Lucie-7B-Instruct-Merged-Model_Stock-v1.1", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3014, "hfopenllm_v2/BBH": 0.3808, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.1862 } }, { "id": "jpacifico/Lucie-Boosted-7B-Instruct", "name": "Lucie-Boosted-7B-Instruct", "developer": "jpacifico", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2566, "hfopenllm_v2/BBH": 0.3465, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.163 } }, { "id": "jsfs11/L3-8B-Stheno-slerp", "name": "L3-8B-Stheno-slerp", "developer": "jsfs11", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6752, "hfopenllm_v2/BBH": 0.5326, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3725, "hfopenllm_v2/MMLU-PRO": 0.3649 } }, { "id": "jsfs11/MixtureofMerges-MoE-4x7b-v4", "name": "MixtureofMerges-MoE-4x7b-v4", "developer": "jsfs11", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.403, "hfopenllm_v2/BBH": 0.5169, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.3032 } }, { "id": "jsfs11/MixtureofMerges-MoE-4x7b-v5", "name": "MixtureofMerges-MoE-4x7b-v5", "developer": "jsfs11", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4199, "hfopenllm_v2/BBH": 0.5198, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4305, "hfopenllm_v2/MMLU-PRO": 0.3098 } }, { "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3", "name": "T3Q-Qwen2.5-14B-Instruct-1M-e3", "developer": "JungZoona", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7324, "hfopenllm_v2/BBH": 0.7586, "hfopenllm_v2/MATH Level 5": 0.2863, "hfopenllm_v2/GPQA": 0.4169, "hfopenllm_v2/MUSR": 0.5911, "hfopenllm_v2/MMLU-PRO": 0.5884 } }, { "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3", "name": "T3Q-qwen2.5-14b-v1.0-e3", "developer": "JungZoona", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7324, "hfopenllm_v2/BBH": 0.7586, "hfopenllm_v2/MATH Level 5": 0.2863, "hfopenllm_v2/GPQA": 0.4169, "hfopenllm_v2/MUSR": 0.5911, "hfopenllm_v2/MMLU-PRO": 0.5884 } }, { "id": "Junhoee/Qwen-Megumin", "name": "Qwen-Megumin", "developer": "Junhoee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7141, "hfopenllm_v2/BBH": 0.5285, "hfopenllm_v2/MATH Level 5": 0.4902, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.398, "hfopenllm_v2/MMLU-PRO": 0.4199 } }, { "id": "kaist-ai/janus-7b", "name": "janus-7b", "developer": "kaist-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3775, "hfopenllm_v2/BBH": 0.4694, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.2874 } }, { "id": "kaist-ai/janus-dpo-7b", "name": "janus-dpo-7b", "developer": "kaist-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4003, "hfopenllm_v2/BBH": 0.4773, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4387, "hfopenllm_v2/MMLU-PRO": 0.2976 } }, { "id": "kaist-ai/janus-rm-7b", "name": "janus-rm-7b", "developer": "kaist-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1778, "hfopenllm_v2/BBH": 0.3056, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3883, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "kaist-ai/mistral-orpo-capybara-7k", "name": "mistral-orpo-capybara-7k", "developer": "kaist-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5367, "hfopenllm_v2/BBH": 0.4489, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.2971 } }, { "id": "katanemo/arch-agent-1-5b", "name": "Arch-Agent-1.5B", "developer": "katanemo", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 60.0, "bfcl/bfcl.overall.overall_accuracy": 32.14, "bfcl/bfcl.overall.total_cost_usd": 2.45, "bfcl/bfcl.overall.latency_mean_s": 2.38, "bfcl/bfcl.overall.latency_std_s": 4.01, "bfcl/bfcl.overall.latency_p95_s": 5.3, "bfcl/bfcl.non_live.ast_accuracy": 82.67, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 85.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 81.0, "bfcl/bfcl.live.live_accuracy": 67.73, "bfcl/bfcl.live.live_simple_ast_accuracy": 70.54, "bfcl/bfcl.live.live_multiple_ast_accuracy": 67.81, "bfcl/bfcl.live.live_parallel_ast_accuracy": 31.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 26.62, "bfcl/bfcl.multi_turn.base_accuracy": 35.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 27.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 21.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 22.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 8.17, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 5.81, "bfcl/bfcl.memory.recursive_summarization_accuracy": 12.9, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.83 } }, { "id": "katanemo/arch-agent-32b", "name": "Arch-Agent-32B", "developer": "katanemo", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 37.0, "bfcl/bfcl.overall.overall_accuracy": 45.37, "bfcl/bfcl.overall.total_cost_usd": 8.87, "bfcl/bfcl.overall.latency_mean_s": 9.44, "bfcl/bfcl.overall.latency_std_s": 21.44, "bfcl/bfcl.overall.latency_p95_s": 24.87, "bfcl/bfcl.non_live.ast_accuracy": 88.92, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 96.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.0, "bfcl/bfcl.live.live_accuracy": 80.68, "bfcl/bfcl.live.live_simple_ast_accuracy": 86.43, "bfcl/bfcl.live.live_multiple_ast_accuracy": 79.11, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 54.25, "bfcl/bfcl.multi_turn.base_accuracy": 64.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 58.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 53.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 41.5, "bfcl/bfcl.web_search.accuracy": 5.0, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 6.0, "bfcl/bfcl.memory.accuracy": 14.62, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 9.03, "bfcl/bfcl.memory.recursive_summarization_accuracy": 29.03, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.15 } }, { "id": "katanemo/arch-agent-3b", "name": "Arch-Agent-3B", "developer": "katanemo", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 56.0, "bfcl/bfcl.overall.overall_accuracy": 35.36, "bfcl/bfcl.overall.total_cost_usd": 3.7, "bfcl/bfcl.overall.latency_mean_s": 3.56, "bfcl/bfcl.overall.latency_std_s": 6.65, "bfcl/bfcl.overall.latency_p95_s": 8.19, "bfcl/bfcl.non_live.ast_accuracy": 86.67, "bfcl/bfcl.non_live.simple_ast_accuracy": 78.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 82.5, "bfcl/bfcl.live.live_accuracy": 72.91, "bfcl/bfcl.live.live_simple_ast_accuracy": 75.58, "bfcl/bfcl.live.live_multiple_ast_accuracy": 72.27, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 34.88, "bfcl/bfcl.multi_turn.base_accuracy": 42.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 37.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 31.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 29.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 6.88, "bfcl/bfcl.memory.kv_accuracy": 5.16, "bfcl/bfcl.memory.vector_accuracy": 5.81, "bfcl/bfcl.memory.recursive_summarization_accuracy": 9.68, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.67 } }, { "id": "kavonalds/BunderMaxx-0710", "name": "BunderMaxx-0710", "developer": "kavonalds", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2701, "hfopenllm_v2/BBH": 0.5566, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3682, "hfopenllm_v2/MMLU-PRO": 0.1449 } }, { "id": "kavonalds/BunderMaxx-1010", "name": "BunderMaxx-1010", "developer": "kavonalds", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2981, "hfopenllm_v2/BBH": 0.702, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3484, "hfopenllm_v2/MMLU-PRO": 0.1224 } }, { "id": "kavonalds/Lancer-1-1b-Instruct", "name": "Lancer-1-1b-Instruct", "developer": "kavonalds", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5546, "hfopenllm_v2/BBH": 0.3253, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3144, "hfopenllm_v2/MMLU-PRO": 0.1568 } }, { "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe", "name": "T3Q-Qwen2.5-7B-it-KOR-Safe", "developer": "kayfour", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6081, "hfopenllm_v2/BBH": 0.555, "hfopenllm_v2/MATH Level 5": 0.3761, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.4464 } }, { "id": "keeeeenw/MicroLlama", "name": "MicroLlama", "developer": "keeeeenw", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1985, "hfopenllm_v2/BBH": 0.3007, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.1138 } }, { "id": "kekmodel/StopCarbon-10.7B-v5", "name": "StopCarbon-10.7B-v5", "developer": "kekmodel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4728, "hfopenllm_v2/BBH": 0.5178, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.3157 } }, { "id": "kevin009/llamaRAGdrama", "name": "llamaRAGdrama", "developer": "kevin009", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2598, "hfopenllm_v2/BBH": 0.4007, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4316, "hfopenllm_v2/MMLU-PRO": 0.2724 } }, { "id": "Khetterman/DarkAtom-12B-v3", "name": "DarkAtom-12B-v3", "developer": "Khetterman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6173, "hfopenllm_v2/BBH": 0.5154, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4468, "hfopenllm_v2/MMLU-PRO": 0.3546 } }, { "id": "Khetterman/Kosmos-8B-v1", "name": "Kosmos-8B-v1", "developer": "Khetterman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4129, "hfopenllm_v2/BBH": 0.5234, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3919, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "khoantap/cheap-moe-merge", "name": "cheap-moe-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4557, "hfopenllm_v2/BBH": 0.5131, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.3339 } }, { "id": "khoantap/llama-3-8b-stock-merge", "name": "llama-3-8b-stock-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4812, "hfopenllm_v2/BBH": 0.5162, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.38 } }, { "id": "khoantap/llama-breadcrumbs-ties-merge", "name": "llama-breadcrumbs-ties-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2205, "hfopenllm_v2/BBH": 0.5416, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4434, "hfopenllm_v2/MMLU-PRO": 0.3172 } }, { "id": "khoantap/llama-evolve-ties-best-merge", "name": "llama-evolve-ties-best-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6744, "hfopenllm_v2/BBH": 0.5414, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.3946, "hfopenllm_v2/MMLU-PRO": 0.386 } }, { "id": "khoantap/llama-linear-0.5-0.5-1-merge", "name": "llama-linear-0.5-0.5-1-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4812, "hfopenllm_v2/BBH": 0.5643, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4143, "hfopenllm_v2/MMLU-PRO": 0.3833 } }, { "id": "khoantap/llama-linear-0.5-1-0.5-merge", "name": "llama-linear-0.5-1-0.5-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5032, "hfopenllm_v2/BBH": 0.5951, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.369 } }, { "id": "khoantap/llama-linear-1-0.5-0.5-merge", "name": "llama-linear-1-0.5-0.5-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4515, "hfopenllm_v2/BBH": 0.5526, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3635 } }, { "id": "khoantap/llama-slerp-merge", "name": "llama-slerp-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.498, "hfopenllm_v2/BBH": 0.5783, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4053, "hfopenllm_v2/MMLU-PRO": 0.3678 } }, { "id": "khoantap/moe-out-merge", "name": "moe-out-merge", "developer": "khoantap", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4505, "hfopenllm_v2/BBH": 0.5151, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4063, "hfopenllm_v2/MMLU-PRO": 0.3348 } }, { "id": "khulaifi95/Llama-3.1-8B-Reason-Blend-888k", "name": "Llama-3.1-8B-Reason-Blend-888k", "developer": "khulaifi95", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5832, "hfopenllm_v2/BBH": 0.479, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3379, "hfopenllm_v2/MMLU-PRO": 0.31 } }, { "id": "Kimargin/GPT-NEO-1.3B-wiki", "name": "GPT-NEO-1.3B-wiki", "developer": "Kimargin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1921, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3883, "hfopenllm_v2/MMLU-PRO": 0.1099 } }, { "id": "KingNish/qwen-1b-continued", "name": "qwen-1b-continued", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1255, "hfopenllm_v2/BBH": 0.2991, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3859, "hfopenllm_v2/MMLU-PRO": 0.1261 } }, { "id": "KingNish/qwen-1b-continued-v2", "name": "qwen-1b-continued-v2", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1579, "hfopenllm_v2/BBH": 0.3119, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3393, "hfopenllm_v2/MMLU-PRO": 0.1193 } }, { "id": "KingNish/qwen-1b-continued-v2.1", "name": "qwen-1b-continued-v2.1", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1127, "hfopenllm_v2/BBH": 0.3042, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.1278 } }, { "id": "KingNish/qwen-1b-continued-v2.2", "name": "qwen-1b-continued-v2.2", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1413, "hfopenllm_v2/BBH": 0.3059, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3513, "hfopenllm_v2/MMLU-PRO": 0.1262 } }, { "id": "KingNish/Qwen2.5-0.5b-Test-ft", "name": "Qwen2.5-0.5b-Test-ft", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2671, "hfopenllm_v2/BBH": 0.3232, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3421, "hfopenllm_v2/MMLU-PRO": 0.1689 } }, { "id": "KingNish/Reasoning-0.5b", "name": "Reasoning-0.5b", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2174, "hfopenllm_v2/BBH": 0.3354, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3513, "hfopenllm_v2/MMLU-PRO": 0.1641 } }, { "id": "KingNish/Reasoning-Llama-3b-v0.1", "name": "Reasoning-Llama-3b-v0.1", "developer": "KingNish", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6225, "hfopenllm_v2/BBH": 0.4343, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3168, "hfopenllm_v2/MMLU-PRO": 0.3029 } }, { "id": "kms7530/chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", "name": "chemeng_llama-3-8b-Instruct-bnb-4bit_24_1_100_1", "developer": "kms7530", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5455, "hfopenllm_v2/BBH": 0.4289, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3821, "hfopenllm_v2/MMLU-PRO": 0.2798 } }, { "id": "kms7530/chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", "name": "chemeng_phi-3-mini-4k-instruct-bnb-4bit_16_4_100_1_nonmath", "developer": "kms7530", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4863, "hfopenllm_v2/BBH": 0.4987, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3983, "hfopenllm_v2/MMLU-PRO": 0.3481 } }, { "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1", "name": "chemeng_qwen-math-7b_24_1_100_1", "developer": "kms7530", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2111, "hfopenllm_v2/BBH": 0.3578, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.2158 } }, { "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath", "name": "chemeng_qwen-math-7b_24_1_100_1_nonmath", "developer": "kms7530", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2584, "hfopenllm_v2/BBH": 0.3893, "hfopenllm_v2/MATH Level 5": 0.3097, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4087, "hfopenllm_v2/MMLU-PRO": 0.2452 } }, { "id": "kno10/ende-chat-0.0.5", "name": "ende-chat-0.0.5", "developer": "kno10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3404, "hfopenllm_v2/BBH": 0.3604, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3938, "hfopenllm_v2/MMLU-PRO": 0.179 } }, { "id": "kno10/ende-chat-0.0.7", "name": "ende-chat-0.0.7", "developer": "kno10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4401, "hfopenllm_v2/BBH": 0.3792, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3861, "hfopenllm_v2/MMLU-PRO": 0.1966 } }, { "id": "Kquant03/CognitiveFusion2-4x7B-BF16", "name": "CognitiveFusion2-4x7B-BF16", "developer": "Kquant03", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3567, "hfopenllm_v2/BBH": 0.4108, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.2793 } }, { "id": "Kquant03/L3-Pneuma-8B", "name": "L3-Pneuma-8B", "developer": "Kquant03", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2374, "hfopenllm_v2/BBH": 0.4955, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.3184 } }, { "id": "Krystalan/DRT-o1-14B", "name": "DRT-o1-14B", "developer": "Krystalan", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4068, "hfopenllm_v2/BBH": 0.6379, "hfopenllm_v2/MATH Level 5": 0.4826, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4795, "hfopenllm_v2/MMLU-PRO": 0.5179 } }, { "id": "Krystalan/DRT-o1-7B", "name": "DRT-o1-7B", "developer": "Krystalan", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3928, "hfopenllm_v2/BBH": 0.5468, "hfopenllm_v2/MATH Level 5": 0.4479, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.5087, "hfopenllm_v2/MMLU-PRO": 0.4151 } }, { "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415", "name": "Llama3-70b-SVA-FT-1415", "developer": "KSU-HW-SEC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.618, "hfopenllm_v2/BBH": 0.665, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4565, "hfopenllm_v2/MMLU-PRO": 0.5243 } }, { "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500", "name": "Llama3-70b-SVA-FT-500", "developer": "KSU-HW-SEC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6105, "hfopenllm_v2/BBH": 0.6692, "hfopenllm_v2/MATH Level 5": 0.2137, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4511, "hfopenllm_v2/MMLU-PRO": 0.5227 } }, { "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final", "name": "Llama3-70b-SVA-FT-final", "developer": "KSU-HW-SEC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6165, "hfopenllm_v2/BBH": 0.665, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4565, "hfopenllm_v2/MMLU-PRO": 0.5243 } }, { "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step", "name": "Llama3.1-70b-SVA-FT-1000step", "developer": "KSU-HW-SEC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7238, "hfopenllm_v2/BBH": 0.6903, "hfopenllm_v2/MATH Level 5": 0.321, "hfopenllm_v2/GPQA": 0.396, "hfopenllm_v2/MUSR": 0.4592, "hfopenllm_v2/MMLU-PRO": 0.5252 } }, { "id": "kuaishou/kwaipilot-40b-0604", "name": "kwaipilot-40b-0604", "developer": "Kuaishou", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.07042253521126761, "livecodebenchpro/Easy Problems": 0.056338028169014086 } }, { "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5", "name": "NeuralExperiment-7b-MagicCoder-v7.5", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4553, "hfopenllm_v2/BBH": 0.3988, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4282, "hfopenllm_v2/MMLU-PRO": 0.2824 } }, { "id": "Kukedlc/NeuralLLaMa-3-8b-DT-v0.1", "name": "NeuralLLaMa-3-8b-DT-v0.1", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4371, "hfopenllm_v2/BBH": 0.4987, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.3792 } }, { "id": "Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3", "name": "NeuralLLaMa-3-8b-ORPO-v0.3", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5276, "hfopenllm_v2/BBH": 0.4557, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.3057 } }, { "id": "Kukedlc/NeuralSynthesis-7B-v0.1", "name": "NeuralSynthesis-7B-v0.1", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4185, "hfopenllm_v2/BBH": 0.5145, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4333, "hfopenllm_v2/MMLU-PRO": 0.3049 } }, { "id": "Kukedlc/NeuralSynthesis-7B-v0.3", "name": "NeuralSynthesis-7B-v0.3", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4078, "hfopenllm_v2/BBH": 0.5138, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4346, "hfopenllm_v2/MMLU-PRO": 0.305 } }, { "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp", "name": "NeuralSynthesis-7b-v0.4-slerp", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3947, "hfopenllm_v2/BBH": 0.5143, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.3043 } }, { "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT", "name": "Qwen-2.5-7b-Spanish-o1-CoT", "developer": "Kukedlc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.421, "hfopenllm_v2/BBH": 0.5602, "hfopenllm_v2/MATH Level 5": 0.2727, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4777, "hfopenllm_v2/MMLU-PRO": 0.4363 } }, { "id": "Kumar955/Hemanth-llm", "name": "Hemanth-llm", "developer": "Kumar955", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5045, "hfopenllm_v2/BBH": 0.5225, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4486, "hfopenllm_v2/MMLU-PRO": 0.3113 } }, { "id": "kyutai/helium-1-preview-2b", "name": "helium-1-preview-2b", "developer": "kyutai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2614, "hfopenllm_v2/BBH": 0.3638, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.355, "hfopenllm_v2/MMLU-PRO": 0.1873 } }, { "id": "kz919/QwQ-0.5B-Distilled-SFT", "name": "QwQ-0.5B-Distilled-SFT", "developer": "kz919", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3077, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3409, "hfopenllm_v2/MMLU-PRO": 0.1587 } }, { "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1", "name": "3_PRYMMAL-ECE-7B-SLERP-V1", "developer": "L-RAGE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2742, "hfopenllm_v2/BBH": 0.4228, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3841, "hfopenllm_v2/MMLU-PRO": 0.2925 } }, { "id": "ladydaina/ECE-FDF", "name": "ECE-FDF", "developer": "ladydaina", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3728, "hfopenllm_v2/BBH": 0.515, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4504, "hfopenllm_v2/MMLU-PRO": 0.3007 } }, { "id": "laislemke/LLaMA-2-vicuna-7b-slerp", "name": "LLaMA-2-vicuna-7b-slerp", "developer": "laislemke", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2932, "hfopenllm_v2/BBH": 0.2986, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3833, "hfopenllm_v2/MMLU-PRO": 0.1342 } }, { "id": "lalainy/ECE-PRYMMAL-0.5B-FT-V5-MUSR", "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2138, "hfopenllm_v2/BBH": 0.3269, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1533 } }, { "id": "lalainy/ECE-PRYMMAL-0.5B-SLERP-V4", "name": "ECE-PRYMMAL-0.5B-SLERP-V4", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1564, "hfopenllm_v2/BBH": 0.2894, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3789, "hfopenllm_v2/MMLU-PRO": 0.1169 } }, { "id": "lalainy/ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", "name": "ECE-PRYMMAL-YL-0.5B-SLERP-BIS-V1", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1437, "hfopenllm_v2/BBH": 0.3032, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2349, "hfopenllm_v2/MUSR": 0.3646, "hfopenllm_v2/MMLU-PRO": 0.1121 } }, { "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V3", "name": "ECE-PRYMMAL-YL-1B-SLERP-V3", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.325, "hfopenllm_v2/BBH": 0.4225, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4213, "hfopenllm_v2/MMLU-PRO": 0.2931 } }, { "id": "lalainy/ECE-PRYMMAL-YL-1B-SLERP-V4", "name": "ECE-PRYMMAL-YL-1B-SLERP-V4", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3324, "hfopenllm_v2/BBH": 0.4171, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4306, "hfopenllm_v2/MMLU-PRO": 0.2893 } }, { "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V1", "name": "ECE-PRYMMAL-YL-6B-SLERP-V1", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3264, "hfopenllm_v2/BBH": 0.4629, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4864, "hfopenllm_v2/MMLU-PRO": 0.3214 } }, { "id": "lalainy/ECE-PRYMMAL-YL-6B-SLERP-V2", "name": "ECE-PRYMMAL-YL-6B-SLERP-V2", "developer": "lalainy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3249, "hfopenllm_v2/BBH": 0.4629, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4864, "hfopenllm_v2/MMLU-PRO": 0.3214 } }, { "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B", "name": "qwen2.5-reinstruct-alternate-lumen-14B", "developer": "Lambent", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4794, "hfopenllm_v2/BBH": 0.6459, "hfopenllm_v2/MATH Level 5": 0.4622, "hfopenllm_v2/GPQA": 0.3767, "hfopenllm_v2/MUSR": 0.477, "hfopenllm_v2/MMLU-PRO": 0.5388 } }, { "id": "Langboat/Mengzi3-8B-Chat", "name": "Mengzi3-8B-Chat", "developer": "Langboat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.514, "hfopenllm_v2/BBH": 0.4684, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.4078, "hfopenllm_v2/MMLU-PRO": 0.3142 } }, { "id": "langgptai/Qwen-las-v0.1", "name": "Qwen-las-v0.1", "developer": "langgptai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3301, "hfopenllm_v2/BBH": 0.3893, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3701, "hfopenllm_v2/MMLU-PRO": 0.2325 } }, { "id": "langgptai/qwen1.5-7b-chat-sa-v0.1", "name": "qwen1.5-7b-chat-sa-v0.1", "developer": "langgptai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4268, "hfopenllm_v2/BBH": 0.4325, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3551, "hfopenllm_v2/MMLU-PRO": 0.2993 } }, { "id": "lars1234/Mistral-Small-24B-Instruct-2501-writer", "name": "Mistral-Small-24B-Instruct-2501-writer", "developer": "lars1234", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6565, "hfopenllm_v2/BBH": 0.6733, "hfopenllm_v2/MATH Level 5": 0.3557, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4645, "hfopenllm_v2/MMLU-PRO": 0.5448 } }, { "id": "Lawnakk/BBA100", "name": "BBA100", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2076, "hfopenllm_v2/BBH": 0.2826, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.1122 } }, { "id": "Lawnakk/BBALAW1", "name": "BBALAW1", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1905, "hfopenllm_v2/BBH": 0.2872, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.4153, "hfopenllm_v2/MMLU-PRO": 0.1121 } }, { "id": "Lawnakk/BBALAW1.0", "name": "BBALAW1.0", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1351, "hfopenllm_v2/BBH": 0.2828, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3526, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "Lawnakk/BBALAW1.2", "name": "BBALAW1.2", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1354, "hfopenllm_v2/BBH": 0.2811, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.1105 } }, { "id": "Lawnakk/BBALAW1.3", "name": "BBALAW1.3", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1354, "hfopenllm_v2/BBH": 0.2827, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.1094 } }, { "id": "Lawnakk/BBALAW1.6", "name": "BBALAW1.6", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5245, "hfopenllm_v2/BBH": 0.5554, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4368, "hfopenllm_v2/MMLU-PRO": 0.4507 } }, { "id": "Lawnakk/BBALAW1.61", "name": "BBALAW1.61", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5771, "hfopenllm_v2/BBH": 0.5549, "hfopenllm_v2/MATH Level 5": 0.3663, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4355, "hfopenllm_v2/MMLU-PRO": 0.4471 } }, { "id": "Lawnakk/BBALAW1.62", "name": "BBALAW1.62", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5046, "hfopenllm_v2/BBH": 0.5581, "hfopenllm_v2/MATH Level 5": 0.2825, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.4545 } }, { "id": "Lawnakk/BBALAW1.63", "name": "BBALAW1.63", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4407, "hfopenllm_v2/BBH": 0.5541, "hfopenllm_v2/MATH Level 5": 0.3701, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4303, "hfopenllm_v2/MMLU-PRO": 0.4471 } }, { "id": "Lawnakk/BBALAW1.64", "name": "BBALAW1.64", "developer": "Lawnakk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1395, "hfopenllm_v2/BBH": 0.2779, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.1115 } }, { "id": "leafspark/Llama-3.1-8B-MultiReflection-Instruct", "name": "Llama-3.1-8B-MultiReflection-Instruct", "developer": "leafspark", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7125, "hfopenllm_v2/BBH": 0.5009, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3682, "hfopenllm_v2/MMLU-PRO": 0.3724 } }, { "id": "LEESM/llama-2-7b-hf-lora-oki100p", "name": "llama-2-7b-hf-lora-oki100p", "developer": "LEESM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2513, "hfopenllm_v2/BBH": 0.3492, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.1856 } }, { "id": "LEESM/llama-2-7b-hf-lora-oki10p", "name": "llama-2-7b-hf-lora-oki10p", "developer": "LEESM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.227, "hfopenllm_v2/BBH": 0.3531, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1679 } }, { "id": "LEESM/llama-3-8b-bnb-4b-kowiki231101", "name": "llama-3-8b-bnb-4b-kowiki231101", "developer": "LEESM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1685, "hfopenllm_v2/BBH": 0.4131, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3551, "hfopenllm_v2/MMLU-PRO": 0.2425 } }, { "id": "LEESM/llama-3-Korean-Bllossom-8B-trexlab-oki10p", "name": "llama-3-Korean-Bllossom-8B-trexlab-oki10p", "developer": "LEESM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2137, "hfopenllm_v2/BBH": 0.4343, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.3177 } }, { "id": "lemon07r/Gemma-2-Ataraxy-9B", "name": "Gemma-2-Ataraxy-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3009, "hfopenllm_v2/BBH": 0.5931, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4424, "hfopenllm_v2/MMLU-PRO": 0.4226 } }, { "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B", "name": "Gemma-2-Ataraxy-Advanced-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5516, "hfopenllm_v2/BBH": 0.5889, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.3761, "hfopenllm_v2/MMLU-PRO": 0.4244 } }, { "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B", "name": "Gemma-2-Ataraxy-Remix-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7083, "hfopenllm_v2/BBH": 0.5892, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.4239 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v2-9B", "name": "Gemma-2-Ataraxy-v2-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2136, "hfopenllm_v2/BBH": 0.5766, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.3484, "hfopenllm_v2/MMLU-PRO": 0.4221 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B", "name": "Gemma-2-Ataraxy-v2a-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1595, "hfopenllm_v2/BBH": 0.5182, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.3165, "hfopenllm_v2/MMLU-PRO": 0.3515 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B", "name": "Gemma-2-Ataraxy-v2f-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3791, "hfopenllm_v2/BBH": 0.5193, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.3231, "hfopenllm_v2/MMLU-PRO": 0.3503 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B", "name": "Gemma-2-Ataraxy-v3-Advanced-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6602, "hfopenllm_v2/BBH": 0.5935, "hfopenllm_v2/MATH Level 5": 0.1873, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.4196 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B", "name": "Gemma-2-Ataraxy-v3b-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6809, "hfopenllm_v2/BBH": 0.5908, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.4205 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B", "name": "Gemma-2-Ataraxy-v3i-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4203, "hfopenllm_v2/BBH": 0.5626, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.3181, "hfopenllm_v2/MMLU-PRO": 0.4166 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B", "name": "Gemma-2-Ataraxy-v3j-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4169, "hfopenllm_v2/BBH": 0.5632, "hfopenllm_v2/MATH Level 5": 0.1692, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.318, "hfopenllm_v2/MMLU-PRO": 0.4134 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B", "name": "Gemma-2-Ataraxy-v4-Advanced-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7015, "hfopenllm_v2/BBH": 0.6024, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4581, "hfopenllm_v2/MMLU-PRO": 0.4367 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B", "name": "Gemma-2-Ataraxy-v4a-Advanced-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7135, "hfopenllm_v2/BBH": 0.5988, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.4309 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B", "name": "Gemma-2-Ataraxy-v4b-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6878, "hfopenllm_v2/BBH": 0.6039, "hfopenllm_v2/MATH Level 5": 0.2334, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4555, "hfopenllm_v2/MMLU-PRO": 0.4357 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B", "name": "Gemma-2-Ataraxy-v4c-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6945, "hfopenllm_v2/BBH": 0.6084, "hfopenllm_v2/MATH Level 5": 0.2266, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.4395 } }, { "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B", "name": "Gemma-2-Ataraxy-v4d-9B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.725, "hfopenllm_v2/BBH": 0.6054, "hfopenllm_v2/MATH Level 5": 0.2334, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4541, "hfopenllm_v2/MMLU-PRO": 0.4346 } }, { "id": "lemon07r/llama-3-NeuralMahou-8b", "name": "llama-3-NeuralMahou-8b", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4901, "hfopenllm_v2/BBH": 0.4184, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.369 } }, { "id": "lemon07r/Llama-3-RedMagic4-8B", "name": "Llama-3-RedMagic4-8B", "developer": "lemon07r", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4864, "hfopenllm_v2/BBH": 0.4256, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3766, "hfopenllm_v2/MMLU-PRO": 0.3676 } }, { "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0", "name": "leniachat-gemma-2b-v0", "developer": "LenguajeNaturalAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.215, "hfopenllm_v2/BBH": 0.3074, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3659, "hfopenllm_v2/MMLU-PRO": 0.117 } }, { "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0", "name": "leniachat-qwen2-1.5B-v0", "developer": "LenguajeNaturalAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2221, "hfopenllm_v2/BBH": 0.3684, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.188 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_12", "name": "_Spydaz_Web_AI_12", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2765, "hfopenllm_v2/BBH": 0.3163, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3582, "hfopenllm_v2/MMLU-PRO": 0.1137 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_14", "name": "_Spydaz_Web_AI_14", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1812, "hfopenllm_v2/BBH": 0.2989, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001", "name": "_Spydaz_Web_AI_AGI_R1_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4505, "hfopenllm_v2/BBH": 0.4609, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4256, "hfopenllm_v2/MMLU-PRO": 0.2734 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002", "name": "_Spydaz_Web_AI_AGI_R1_002", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5307, "hfopenllm_v2/BBH": 0.4683, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4255, "hfopenllm_v2/MMLU-PRO": 0.2894 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder", "name": "_Spydaz_Web_AI_AGI_R1_MasterCoder", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4143, "hfopenllm_v2/BBH": 0.4689, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.472, "hfopenllm_v2/MMLU-PRO": 0.2719 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001", "name": "_Spydaz_Web_AI_AGI_R1_Math_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4571, "hfopenllm_v2/BBH": 0.4818, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4778, "hfopenllm_v2/MMLU-PRO": 0.2681 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003", "name": "_Spydaz_Web_AI_AGI_R1_Math_003", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.62, "hfopenllm_v2/BBH": 0.4756, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.2999 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", "name": "_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5951, "hfopenllm_v2/BBH": 0.4927, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.5198, "hfopenllm_v2/MMLU-PRO": 0.3 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student", "name": "_Spydaz_Web_AI_AGI_R1_Math_Student", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5736, "hfopenllm_v2/BBH": 0.4881, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.5098, "hfopenllm_v2/MMLU-PRO": 0.2927 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher", "name": "_Spydaz_Web_AI_AGI_R1_Math_Teacher", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5772, "hfopenllm_v2/BBH": 0.4805, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.5222, "hfopenllm_v2/MMLU-PRO": 0.2956 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR", "name": "_Spydaz_Web_AI_AGI_R1_MUSR", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4786, "hfopenllm_v2/BBH": 0.4672, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4869, "hfopenllm_v2/MMLU-PRO": 0.2828 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001", "name": "_Spydaz_Web_AI_AGI_R1_OmG_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5818, "hfopenllm_v2/BBH": 0.4908, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4486, "hfopenllm_v2/MMLU-PRO": 0.2906 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002", "name": "_Spydaz_Web_AI_AGI_R1_OmG_002", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5462, "hfopenllm_v2/BBH": 0.4655, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4511, "hfopenllm_v2/MMLU-PRO": 0.2867 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder", "name": "_Spydaz_Web_AI_AGI_R1_OmG_Coder", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4924, "hfopenllm_v2/BBH": 0.4638, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.5625, "hfopenllm_v2/MMLU-PRO": 0.289 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math", "name": "_Spydaz_Web_AI_AGI_R1_OmG_Math", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5033, "hfopenllm_v2/BBH": 0.4677, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4326, "hfopenllm_v2/MMLU-PRO": 0.2913 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", "name": "_Spydaz_Web_AI_AGI_R1_OmG_MathMaster", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5558, "hfopenllm_v2/BBH": 0.4742, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.451, "hfopenllm_v2/MMLU-PRO": 0.2672 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder", "name": "_Spydaz_Web_AI_AGI_R1_Student_Coder", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.545, "hfopenllm_v2/BBH": 0.4651, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4388, "hfopenllm_v2/MMLU-PRO": 0.2768 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder", "name": "_Spydaz_Web_AI_AGI_R1_Teacher_Coder", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5082, "hfopenllm_v2/BBH": 0.4797, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.2845 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student", "name": "_Spydaz_Web_AI_AGI_R1_Top_Student", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.604, "hfopenllm_v2/BBH": 0.4988, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.5398, "hfopenllm_v2/MMLU-PRO": 0.3024 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1", "name": "_Spydaz_Web_AI_AGI_R1_X1", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4273, "hfopenllm_v2/BBH": 0.4759, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.2891 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2", "name": "_Spydaz_Web_AI_AGI_R1_X2", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5434, "hfopenllm_v2/BBH": 0.4786, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4695, "hfopenllm_v2/MMLU-PRO": 0.2921 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1", "name": "_Spydaz_Web_AI_AGI_RP_R1", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5426, "hfopenllm_v2/BBH": 0.4701, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.4201, "hfopenllm_v2/MMLU-PRO": 0.2894 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002", "name": "_Spydaz_Web_AI_BIBLE_002", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2195, "hfopenllm_v2/BBH": 0.3289, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3407, "hfopenllm_v2/MMLU-PRO": 0.1368 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002", "name": "_Spydaz_Web_AI_ChatML_002", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2412, "hfopenllm_v2/BBH": 0.3106, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3623, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA", "name": "_Spydaz_Web_AI_ChatQA", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1415, "hfopenllm_v2/BBH": 0.3236, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.1475 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003", "name": "_Spydaz_Web_AI_ChatQA_003", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2209, "hfopenllm_v2/BBH": 0.3172, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.1133 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_", "name": "_Spydaz_Web_AI_TEMP_", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4795, "hfopenllm_v2/BBH": 0.4957, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4218, "hfopenllm_v2/MMLU-PRO": 0.3121 } }, { "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_", "name": "_Spydaz_Web_AI_Top_Teacher_", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4404, "hfopenllm_v2/BBH": 0.4891, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4366, "hfopenllm_v2/MMLU-PRO": 0.315 } }, { "id": "LeroyDyer/CheckPoint_A", "name": "CheckPoint_A", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4513, "hfopenllm_v2/BBH": 0.4748, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.288 } }, { "id": "LeroyDyer/CheckPoint_B", "name": "CheckPoint_B", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.444, "hfopenllm_v2/BBH": 0.478, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3898, "hfopenllm_v2/MMLU-PRO": 0.2907 } }, { "id": "LeroyDyer/CheckPoint_C", "name": "CheckPoint_C", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3477, "hfopenllm_v2/BBH": 0.4586, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4346, "hfopenllm_v2/MMLU-PRO": 0.3021 } }, { "id": "LeroyDyer/CheckPoint_R1", "name": "CheckPoint_R1", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1728, "hfopenllm_v2/BBH": 0.4225, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.2205 } }, { "id": "LeroyDyer/LCARS_AI_001", "name": "LCARS_AI_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3109, "hfopenllm_v2/BBH": 0.4258, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4384, "hfopenllm_v2/MMLU-PRO": 0.267 } }, { "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI", "name": "LCARS_AI_1x4_003_SuperAI", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4111, "hfopenllm_v2/BBH": 0.492, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4506, "hfopenllm_v2/MMLU-PRO": 0.2972 } }, { "id": "LeroyDyer/LCARS_AI_StarTrek_Computer", "name": "LCARS_AI_StarTrek_Computer", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3583, "hfopenllm_v2/BBH": 0.4446, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.395, "hfopenllm_v2/MMLU-PRO": 0.2458 } }, { "id": "LeroyDyer/LCARS_TOP_SCORE", "name": "LCARS_TOP_SCORE", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4371, "hfopenllm_v2/BBH": 0.5127, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3031 } }, { "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b", "name": "Mixtral_AI_SwahiliTron_7b", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1534, "hfopenllm_v2/BBH": 0.3055, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1208 } }, { "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b", "name": "SpydazWeb_AI_CyberTron_Ultra_7b", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1556, "hfopenllm_v2/BBH": 0.4811, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.2866 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2", "name": "SpydazWeb_AI_HumanAGI_001_M2", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.394, "hfopenllm_v2/BBH": 0.4888, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4503, "hfopenllm_v2/MMLU-PRO": 0.3005 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002", "name": "SpydazWeb_AI_HumanAGI_002", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4088, "hfopenllm_v2/BBH": 0.5044, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4865, "hfopenllm_v2/MMLU-PRO": 0.3059 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001", "name": "SpydazWeb_AI_HumanAI_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2252, "hfopenllm_v2/BBH": 0.3344, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.386, "hfopenllm_v2/MMLU-PRO": 0.1271 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006", "name": "SpydazWeb_AI_HumanAI_006", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.143, "hfopenllm_v2/BBH": 0.3302, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.1135 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007", "name": "SpydazWeb_AI_HumanAI_007", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3352, "hfopenllm_v2/BBH": 0.3416, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4096, "hfopenllm_v2/MMLU-PRO": 0.1352 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT", "name": "SpydazWeb_AI_HumanAI_009_CHAT", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2973, "hfopenllm_v2/BBH": 0.3307, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.1433 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT", "name": "SpydazWeb_AI_HumanAI_010_CHAT", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2507, "hfopenllm_v2/BBH": 0.3336, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4137, "hfopenllm_v2/MMLU-PRO": 0.143 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT", "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3149, "hfopenllm_v2/BBH": 0.3523, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.1595 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT_ML", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3752, "hfopenllm_v2/BBH": 0.3984, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4239, "hfopenllm_v2/MMLU-PRO": 0.2019 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", "name": "SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.405, "hfopenllm_v2/BBH": 0.4858, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3921, "hfopenllm_v2/MMLU-PRO": 0.2956 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_IA", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3036, "hfopenllm_v2/BBH": 0.4575, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.2329 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_MX", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3066, "hfopenllm_v2/BBH": 0.3158, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3444, "hfopenllm_v2/MMLU-PRO": 0.1107 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", "name": "SpydazWeb_AI_HumanAI_012_INSTRUCT_XA", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3579, "hfopenllm_v2/BBH": 0.4477, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4134, "hfopenllm_v2/MMLU-PRO": 0.2376 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP", "name": "SpydazWeb_AI_HumanAI_RP", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2541, "hfopenllm_v2/BBH": 0.3323, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3883, "hfopenllm_v2/MMLU-PRO": 0.1324 } }, { "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision", "name": "SpydazWeb_AI_HumanAI_TextVision", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3063, "hfopenllm_v2/BBH": 0.3354, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3938, "hfopenllm_v2/MMLU-PRO": 0.1387 } }, { "id": "LeroyDyer/SpydazWeb_HumanAI_M1", "name": "SpydazWeb_HumanAI_M1", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3582, "hfopenllm_v2/BBH": 0.3563, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1663 } }, { "id": "LeroyDyer/SpydazWeb_HumanAI_M2", "name": "SpydazWeb_HumanAI_M2", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.375, "hfopenllm_v2/BBH": 0.3931, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3751, "hfopenllm_v2/MMLU-PRO": 0.201 } }, { "id": "LeroyDyer/SpydazWeb_HumanAI_M3", "name": "SpydazWeb_HumanAI_M3", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1579, "hfopenllm_v2/BBH": 0.3127, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "LeroyDyer/SpydazWebAI_Human_AGI", "name": "SpydazWebAI_Human_AGI", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3388, "hfopenllm_v2/BBH": 0.3375, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.1479 } }, { "id": "LeroyDyer/SpydazWebAI_Human_AGI_001", "name": "SpydazWebAI_Human_AGI_001", "developer": "LeroyDyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3118, "hfopenllm_v2/BBH": 0.3433, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.1426 } }, { "id": "lesubra/ECE-EIFFEL-3B", "name": "ECE-EIFFEL-3B", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3469, "hfopenllm_v2/BBH": 0.5102, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4362, "hfopenllm_v2/MMLU-PRO": 0.3821 } }, { "id": "lesubra/ECE-EIFFEL-3Bv2", "name": "ECE-EIFFEL-3Bv2", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3013, "hfopenllm_v2/BBH": 0.5424, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4443, "hfopenllm_v2/MMLU-PRO": 0.3999 } }, { "id": "lesubra/ECE-EIFFEL-3Bv3", "name": "ECE-EIFFEL-3Bv3", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3786, "hfopenllm_v2/BBH": 0.5469, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4675, "hfopenllm_v2/MMLU-PRO": 0.3975 } }, { "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V1", "name": "ECE-PRYMMAL-3B-SLERP-V1", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2933, "hfopenllm_v2/BBH": 0.5341, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.39 } }, { "id": "lesubra/ECE-PRYMMAL-3B-SLERP-V2", "name": "ECE-PRYMMAL-3B-SLERP-V2", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2933, "hfopenllm_v2/BBH": 0.5341, "hfopenllm_v2/MATH Level 5": 0.1662, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.39 } }, { "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V1", "name": "ECE-PRYMMAL-3B-SLERP_2-V1", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3649, "hfopenllm_v2/BBH": 0.5411, "hfopenllm_v2/MATH Level 5": 0.1677, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4661, "hfopenllm_v2/MMLU-PRO": 0.399 } }, { "id": "lesubra/ECE-PRYMMAL-3B-SLERP_2-V2", "name": "ECE-PRYMMAL-3B-SLERP_2-V2", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3664, "hfopenllm_v2/BBH": 0.5411, "hfopenllm_v2/MATH Level 5": 0.1677, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4661, "hfopenllm_v2/MMLU-PRO": 0.399 } }, { "id": "lesubra/merge-test", "name": "merge-test", "developer": "lesubra", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5383, "hfopenllm_v2/BBH": 0.524, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4419, "hfopenllm_v2/MMLU-PRO": 0.3874 } }, { "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", "name": "EXAONE-3.0-7.8B-Instruct", "developer": "LGAI-EXAONE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7193, "hfopenllm_v2/BBH": 0.4174, "hfopenllm_v2/MATH Level 5": 0.3044, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.3577 } }, { "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", "name": "EXAONE-3.5-2.4B-Instruct", "developer": "LGAI-EXAONE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.795, "hfopenllm_v2/BBH": 0.4092, "hfopenllm_v2/MATH Level 5": 0.3678, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.328 } }, { "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct", "name": "EXAONE-3.5-32B-Instruct", "developer": "LGAI-EXAONE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8392, "hfopenllm_v2/BBH": 0.5761, "hfopenllm_v2/MATH Level 5": 0.5128, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3807, "hfopenllm_v2/MMLU-PRO": 0.4637 } }, { "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct", "name": "EXAONE-3.5-7.8B-Instruct", "developer": "LGAI-EXAONE", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8136, "hfopenllm_v2/BBH": 0.4728, "hfopenllm_v2/MATH Level 5": 0.4751, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3779, "hfopenllm_v2/MMLU-PRO": 0.4133 } }, { "id": "lightblue/suzume-llama-3-8B-multilingual", "name": "suzume-llama-3-8B-multilingual", "developer": "lightblue", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6678, "hfopenllm_v2/BBH": 0.495, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.3383 } }, { "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-full", "name": "suzume-llama-3-8B-multilingual-orpo-borda-full", "developer": "lightblue", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5817, "hfopenllm_v2/BBH": 0.4714, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.331 } }, { "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half", "name": "suzume-llama-3-8B-multilingual-orpo-borda-half", "developer": "lightblue", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6249, "hfopenllm_v2/BBH": 0.4707, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3516, "hfopenllm_v2/MMLU-PRO": 0.3614 } }, { "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25", "name": "suzume-llama-3-8B-multilingual-orpo-borda-top25", "developer": "lightblue", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6637, "hfopenllm_v2/BBH": 0.4865, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top75", "name": "suzume-llama-3-8B-multilingual-orpo-borda-top75", "developer": "lightblue", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6687, "hfopenllm_v2/BBH": 0.4833, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3817, "hfopenllm_v2/MMLU-PRO": 0.3769 } }, { "id": "LightningRodLabs/Flashlight-v1.0", "name": "Flashlight-v1.0", "developer": "LightningRodLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6745, "hfopenllm_v2/BBH": 0.6877, "hfopenllm_v2/MATH Level 5": 0.497, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.5402 } }, { "id": "LightningRodLabs/Flashlight-v1.1", "name": "Flashlight-v1.1", "developer": "LightningRodLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6721, "hfopenllm_v2/BBH": 0.6901, "hfopenllm_v2/MATH Level 5": 0.5325, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.5416 } }, { "id": "LightningRodLabs/Flashlight-v1.2", "name": "Flashlight-v1.2", "developer": "LightningRodLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.436, "hfopenllm_v2/BBH": 0.3265, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.2357, "hfopenllm_v2/MUSR": 0.4554, "hfopenllm_v2/MMLU-PRO": 0.2485 } }, { "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1", "name": "2_PRYMMAL-ECE-2B-SLERP-V1", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5823, "hfopenllm_v2/BBH": 0.4287, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.2678 } }, { "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2", "name": "2_PRYMMAL-ECE-2B-SLERP-V2", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5543, "hfopenllm_v2/BBH": 0.4376, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4482, "hfopenllm_v2/MMLU-PRO": 0.2744 } }, { "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP", "name": "2_PRYMMAL-ECE-7B-SLERP", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5577, "hfopenllm_v2/BBH": 0.5557, "hfopenllm_v2/MATH Level 5": 0.3633, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4396, "hfopenllm_v2/MMLU-PRO": 0.4507 } }, { "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1", "name": "2_PRYMMAL-ECE-7B-SLERP-V1", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1073, "hfopenllm_v2/BBH": 0.3053, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.1124 } }, { "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2", "name": "2_PRYMMAL-ECE-7B-SLERP-V2", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1073, "hfopenllm_v2/BBH": 0.3053, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.1124 } }, { "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3", "name": "2_PRYMMAL-ECE-7B-SLERP-V3", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2235, "hfopenllm_v2/BBH": 0.3578, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.4107, "hfopenllm_v2/MMLU-PRO": 0.1817 } }, { "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1", "name": "PRYMMAL-ECE-1B-SLERP-V1", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2874, "hfopenllm_v2/BBH": 0.419, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3974, "hfopenllm_v2/MMLU-PRO": 0.2926 } }, { "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8", "name": "PRYMMAL-ECE-7B-SLERP-V8", "developer": "Lil-R", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1258, "hfopenllm_v2/BBH": 0.2955, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "LilRg/10PRYMMAL-3B-slerp", "name": "10PRYMMAL-3B-slerp", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1946, "hfopenllm_v2/BBH": 0.532, "hfopenllm_v2/MATH Level 5": 0.1495, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4529, "hfopenllm_v2/MMLU-PRO": 0.3881 } }, { "id": "LilRg/ECE-1B-merge-PRYMMAL", "name": "ECE-1B-merge-PRYMMAL", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2712, "hfopenllm_v2/BBH": 0.4235, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3801, "hfopenllm_v2/MMLU-PRO": 0.2906 } }, { "id": "LilRg/ECE_Finetunning", "name": "ECE_Finetunning", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0445, "hfopenllm_v2/BBH": 0.4732, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3839, "hfopenllm_v2/MMLU-PRO": 0.3191 } }, { "id": "LilRg/PRYMMAL-6B-slerp", "name": "PRYMMAL-6B-slerp", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1153, "hfopenllm_v2/BBH": 0.2868, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.1108 } }, { "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3", "name": "PRYMMAL-ECE-7B-SLERP-V3", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1243, "hfopenllm_v2/BBH": 0.2957, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4", "name": "PRYMMAL-ECE-7B-SLERP-V4", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1249, "hfopenllm_v2/BBH": 0.2957, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5", "name": "PRYMMAL-ECE-7B-SLERP-V5", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1249, "hfopenllm_v2/BBH": 0.2957, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6", "name": "PRYMMAL-ECE-7B-SLERP-V6", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1243, "hfopenllm_v2/BBH": 0.2957, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7", "name": "PRYMMAL-ECE-7B-SLERP-V7", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1249, "hfopenllm_v2/BBH": 0.2957, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "LilRg/PRYMMAL-slerp-Merge", "name": "PRYMMAL-slerp-Merge", "developer": "LilRg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3044, "hfopenllm_v2/BBH": 0.5364, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4635, "hfopenllm_v2/MMLU-PRO": 0.3863 } }, { "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v2-merged", "name": "CodeMind-Llama3-8B-unsloth_v2-merged", "developer": "LimYeri", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6946, "hfopenllm_v2/BBH": 0.486, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3316, "hfopenllm_v2/MMLU-PRO": 0.3506 } }, { "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v3-merged", "name": "CodeMind-Llama3-8B-unsloth_v3-merged", "developer": "LimYeri", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6763, "hfopenllm_v2/BBH": 0.4908, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.3496 } }, { "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", "name": "CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged", "developer": "LimYeri", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6492, "hfopenllm_v2/BBH": 0.4853, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3608, "hfopenllm_v2/MMLU-PRO": 0.3354 } }, { "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-merged", "name": "CodeMind-Llama3-8B-unsloth_v4-one-merged", "developer": "LimYeri", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3211, "hfopenllm_v2/BBH": 0.4739, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.3353 } }, { "id": "LimYeri/CodeMind-Llama3.1-8B-unsloth-merged", "name": "CodeMind-Llama3.1-8B-unsloth-merged", "developer": "LimYeri", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.649, "hfopenllm_v2/BBH": 0.4695, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.334 } }, { "id": "lkoenig/BBAI_145_", "name": "BBAI_145_", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.445, "hfopenllm_v2/BBH": 0.5567, "hfopenllm_v2/MATH Level 5": 0.361, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4382, "hfopenllm_v2/MMLU-PRO": 0.449 } }, { "id": "lkoenig/BBAI_200_Gemma", "name": "BBAI_200_Gemma", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0705, "hfopenllm_v2/BBH": 0.3449, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.1679 } }, { "id": "lkoenig/BBAI_212_Qwencore", "name": "BBAI_212_Qwencore", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4384, "hfopenllm_v2/BBH": 0.5569, "hfopenllm_v2/MATH Level 5": 0.3489, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.449 } }, { "id": "lkoenig/BBAI_212_QwenLawLo", "name": "BBAI_212_QwenLawLo", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4566, "hfopenllm_v2/BBH": 0.5574, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.437, "hfopenllm_v2/MMLU-PRO": 0.4489 } }, { "id": "lkoenig/BBAI_230_Xiaqwen", "name": "BBAI_230_Xiaqwen", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4649, "hfopenllm_v2/BBH": 0.5578, "hfopenllm_v2/MATH Level 5": 0.3663, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4422, "hfopenllm_v2/MMLU-PRO": 0.4481 } }, { "id": "lkoenig/BBAI_375_QwenDyancabs", "name": "BBAI_375_QwenDyancabs", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4566, "hfopenllm_v2/BBH": 0.5571, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4462, "hfopenllm_v2/MMLU-PRO": 0.4476 } }, { "id": "lkoenig/BBAI_456_QwenKoen", "name": "BBAI_456_QwenKoen", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4529, "hfopenllm_v2/BBH": 0.5553, "hfopenllm_v2/MATH Level 5": 0.3686, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4395, "hfopenllm_v2/MMLU-PRO": 0.4469 } }, { "id": "lkoenig/BBAI_7B_KoenQwenDyan", "name": "BBAI_7B_KoenQwenDyan", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5807, "hfopenllm_v2/BBH": 0.5537, "hfopenllm_v2/MATH Level 5": 0.3739, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4369, "hfopenllm_v2/MMLU-PRO": 0.446 } }, { "id": "lkoenig/BBAI_7B_Qwen2.5koen", "name": "BBAI_7B_Qwen2.5koen", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.46, "hfopenllm_v2/BBH": 0.5544, "hfopenllm_v2/MATH Level 5": 0.3656, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4369, "hfopenllm_v2/MMLU-PRO": 0.4485 } }, { "id": "lkoenig/BBAI_7B_QwenDyancabsLAW", "name": "BBAI_7B_QwenDyancabsLAW", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.555, "hfopenllm_v2/BBH": 0.5579, "hfopenllm_v2/MATH Level 5": 0.3678, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4461, "hfopenllm_v2/MMLU-PRO": 0.4471 } }, { "id": "lkoenig/BBAI_7B_QwenDyanKoenLo", "name": "BBAI_7B_QwenDyanKoenLo", "developer": "lkoenig", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4663, "hfopenllm_v2/BBH": 0.5562, "hfopenllm_v2/MATH Level 5": 0.364, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.4465 } }, { "id": "llm-blender/PairRM-hf", "name": "llm-blender/PairRM-hf", "developer": "llm-blender", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6087, "reward-bench/Chat": 0.9022, "reward-bench/Chat Hard": 0.5219, "reward-bench/Safety": 0.477, "reward-bench/Reasoning": 0.4898, "reward-bench/Prior Sets (0.5 weight)": 0.6961 } }, { "id": "LLM360/K2", "name": "K2", "developer": "LLM360", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2252, "hfopenllm_v2/BBH": 0.4972, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.398, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "LLM360/K2-Chat", "name": "K2-Chat", "developer": "LLM360", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5152, "hfopenllm_v2/BBH": 0.5358, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.457, "hfopenllm_v2/MMLU-PRO": 0.3371 } }, { "id": "LLM4Binary/llm4decompile-1.3b-v2", "name": "llm4decompile-1.3b-v2", "developer": "LLM4Binary", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2268, "hfopenllm_v2/BBH": 0.3272, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2357, "hfopenllm_v2/MUSR": 0.4072, "hfopenllm_v2/MMLU-PRO": 0.1209 } }, { "id": "llmat/Mistral-v0.3-7B-ORPO", "name": "Mistral-v0.3-7B-ORPO", "developer": "llmat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.377, "hfopenllm_v2/BBH": 0.3978, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.2278 } }, { "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V5", "name": "ECE-PRYMMAL-YL-1B-SLERP-V5", "developer": "llnYou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3313, "hfopenllm_v2/BBH": 0.4233, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3868, "hfopenllm_v2/MMLU-PRO": 0.2931 } }, { "id": "llnYou/ECE-PRYMMAL-YL-1B-SLERP-V6", "name": "ECE-PRYMMAL-YL-1B-SLERP-V6", "developer": "llnYou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1388, "hfopenllm_v2/BBH": 0.3944, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.235 } }, { "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V1", "name": "ECE-PRYMMAL-YL-3B-SLERP-V1", "developer": "llnYou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2346, "hfopenllm_v2/BBH": 0.4018, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3364, "hfopenllm_v2/MMLU-PRO": 0.285 } }, { "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V2", "name": "ECE-PRYMMAL-YL-3B-SLERP-V2", "developer": "llnYou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2309, "hfopenllm_v2/BBH": 0.399, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3588, "hfopenllm_v2/MMLU-PRO": 0.29 } }, { "id": "llnYou/ECE-PRYMMAL-YL-3B-SLERP-V3", "name": "ECE-PRYMMAL-YL-3B-SLERP-V3", "developer": "llnYou", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3581, "hfopenllm_v2/BBH": 0.5473, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4361, "hfopenllm_v2/MMLU-PRO": 0.4043 } }, { "id": "lmsys/vicuna-13b-v1.3", "name": "vicuna-13b-v1.3", "developer": "lmsys", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3344, "hfopenllm_v2/BBH": 0.3384, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3727, "hfopenllm_v2/MMLU-PRO": 0.2243 } }, { "id": "lmsys/vicuna-7b-v1.3", "name": "vicuna-7b-v1.3", "developer": "lmsys", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2909, "hfopenllm_v2/BBH": 0.3298, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.1838 } }, { "id": "lmsys/vicuna-7b-v1.5", "name": "vicuna-7b-v1.5", "developer": "lmsys", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2352, "hfopenllm_v2/BBH": 0.3947, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.2147 } }, { "id": "lmsys/Vicuna-v1.3-13B", "name": "Vicuna v1.3 13B", "developer": "lmsys", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.706, "helm_classic/MMLU": 0.462, "helm_classic/BoolQ": 0.808, "helm_classic/NarrativeQA": 0.691, "helm_classic/NaturalQuestions (open-book)": 0.686, "helm_classic/QuAC": 0.403, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.385, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.762, "helm_classic/CivilComments": 0.645, "helm_classic/RAFT": 0.657 } }, { "id": "lmsys/Vicuna-v1.3-7B", "name": "Vicuna v1.3 7B", "developer": "lmsys", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.625, "helm_classic/MMLU": 0.434, "helm_classic/BoolQ": 0.76, "helm_classic/NarrativeQA": 0.643, "helm_classic/NaturalQuestions (open-book)": 0.634, "helm_classic/QuAC": 0.392, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.292, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.916, "helm_classic/CivilComments": 0.62, "helm_classic/RAFT": 0.693 } }, { "id": "Locutusque/CollectiveLM-Falcon-3-7B", "name": "CollectiveLM-Falcon-3-7B", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3918, "hfopenllm_v2/BBH": 0.5105, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.3887, "hfopenllm_v2/MMLU-PRO": 0.3599 } }, { "id": "Locutusque/Hercules-6.0-Llama-3.1-8B", "name": "Hercules-6.0-Llama-3.1-8B", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.663, "hfopenllm_v2/BBH": 0.4813, "hfopenllm_v2/MATH Level 5": 0.1669, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "Locutusque/Hercules-6.1-Llama-3.1-8B", "name": "Hercules-6.1-Llama-3.1-8B", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6007, "hfopenllm_v2/BBH": 0.4656, "hfopenllm_v2/MATH Level 5": 0.176, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3553, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "Locutusque/Llama-3-NeuralHercules-5.0-8B", "name": "Llama-3-NeuralHercules-5.0-8B", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4489, "hfopenllm_v2/BBH": 0.394, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3881, "hfopenllm_v2/MMLU-PRO": 0.2933 } }, { "id": "Locutusque/Llama-3-Yggdrasil-2.0-8B", "name": "Llama-3-Yggdrasil-2.0-8B", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5371, "hfopenllm_v2/BBH": 0.4772, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.3167 } }, { "id": "Locutusque/TinyMistral-248M-v2.5", "name": "TinyMistral-248M-v2.5", "developer": "Locutusque", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1336, "hfopenllm_v2/BBH": 0.3039, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3782, "hfopenllm_v2/MMLU-PRO": 0.1135 } }, { "id": "lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v7", "name": "llama-3.1-8b-instruct-ortho-v7", "developer": "lodrick-the-lafted", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3515, "hfopenllm_v2/BBH": 0.3907, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3616, "hfopenllm_v2/MMLU-PRO": 0.1974 } }, { "id": "lordjia/Llama-3-Cantonese-8B-Instruct", "name": "Llama-3-Cantonese-8B-Instruct", "developer": "lordjia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6669, "hfopenllm_v2/BBH": 0.4814, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.3515 } }, { "id": "lordjia/Qwen2-Cantonese-7B-Instruct", "name": "Qwen2-Cantonese-7B-Instruct", "developer": "lordjia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5435, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.256, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.3843 } }, { "id": "lt-asset/nova-1.3b", "name": "nova-1.3b", "developer": "lt-asset", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1214, "hfopenllm_v2/BBH": 0.317, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "lunahr/thea-3b-50r-u1", "name": "thea-3b-50r-u1", "developer": "lunahr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.603, "hfopenllm_v2/BBH": 0.4105, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3182, "hfopenllm_v2/MMLU-PRO": 0.2808 } }, { "id": "lunahr/thea-v2-3b-50r", "name": "thea-v2-3b-50r", "developer": "lunahr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3704, "hfopenllm_v2/BBH": 0.4194, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.2409 } }, { "id": "Luni/StarDust-12b-v1", "name": "StarDust-12b-v1", "developer": "Luni", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5459, "hfopenllm_v2/BBH": 0.5366, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4324, "hfopenllm_v2/MMLU-PRO": 0.3412 } }, { "id": "Luni/StarDust-12b-v2", "name": "StarDust-12b-v2", "developer": "Luni", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5629, "hfopenllm_v2/BBH": 0.5419, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.3439 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v3", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7049, "hfopenllm_v2/BBH": 0.6478, "hfopenllm_v2/MATH Level 5": 0.4162, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.5394 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v4", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6943, "hfopenllm_v2/BBH": 0.642, "hfopenllm_v2/MATH Level 5": 0.3467, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4769, "hfopenllm_v2/MMLU-PRO": 0.5252 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v5", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7485, "hfopenllm_v2/BBH": 0.6467, "hfopenllm_v2/MATH Level 5": 0.4358, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4473, "hfopenllm_v2/MMLU-PRO": 0.514 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v6", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7043, "hfopenllm_v2/BBH": 0.6458, "hfopenllm_v2/MATH Level 5": 0.3958, "hfopenllm_v2/GPQA": 0.3775, "hfopenllm_v2/MUSR": 0.4768, "hfopenllm_v2/MMLU-PRO": 0.5392 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4663, "hfopenllm_v2/BBH": 0.6215, "hfopenllm_v2/MATH Level 5": 0.3316, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4937, "hfopenllm_v2/MMLU-PRO": 0.5204 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v7", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6794, "hfopenllm_v2/BBH": 0.6531, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4834, "hfopenllm_v2/MMLU-PRO": 0.5376 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6931, "hfopenllm_v2/BBH": 0.6423, "hfopenllm_v2/MATH Level 5": 0.3406, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4888, "hfopenllm_v2/MMLU-PRO": 0.5277 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7875, "hfopenllm_v2/BBH": 0.6419, "hfopenllm_v2/MATH Level 5": 0.5559, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4394, "hfopenllm_v2/MMLU-PRO": 0.5206 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.5", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5929, "hfopenllm_v2/BBH": 0.6451, "hfopenllm_v2/MATH Level 5": 0.3656, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.477, "hfopenllm_v2/MMLU-PRO": 0.529 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.6", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5919, "hfopenllm_v2/BBH": 0.6457, "hfopenllm_v2/MATH Level 5": 0.4071, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4953, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.7", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7875, "hfopenllm_v2/BBH": 0.6483, "hfopenllm_v2/MATH Level 5": 0.5408, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4381, "hfopenllm_v2/MMLU-PRO": 0.5242 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.8", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7028, "hfopenllm_v2/BBH": 0.6566, "hfopenllm_v2/MATH Level 5": 0.4237, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4912, "hfopenllm_v2/MMLU-PRO": 0.5323 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v8.9", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7993, "hfopenllm_v2/BBH": 0.6483, "hfopenllm_v2/MATH Level 5": 0.537, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.5199 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5235, "hfopenllm_v2/BBH": 0.6546, "hfopenllm_v2/MATH Level 5": 0.4366, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4806, "hfopenllm_v2/MMLU-PRO": 0.5422 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9-stock", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6514, "hfopenllm_v2/BBH": 0.6571, "hfopenllm_v2/MATH Level 5": 0.4184, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.5412 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9.1", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8003, "hfopenllm_v2/BBH": 0.6555, "hfopenllm_v2/MATH Level 5": 0.5468, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.5251 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2", "name": "NQLSG-Qwen2.5-14B-MegaFusion-v9.2", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7862, "hfopenllm_v2/BBH": 0.6538, "hfopenllm_v2/MATH Level 5": 0.5332, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4381, "hfopenllm_v2/MMLU-PRO": 0.5283 } }, { "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion", "name": "NQLSG-Qwen2.5-14B-OriginalFusion", "developer": "Lunzima", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6142, "hfopenllm_v2/BBH": 0.6592, "hfopenllm_v2/MATH Level 5": 0.4275, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.5122, "hfopenllm_v2/MMLU-PRO": 0.5239 } }, { "id": "LxzGordon/URM-LLaMa-3-8B", "name": "LxzGordon/URM-LLaMa-3-8B", "developer": "LxzGordon", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8991, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.7873, "reward-bench/Safety": 0.8824, "reward-bench/Reasoning": 0.9574 } }, { "id": "LxzGordon/URM-LLaMa-3.1-8B", "name": "LxzGordon/URM-LLaMa-3.1-8B", "developer": "LxzGordon", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7394, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.8816, "reward-bench/Safety": 0.9178, "reward-bench/Reasoning": 0.9698, "reward-bench/Factuality": 0.6884, "reward-bench/Precise IF": 0.45, "reward-bench/Math": 0.6393, "reward-bench/Focus": 0.9758, "reward-bench/Ties": 0.7653 } }, { "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", "name": "Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3", "developer": "Lyte", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7098, "hfopenllm_v2/BBH": 0.495, "hfopenllm_v2/MATH Level 5": 0.1903, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3461, "hfopenllm_v2/MMLU-PRO": 0.3618 } }, { "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", "name": "Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04", "developer": "Lyte", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5774, "hfopenllm_v2/BBH": 0.3515, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3236, "hfopenllm_v2/MMLU-PRO": 0.1843 } }, { "id": "Lyte/Llama-3.2-3B-Overthinker", "name": "Llama-3.2-3B-Overthinker", "developer": "Lyte", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6408, "hfopenllm_v2/BBH": 0.432, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3419, "hfopenllm_v2/MMLU-PRO": 0.2985 } }, { "id": "M4-ai/TinyMistral-248M-v3", "name": "TinyMistral-248M-v3", "developer": "M4-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1639, "hfopenllm_v2/BBH": 0.2885, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.1132 } }, { "id": "m42-health/Llama3-Med42-70B", "name": "Llama3-Med42-70B", "developer": "m42-health", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6291, "hfopenllm_v2/BBH": 0.6688, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4629, "hfopenllm_v2/MMLU-PRO": 0.4963 } }, { "id": "macadeliccc/magistrate-3.2-3b-base", "name": "magistrate-3.2-3b-base", "developer": "macadeliccc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1159, "hfopenllm_v2/BBH": 0.3343, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3976, "hfopenllm_v2/MMLU-PRO": 0.1689 } }, { "id": "macadeliccc/magistrate-3.2-3b-it", "name": "magistrate-3.2-3b-it", "developer": "macadeliccc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2292, "hfopenllm_v2/BBH": 0.3257, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3763, "hfopenllm_v2/MMLU-PRO": 0.1592 } }, { "id": "macadeliccc/Samantha-Qwen-2-7B", "name": "Samantha-Qwen-2-7B", "developer": "macadeliccc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4377, "hfopenllm_v2/BBH": 0.5082, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4799, "hfopenllm_v2/MMLU-PRO": 0.3779 } }, { "id": "madeagents/hammer2-1-0-5b-fc", "name": "Hammer2.1-0.5b (FC)", "developer": "madeagents", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 100.0, "bfcl/bfcl.overall.overall_accuracy": 21.22, "bfcl/bfcl.overall.total_cost_usd": 2.82, "bfcl/bfcl.overall.latency_mean_s": 2.79, "bfcl/bfcl.overall.latency_std_s": 3.17, "bfcl/bfcl.overall.latency_p95_s": 9.86, "bfcl/bfcl.non_live.ast_accuracy": 65.98, "bfcl/bfcl.non_live.simple_ast_accuracy": 62.42, "bfcl/bfcl.non_live.multiple_ast_accuracy": 81.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 69.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 51.5, "bfcl/bfcl.live.live_accuracy": 54.63, "bfcl/bfcl.live.live_simple_ast_accuracy": 56.59, "bfcl/bfcl.live.live_multiple_ast_accuracy": 54.42, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 2.88, "bfcl/bfcl.multi_turn.base_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 4.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 1.08, "bfcl/bfcl.memory.kv_accuracy": 0.65, "bfcl/bfcl.memory.vector_accuracy": 1.94, "bfcl/bfcl.memory.recursive_summarization_accuracy": 0.65, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 80.79 } }, { "id": "madeagents/hammer2-1-1-5b-fc", "name": "Hammer2.1-1.5b (FC)", "developer": "madeagents", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 75.0, "bfcl/bfcl.overall.overall_accuracy": 27.88, "bfcl/bfcl.overall.total_cost_usd": 6.83, "bfcl/bfcl.overall.latency_mean_s": 6.28, "bfcl/bfcl.overall.latency_std_s": 8.79, "bfcl/bfcl.overall.latency_p95_s": 30.72, "bfcl/bfcl.non_live.ast_accuracy": 82.98, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 85.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.5, "bfcl/bfcl.live.live_accuracy": 69.5, "bfcl/bfcl.live.live_simple_ast_accuracy": 72.09, "bfcl/bfcl.live.live_multiple_ast_accuracy": 69.33, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 15.62, "bfcl/bfcl.multi_turn.base_accuracy": 20.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 9.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 16.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 0.0, "bfcl/bfcl.memory.kv_accuracy": 0.0, "bfcl/bfcl.memory.vector_accuracy": 0.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 0.0, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.4 } }, { "id": "madeagents/hammer2-1-3b-fc", "name": "Hammer2.1-3b (FC)", "developer": "madeagents", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 68.0, "bfcl/bfcl.overall.overall_accuracy": 29.71, "bfcl/bfcl.overall.total_cost_usd": 10.89, "bfcl/bfcl.overall.latency_mean_s": 11.24, "bfcl/bfcl.overall.latency_std_s": 15.81, "bfcl/bfcl.overall.latency_p95_s": 47.44, "bfcl/bfcl.non_live.ast_accuracy": 84.96, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 86.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.5, "bfcl/bfcl.live.live_accuracy": 70.54, "bfcl/bfcl.live.live_simple_ast_accuracy": 68.22, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.32, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 16.5, "bfcl/bfcl.multi_turn.base_accuracy": 22.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 12.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 16.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 15.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 3.01, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 3.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 2.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 56.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.12 } }, { "id": "madeagents/hammer2-1-7b-fc", "name": "Hammer2.1-7b (FC)", "developer": "madeagents", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 64.0, "bfcl/bfcl.overall.overall_accuracy": 31.67, "bfcl/bfcl.overall.total_cost_usd": 4.99, "bfcl/bfcl.overall.latency_mean_s": 5.77, "bfcl/bfcl.overall.latency_std_s": 10.29, "bfcl/bfcl.overall.latency_p95_s": 31.26, "bfcl/bfcl.non_live.ast_accuracy": 85.5, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 86.0, "bfcl/bfcl.live.live_accuracy": 69.5, "bfcl/bfcl.live.live_simple_ast_accuracy": 66.67, "bfcl/bfcl.live.live_multiple_ast_accuracy": 69.99, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 23.87, "bfcl/bfcl.multi_turn.base_accuracy": 24.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 28.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 21.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 21.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 0.0, "bfcl/bfcl.memory.kv_accuracy": 0.0, "bfcl/bfcl.memory.vector_accuracy": 0.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 0.0, "bfcl/bfcl.relevance.relevance_detection_accuracy": 50.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 90.12 } }, { "id": "magnifi/Phi3_intent_v56_3_w_unknown_5_lr_0.002", "name": "Phi3_intent_v56_3_w_unknown_5_lr_0.002", "developer": "magnifi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2018, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.1472 } }, { "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1", "name": "Llama-3-8B-Magpie-Align-SFT-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4361, "hfopenllm_v2/BBH": 0.4615, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3277, "hfopenllm_v2/MMLU-PRO": 0.2863 } }, { "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3", "name": "Llama-3-8B-Magpie-Align-SFT-v0.3", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5064, "hfopenllm_v2/BBH": 0.4572, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3424, "hfopenllm_v2/MMLU-PRO": 0.2902 } }, { "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.1", "name": "Llama-3-8B-Magpie-Align-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4027, "hfopenllm_v2/BBH": 0.4789, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3087, "hfopenllm_v2/MMLU-PRO": 0.3001 } }, { "id": "Magpie-Align/Llama-3-8B-Magpie-Align-v0.3", "name": "Llama-3-8B-Magpie-Align-v0.3", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4497, "hfopenllm_v2/BBH": 0.457, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3406, "hfopenllm_v2/MMLU-PRO": 0.3134 } }, { "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1", "name": "Llama-3.1-8B-Magpie-Align-SFT-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4782, "hfopenllm_v2/BBH": 0.4764, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3397, "hfopenllm_v2/MMLU-PRO": 0.2943 } }, { "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1", "name": "Llama-3.1-8B-Magpie-Align-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4458, "hfopenllm_v2/BBH": 0.4622, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3141, "hfopenllm_v2/MMLU-PRO": 0.3262 } }, { "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1", "name": "MagpieLM-8B-Chat-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3701, "hfopenllm_v2/BBH": 0.4172, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.3195 } }, { "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1", "name": "MagpieLM-8B-SFT-v0.1", "developer": "Magpie-Align", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4721, "hfopenllm_v2/BBH": 0.4553, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3649, "hfopenllm_v2/MMLU-PRO": 0.299 } }, { "id": "MagusCorp/grpo_lora_enem_llama3_7b", "name": "grpo_lora_enem_llama3_7b", "developer": "MagusCorp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4724, "hfopenllm_v2/BBH": 0.4801, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3971, "hfopenllm_v2/MMLU-PRO": 0.3574 } }, { "id": "maldv/Awqward2.5-32B-Instruct", "name": "Awqward2.5-32B-Instruct", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8255, "hfopenllm_v2/BBH": 0.6974, "hfopenllm_v2/MATH Level 5": 0.6231, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4275, "hfopenllm_v2/MMLU-PRO": 0.5723 } }, { "id": "maldv/badger-kappa-llama-3-8b", "name": "badger-kappa-llama-3-8b", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4695, "hfopenllm_v2/BBH": 0.5085, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3765, "hfopenllm_v2/MMLU-PRO": 0.3695 } }, { "id": "maldv/badger-lambda-llama-3-8b", "name": "badger-lambda-llama-3-8b", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4861, "hfopenllm_v2/BBH": 0.4963, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "maldv/badger-mu-llama-3-8b", "name": "badger-mu-llama-3-8b", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4919, "hfopenllm_v2/BBH": 0.5143, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.3674 } }, { "id": "maldv/badger-writer-llama-3-8b", "name": "badger-writer-llama-3-8b", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5303, "hfopenllm_v2/BBH": 0.4864, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3581, "hfopenllm_v2/MMLU-PRO": 0.376 } }, { "id": "maldv/Lytta2.5-32B-Instruct", "name": "Lytta2.5-32B-Instruct", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2508, "hfopenllm_v2/BBH": 0.56, "hfopenllm_v2/MATH Level 5": 0.3444, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3769, "hfopenllm_v2/MMLU-PRO": 0.5048 } }, { "id": "maldv/Qwentile2.5-32B-Instruct", "name": "Qwentile2.5-32B-Instruct", "developer": "maldv", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7393, "hfopenllm_v2/BBH": 0.6963, "hfopenllm_v2/MATH Level 5": 0.5219, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.5879 } }, { "id": "ManoloPueblo/ContentCuisine_1-7B-slerp", "name": "ContentCuisine_1-7B-slerp", "developer": "ManoloPueblo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3907, "hfopenllm_v2/BBH": 0.5188, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4672, "hfopenllm_v2/MMLU-PRO": 0.3054 } }, { "id": "ManoloPueblo/LLM_MERGE_CC2", "name": "LLM_MERGE_CC2", "developer": "ManoloPueblo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3853, "hfopenllm_v2/BBH": 0.5209, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4593, "hfopenllm_v2/MMLU-PRO": 0.3032 } }, { "id": "ManoloPueblo/LLM_MERGE_CC3", "name": "LLM_MERGE_CC3", "developer": "ManoloPueblo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3959, "hfopenllm_v2/BBH": 0.5246, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4672, "hfopenllm_v2/MMLU-PRO": 0.3156 } }, { "id": "marcuscedricridia/absolute-o1-7b", "name": "absolute-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7516, "hfopenllm_v2/BBH": 0.5469, "hfopenllm_v2/MATH Level 5": 0.5083, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "marcuscedricridia/Cheng-1", "name": "Cheng-1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7789, "hfopenllm_v2/BBH": 0.5525, "hfopenllm_v2/MATH Level 5": 0.4894, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.4349 } }, { "id": "marcuscedricridia/Cheng-2", "name": "Cheng-2", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8337, "hfopenllm_v2/BBH": 0.6499, "hfopenllm_v2/MATH Level 5": 0.5438, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4193, "hfopenllm_v2/MMLU-PRO": 0.5013 } }, { "id": "marcuscedricridia/Cheng-2-v1.1", "name": "Cheng-2-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.827, "hfopenllm_v2/BBH": 0.651, "hfopenllm_v2/MATH Level 5": 0.5393, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4167, "hfopenllm_v2/MMLU-PRO": 0.5076 } }, { "id": "marcuscedricridia/cursa-o1-7b", "name": "cursa-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7628, "hfopenllm_v2/BBH": 0.5466, "hfopenllm_v2/MATH Level 5": 0.4955, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4301, "hfopenllm_v2/MMLU-PRO": 0.4392 } }, { "id": "marcuscedricridia/cursa-o1-7b-2-28-2025", "name": "cursa-o1-7b-2-28-2025", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7467, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.4811, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.4365 } }, { "id": "marcuscedricridia/cursa-o1-7b-v1.1", "name": "cursa-o1-7b-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7528, "hfopenllm_v2/BBH": 0.5493, "hfopenllm_v2/MATH Level 5": 0.4985, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4259, "hfopenllm_v2/MMLU-PRO": 0.4392 } }, { "id": "marcuscedricridia/cursa-o1-7b-v1.2-normalize-false", "name": "cursa-o1-7b-v1.2-normalize-false", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7616, "hfopenllm_v2/BBH": 0.5492, "hfopenllm_v2/MATH Level 5": 0.4992, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.4436 } }, { "id": "marcuscedricridia/cursor-o1-7b", "name": "cursor-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4107, "hfopenllm_v2/BBH": 0.5007, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.3251 } }, { "id": "marcuscedricridia/cursorr-o1.2-7b", "name": "cursorr-o1.2-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.166, "hfopenllm_v2/BBH": 0.3068, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.108 } }, { "id": "marcuscedricridia/etr1o-explicit-v1.1", "name": "etr1o-explicit-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.288, "hfopenllm_v2/BBH": 0.3132, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4111, "hfopenllm_v2/MMLU-PRO": 0.1195 } }, { "id": "marcuscedricridia/etr1o-explicit-v1.2", "name": "etr1o-explicit-v1.2", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1504, "hfopenllm_v2/BBH": 0.295, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.1126 } }, { "id": "marcuscedricridia/etr1o-v1.1", "name": "etr1o-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1597, "hfopenllm_v2/BBH": 0.31, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.1157 } }, { "id": "marcuscedricridia/etr1o-v1.2", "name": "etr1o-v1.2", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7287, "hfopenllm_v2/BBH": 0.6349, "hfopenllm_v2/MATH Level 5": 0.3588, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4714, "hfopenllm_v2/MMLU-PRO": 0.5316 } }, { "id": "marcuscedricridia/fan-o1-7b", "name": "fan-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4456, "hfopenllm_v2/BBH": 0.4849, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3834, "hfopenllm_v2/MMLU-PRO": 0.3274 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST", "name": "Hush-Qwen2.5-7B-MST", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7488, "hfopenllm_v2/BBH": 0.5458, "hfopenllm_v2/MATH Level 5": 0.4245, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.4163 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1", "name": "Hush-Qwen2.5-7B-MST-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7445, "hfopenllm_v2/BBH": 0.5559, "hfopenllm_v2/MATH Level 5": 0.4653, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.4299 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3", "name": "Hush-Qwen2.5-7B-MST-v1.3", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7043, "hfopenllm_v2/BBH": 0.5516, "hfopenllm_v2/MATH Level 5": 0.4758, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.444 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview", "name": "Hush-Qwen2.5-7B-Preview", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7962, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.3754, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.4364 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M", "name": "Hush-Qwen2.5-7B-RP-v1.4-1M", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7728, "hfopenllm_v2/BBH": 0.5295, "hfopenllm_v2/MATH Level 5": 0.3369, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4433, "hfopenllm_v2/MMLU-PRO": 0.4135 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1", "name": "Hush-Qwen2.5-7B-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.4381, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4179, "hfopenllm_v2/MMLU-PRO": 0.4227 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2", "name": "Hush-Qwen2.5-7B-v1.2", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7865, "hfopenllm_v2/BBH": 0.5403, "hfopenllm_v2/MATH Level 5": 0.4403, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4219, "hfopenllm_v2/MMLU-PRO": 0.4197 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3", "name": "Hush-Qwen2.5-7B-v1.3", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.5327, "hfopenllm_v2/MATH Level 5": 0.3323, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4246, "hfopenllm_v2/MMLU-PRO": 0.4345 } }, { "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4", "name": "Hush-Qwen2.5-7B-v1.4", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7835, "hfopenllm_v2/BBH": 0.5423, "hfopenllm_v2/MATH Level 5": 0.426, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.4195 } }, { "id": "marcuscedricridia/olmner-7b", "name": "olmner-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7254, "hfopenllm_v2/BBH": 0.5472, "hfopenllm_v2/MATH Level 5": 0.463, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.438, "hfopenllm_v2/MMLU-PRO": 0.4309 } }, { "id": "marcuscedricridia/olmner-della-7b", "name": "olmner-della-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7637, "hfopenllm_v2/BBH": 0.5491, "hfopenllm_v2/MATH Level 5": 0.4962, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.4386 } }, { "id": "marcuscedricridia/olmner-o1-7b", "name": "olmner-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7528, "hfopenllm_v2/BBH": 0.5481, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4299, "hfopenllm_v2/MMLU-PRO": 0.4386 } }, { "id": "marcuscedricridia/olmner-sbr-7b", "name": "olmner-sbr-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.76, "hfopenllm_v2/BBH": 0.5462, "hfopenllm_v2/MATH Level 5": 0.4947, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.4412 } }, { "id": "marcuscedricridia/post-cursa-o1", "name": "post-cursa-o1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7628, "hfopenllm_v2/BBH": 0.548, "hfopenllm_v2/MATH Level 5": 0.4872, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.4361 } }, { "id": "marcuscedricridia/pre-cursa-o1", "name": "pre-cursa-o1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7409, "hfopenllm_v2/BBH": 0.5462, "hfopenllm_v2/MATH Level 5": 0.5038, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.4424 } }, { "id": "marcuscedricridia/pre-cursa-o1-v1.2", "name": "pre-cursa-o1-v1.2", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7549, "hfopenllm_v2/BBH": 0.5487, "hfopenllm_v2/MATH Level 5": 0.5068, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4272, "hfopenllm_v2/MMLU-PRO": 0.4402 } }, { "id": "marcuscedricridia/pre-cursa-o1-v1.3", "name": "pre-cursa-o1-v1.3", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7507, "hfopenllm_v2/BBH": 0.5455, "hfopenllm_v2/MATH Level 5": 0.5076, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4271, "hfopenllm_v2/MMLU-PRO": 0.442 } }, { "id": "marcuscedricridia/pre-cursa-o1-v1.4", "name": "pre-cursa-o1-v1.4", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7488, "hfopenllm_v2/BBH": 0.5493, "hfopenllm_v2/MATH Level 5": 0.4834, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4285, "hfopenllm_v2/MMLU-PRO": 0.4436 } }, { "id": "marcuscedricridia/pre-cursa-o1-v1.6", "name": "pre-cursa-o1-v1.6", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7528, "hfopenllm_v2/BBH": 0.5473, "hfopenllm_v2/MATH Level 5": 0.5, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4234, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "marcuscedricridia/Qwen2.5-7B-Preview", "name": "Qwen2.5-7B-Preview", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7679, "hfopenllm_v2/BBH": 0.536, "hfopenllm_v2/MATH Level 5": 0.3444, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.4258 } }, { "id": "marcuscedricridia/r1o-et", "name": "r1o-et", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3597, "hfopenllm_v2/BBH": 0.4209, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.258 } }, { "id": "marcuscedricridia/sbr-o1-7b", "name": "sbr-o1-7b", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7455, "hfopenllm_v2/BBH": 0.5479, "hfopenllm_v2/MATH Level 5": 0.4985, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4404, "hfopenllm_v2/MMLU-PRO": 0.4355 } }, { "id": "marcuscedricridia/stray-r1o-et", "name": "stray-r1o-et", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1562, "hfopenllm_v2/BBH": 0.2967, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4086, "hfopenllm_v2/MMLU-PRO": 0.1094 } }, { "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview", "name": "Yell-Qwen2.5-7B-Preview", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5839, "hfopenllm_v2/BBH": 0.5371, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.3798 } }, { "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1", "name": "Yell-Qwen2.5-7B-Preview-v1.1", "developer": "marcuscedricridia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5757, "hfopenllm_v2/BBH": 0.5348, "hfopenllm_v2/MATH Level 5": 0.1896, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4059, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "marin-community/marin-8b-instruct", "name": "Marin 8B Instruct", "developer": "marin-community", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.325, "helm_capabilities/MMLU-Pro": 0.188, "helm_capabilities/GPQA": 0.168, "helm_capabilities/IFEval": 0.632, "helm_capabilities/WildBench": 0.477, "helm_capabilities/Omni-MATH": 0.16 } }, { "id": "MarinaraSpaghetti/Nemomix-v4.0-12B", "name": "Nemomix-v4.0-12B", "developer": "MarinaraSpaghetti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5575, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.3613 } }, { "id": "MarinaraSpaghetti/NemoReRemix-12B", "name": "NemoReRemix-12B", "developer": "MarinaraSpaghetti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3343, "hfopenllm_v2/BBH": 0.5537, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4501, "hfopenllm_v2/MMLU-PRO": 0.3598 } }, { "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial", "name": "general3B-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2722, "hfopenllm_v2/BBH": 0.5394, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4701, "hfopenllm_v2/MMLU-PRO": 0.3876 } }, { "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial", "name": "general3Bv2-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5693, "hfopenllm_v2/BBH": 0.5637, "hfopenllm_v2/MATH Level 5": 0.3671, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4396, "hfopenllm_v2/MMLU-PRO": 0.4498 } }, { "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial", "name": "lareneg1_78B-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2795, "hfopenllm_v2/BBH": 0.423, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.2922 } }, { "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial", "name": "lareneg3B-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3303, "hfopenllm_v2/BBH": 0.5453, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4725, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial", "name": "lareneg3Bv2-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5753, "hfopenllm_v2/BBH": 0.5623, "hfopenllm_v2/MATH Level 5": 0.3656, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4369, "hfopenllm_v2/MMLU-PRO": 0.4511 } }, { "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", "name": "MiniMathExpert-2_61B-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2548, "hfopenllm_v2/BBH": 0.3953, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4083, "hfopenllm_v2/MMLU-PRO": 0.2274 } }, { "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial", "name": "MiniQwenMathExpert-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2795, "hfopenllm_v2/BBH": 0.423, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.2922 } }, { "id": "Marsouuu/MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", "name": "MistralBase-4x7B-MoE-ECE-PRYMMAL-Martial", "developer": "Marsouuu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1697, "hfopenllm_v2/BBH": 0.3464, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3991, "hfopenllm_v2/MMLU-PRO": 0.1379 } }, { "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", "name": "ECE-PRYMMAL-0.5B-FT-EnhancedMUSREnsembleV3", "developer": "matouLeLoup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1873, "hfopenllm_v2/BBH": 0.3239, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.172 } }, { "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", "name": "ECE-PRYMMAL-0.5B-FT-MUSR-ENSEMBLE-V2Mathis", "developer": "matouLeLoup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1873, "hfopenllm_v2/BBH": 0.3239, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.172 } }, { "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR-ENSEMBLE-Mathis", "developer": "matouLeLoup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1873, "hfopenllm_v2/BBH": 0.3239, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.172 } }, { "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR-Mathis", "developer": "matouLeLoup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1882, "hfopenllm_v2/BBH": 0.3233, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3685, "hfopenllm_v2/MMLU-PRO": 0.172 } }, { "id": "matouLeLoup/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", "developer": "matouLeLoup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1652, "hfopenllm_v2/BBH": 0.3024, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.1116 } }, { "id": "mattshumer/ref_70_e3", "name": "ref_70_e3", "developer": "mattshumer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6294, "hfopenllm_v2/BBH": 0.6501, "hfopenllm_v2/MATH Level 5": 0.2795, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.5303 } }, { "id": "mattshumer/Reflection-70B", "name": "mattshumer/Reflection-70B", "developer": "mattshumer", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8422, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.7061, "reward-bench/Safety": 0.8318, "reward-bench/Reasoning": 0.8562 } }, { "id": "mattshumer/Reflection-Llama-3.1-70B", "name": "Reflection-Llama-3.1-70B", "developer": "mattshumer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0045, "hfopenllm_v2/BBH": 0.645, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4577, "hfopenllm_v2/MMLU-PRO": 0.4955 } }, { "id": "maywell/Qwen2-7B-Multilingual-RP", "name": "Qwen2-7B-Multilingual-RP", "developer": "maywell", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4347, "hfopenllm_v2/BBH": 0.5062, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3696, "hfopenllm_v2/MMLU-PRO": 0.3859 } }, { "id": "MaziyarPanahi/calme-2.1-llama3.1-70b", "name": "calme-2.1-llama3.1-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8434, "hfopenllm_v2/BBH": 0.6448, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.438, "hfopenllm_v2/MMLU-PRO": 0.5283 } }, { "id": "MaziyarPanahi/calme-2.1-phi3-4b", "name": "calme-2.1-phi3-4b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5525, "hfopenllm_v2/BBH": 0.5595, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3746 } }, { "id": "MaziyarPanahi/calme-2.1-phi3.5-4b", "name": "calme-2.1-phi3.5-4b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5659, "hfopenllm_v2/BBH": 0.5484, "hfopenllm_v2/MATH Level 5": 0.2039, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.3995, "hfopenllm_v2/MMLU-PRO": 0.3935 } }, { "id": "MaziyarPanahi/calme-2.1-qwen2-72b", "name": "calme-2.1-qwen2-72b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8163, "hfopenllm_v2/BBH": 0.6966, "hfopenllm_v2/MATH Level 5": 0.4079, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4732, "hfopenllm_v2/MMLU-PRO": 0.5415 } }, { "id": "MaziyarPanahi/calme-2.1-qwen2-7b", "name": "calme-2.1-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3816, "hfopenllm_v2/BBH": 0.5046, "hfopenllm_v2/MATH Level 5": 0.2311, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4437, "hfopenllm_v2/MMLU-PRO": 0.3693 } }, { "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b", "name": "calme-2.1-qwen2.5-72b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8662, "hfopenllm_v2/BBH": 0.7262, "hfopenllm_v2/MATH Level 5": 0.5914, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.5619 } }, { "id": "MaziyarPanahi/calme-2.1-rys-78b", "name": "calme-2.1-rys-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8136, "hfopenllm_v2/BBH": 0.7098, "hfopenllm_v2/MATH Level 5": 0.3943, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4693, "hfopenllm_v2/MMLU-PRO": 0.5444 } }, { "id": "MaziyarPanahi/calme-2.2-llama3-70b", "name": "calme-2.2-llama3-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8208, "hfopenllm_v2/BBH": 0.6435, "hfopenllm_v2/MATH Level 5": 0.2394, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4446, "hfopenllm_v2/MMLU-PRO": 0.5207 } }, { "id": "MaziyarPanahi/calme-2.2-llama3.1-70b", "name": "calme-2.2-llama3.1-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8593, "hfopenllm_v2/BBH": 0.6793, "hfopenllm_v2/MATH Level 5": 0.4366, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4542, "hfopenllm_v2/MMLU-PRO": 0.5415 } }, { "id": "MaziyarPanahi/calme-2.2-phi3-4b", "name": "calme-2.2-phi3-4b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5069, "hfopenllm_v2/BBH": 0.553, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3976, "hfopenllm_v2/MMLU-PRO": 0.3814 } }, { "id": "MaziyarPanahi/calme-2.2-qwen2-72b", "name": "calme-2.2-qwen2-72b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8008, "hfopenllm_v2/BBH": 0.694, "hfopenllm_v2/MATH Level 5": 0.4532, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4508, "hfopenllm_v2/MMLU-PRO": 0.5435 } }, { "id": "MaziyarPanahi/calme-2.2-qwen2-7b", "name": "calme-2.2-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3597, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4358, "hfopenllm_v2/MMLU-PRO": 0.3899 } }, { "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b", "name": "calme-2.2-qwen2.5-72b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8477, "hfopenllm_v2/BBH": 0.7276, "hfopenllm_v2/MATH Level 5": 0.5891, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.5618 } }, { "id": "MaziyarPanahi/calme-2.2-rys-78b", "name": "calme-2.2-rys-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7986, "hfopenllm_v2/BBH": 0.7081, "hfopenllm_v2/MATH Level 5": 0.4071, "hfopenllm_v2/GPQA": 0.4069, "hfopenllm_v2/MUSR": 0.4536, "hfopenllm_v2/MMLU-PRO": 0.5386 } }, { "id": "MaziyarPanahi/calme-2.3-llama3-70b", "name": "calme-2.3-llama3-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.801, "hfopenllm_v2/BBH": 0.6399, "hfopenllm_v2/MATH Level 5": 0.2326, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4261, "hfopenllm_v2/MMLU-PRO": 0.5204 } }, { "id": "MaziyarPanahi/calme-2.3-llama3.1-70b", "name": "calme-2.3-llama3.1-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8605, "hfopenllm_v2/BBH": 0.6872, "hfopenllm_v2/MATH Level 5": 0.3927, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4568, "hfopenllm_v2/MMLU-PRO": 0.5363 } }, { "id": "MaziyarPanahi/calme-2.3-phi3-4b", "name": "calme-2.3-phi3-4b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4926, "hfopenllm_v2/BBH": 0.5538, "hfopenllm_v2/MATH Level 5": 0.1473, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3988, "hfopenllm_v2/MMLU-PRO": 0.3828 } }, { "id": "MaziyarPanahi/calme-2.3-qwen2-72b", "name": "calme-2.3-qwen2-72b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.385, "hfopenllm_v2/BBH": 0.6576, "hfopenllm_v2/MATH Level 5": 0.3172, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4112, "hfopenllm_v2/MMLU-PRO": 0.5419 } }, { "id": "MaziyarPanahi/calme-2.3-qwen2-7b", "name": "calme-2.3-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3825, "hfopenllm_v2/BBH": 0.5064, "hfopenllm_v2/MATH Level 5": 0.2069, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4422, "hfopenllm_v2/MMLU-PRO": 0.3611 } }, { "id": "MaziyarPanahi/calme-2.3-rys-78b", "name": "calme-2.3-rys-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8066, "hfopenllm_v2/BBH": 0.7108, "hfopenllm_v2/MATH Level 5": 0.398, "hfopenllm_v2/GPQA": 0.4044, "hfopenllm_v2/MUSR": 0.4549, "hfopenllm_v2/MMLU-PRO": 0.5475 } }, { "id": "MaziyarPanahi/calme-2.4-llama3-70b", "name": "calme-2.4-llama3-70b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5027, "hfopenllm_v2/BBH": 0.6418, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4288, "hfopenllm_v2/MMLU-PRO": 0.5204 } }, { "id": "MaziyarPanahi/calme-2.4-qwen2-7b", "name": "calme-2.4-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.33, "hfopenllm_v2/BBH": 0.5101, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4453, "hfopenllm_v2/MMLU-PRO": 0.3977 } }, { "id": "MaziyarPanahi/calme-2.4-rys-78b", "name": "calme-2.4-rys-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8011, "hfopenllm_v2/BBH": 0.728, "hfopenllm_v2/MATH Level 5": 0.4071, "hfopenllm_v2/GPQA": 0.4027, "hfopenllm_v2/MUSR": 0.5771, "hfopenllm_v2/MMLU-PRO": 0.7002 } }, { "id": "MaziyarPanahi/calme-2.5-qwen2-7b", "name": "calme-2.5-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3145, "hfopenllm_v2/BBH": 0.4887, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4565, "hfopenllm_v2/MMLU-PRO": 0.3682 } }, { "id": "MaziyarPanahi/calme-2.6-qwen2-7b", "name": "calme-2.6-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3443, "hfopenllm_v2/BBH": 0.493, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4586, "hfopenllm_v2/MMLU-PRO": 0.3732 } }, { "id": "MaziyarPanahi/calme-2.7-qwen2-7b", "name": "calme-2.7-qwen2-7b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3592, "hfopenllm_v2/BBH": 0.4883, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4824, "hfopenllm_v2/MMLU-PRO": 0.3705 } }, { "id": "MaziyarPanahi/calme-3.1-baguette-3b", "name": "calme-3.1-baguette-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6234, "hfopenllm_v2/BBH": 0.4683, "hfopenllm_v2/MATH Level 5": 0.256, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4008, "hfopenllm_v2/MMLU-PRO": 0.3399 } }, { "id": "MaziyarPanahi/calme-3.1-instruct-3b", "name": "calme-3.1-instruct-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4336, "hfopenllm_v2/BBH": 0.4813, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3952, "hfopenllm_v2/MMLU-PRO": 0.3557 } }, { "id": "MaziyarPanahi/calme-3.1-instruct-78b", "name": "calme-3.1-instruct-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8136, "hfopenllm_v2/BBH": 0.7305, "hfopenllm_v2/MATH Level 5": 0.3927, "hfopenllm_v2/GPQA": 0.396, "hfopenllm_v2/MUSR": 0.5891, "hfopenllm_v2/MMLU-PRO": 0.7185 } }, { "id": "MaziyarPanahi/calme-3.1-llamaloi-3b", "name": "calme-3.1-llamaloi-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7375, "hfopenllm_v2/BBH": 0.4587, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3515, "hfopenllm_v2/MMLU-PRO": 0.3205 } }, { "id": "MaziyarPanahi/calme-3.2-baguette-3b", "name": "calme-3.2-baguette-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6338, "hfopenllm_v2/BBH": 0.4709, "hfopenllm_v2/MATH Level 5": 0.2825, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4021, "hfopenllm_v2/MMLU-PRO": 0.3338 } }, { "id": "MaziyarPanahi/calme-3.2-instruct-3b", "name": "calme-3.2-instruct-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5533, "hfopenllm_v2/BBH": 0.4866, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4047, "hfopenllm_v2/MMLU-PRO": 0.3653 } }, { "id": "MaziyarPanahi/calme-3.2-instruct-78b", "name": "calme-3.2-instruct-78b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8063, "hfopenllm_v2/BBH": 0.7319, "hfopenllm_v2/MATH Level 5": 0.4033, "hfopenllm_v2/GPQA": 0.4027, "hfopenllm_v2/MUSR": 0.6024, "hfopenllm_v2/MMLU-PRO": 0.7303 } }, { "id": "MaziyarPanahi/calme-3.3-baguette-3b", "name": "calme-3.3-baguette-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.636, "hfopenllm_v2/BBH": 0.4678, "hfopenllm_v2/MATH Level 5": 0.3807, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.3342 } }, { "id": "MaziyarPanahi/calme-3.3-instruct-3b", "name": "calme-3.3-instruct-3b", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6423, "hfopenllm_v2/BBH": 0.4693, "hfopenllm_v2/MATH Level 5": 0.3739, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4074, "hfopenllm_v2/MMLU-PRO": 0.3305 } }, { "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1", "name": "Calme-4x7B-MoE-v0.1", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4315, "hfopenllm_v2/BBH": 0.5103, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3057 } }, { "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2", "name": "Calme-4x7B-MoE-v0.2", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4294, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4318, "hfopenllm_v2/MMLU-PRO": 0.3058 } }, { "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1", "name": "Llama-3-70B-Instruct-v0.1", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4714, "hfopenllm_v2/BBH": 0.5366, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4433, "hfopenllm_v2/MMLU-PRO": 0.4618 } }, { "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10", "name": "Llama-3-8B-Instruct-v0.10", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7667, "hfopenllm_v2/BBH": 0.4924, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4214, "hfopenllm_v2/MMLU-PRO": 0.3862 } }, { "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8", "name": "Llama-3-8B-Instruct-v0.8", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7528, "hfopenllm_v2/BBH": 0.4963, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.3853 } }, { "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9", "name": "Llama-3-8B-Instruct-v0.9", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.763, "hfopenllm_v2/BBH": 0.4936, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4148, "hfopenllm_v2/MMLU-PRO": 0.3846 } }, { "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow", "name": "Qwen1.5-MoE-A2.7B-Wikihow", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2954, "hfopenllm_v2/BBH": 0.392, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.238 } }, { "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1", "name": "Qwen2-7B-Instruct-v0.1", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3352, "hfopenllm_v2/BBH": 0.5123, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4435, "hfopenllm_v2/MMLU-PRO": 0.3857 } }, { "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8", "name": "Qwen2-7B-Instruct-v0.8", "developer": "MaziyarPanahi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2775, "hfopenllm_v2/BBH": 0.4637, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3566 } }, { "id": "meditsolutions/Llama-3.1-MedIT-SUN-8B", "name": "Llama-3.1-MedIT-SUN-8B", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7837, "hfopenllm_v2/BBH": 0.5187, "hfopenllm_v2/MATH Level 5": 0.2092, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4056, "hfopenllm_v2/MMLU-PRO": 0.3916 } }, { "id": "meditsolutions/Llama-3.2-SUN-1B-chat", "name": "Llama-3.2-SUN-1B-chat", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5482, "hfopenllm_v2/BBH": 0.3514, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1838 } }, { "id": "meditsolutions/Llama-3.2-SUN-1B-Instruct", "name": "Llama-3.2-SUN-1B-Instruct", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6413, "hfopenllm_v2/BBH": 0.3474, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.1781 } }, { "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-26000", "name": "Llama-3.2-SUN-2.4B-checkpoint-26000", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2814, "hfopenllm_v2/BBH": 0.3018, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.1345 } }, { "id": "meditsolutions/Llama-3.2-SUN-2.4B-checkpoint-34800", "name": "Llama-3.2-SUN-2.4B-checkpoint-34800", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2501, "hfopenllm_v2/BBH": 0.3161, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4022, "hfopenllm_v2/MMLU-PRO": 0.1357 } }, { "id": "meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0", "name": "Llama-3.2-SUN-2.4B-v1.0.0", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5637, "hfopenllm_v2/BBH": 0.3391, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1543 } }, { "id": "meditsolutions/Llama-3.2-SUN-2.5B-chat", "name": "Llama-3.2-SUN-2.5B-chat", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5604, "hfopenllm_v2/BBH": 0.3575, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3155, "hfopenllm_v2/MMLU-PRO": 0.1813 } }, { "id": "meditsolutions/Llama-3.2-SUN-HDIC-1B-Instruct", "name": "Llama-3.2-SUN-HDIC-1B-Instruct", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6827, "hfopenllm_v2/BBH": 0.3508, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3594, "hfopenllm_v2/MMLU-PRO": 0.1687 } }, { "id": "meditsolutions/MedIT-Mesh-3B-Instruct", "name": "MedIT-Mesh-3B-Instruct", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5814, "hfopenllm_v2/BBH": 0.5576, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.4012 } }, { "id": "meditsolutions/MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", "name": "MSH-Lite-7B-v1-Bielik-v2.3-Instruct-Llama-Prune", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3655, "hfopenllm_v2/BBH": 0.4035, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.219 } }, { "id": "meditsolutions/MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", "name": "MSH-v1-Bielik-v2.3-Instruct-MedIT-merge", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5814, "hfopenllm_v2/BBH": 0.5672, "hfopenllm_v2/MATH Level 5": 0.2077, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4385, "hfopenllm_v2/MMLU-PRO": 0.35 } }, { "id": "meditsolutions/SmolLM2-MedIT-Upscale-2B", "name": "SmolLM2-MedIT-Upscale-2B", "developer": "meditsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6429, "hfopenllm_v2/BBH": 0.3551, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.1971 } }, { "id": "meetkai/functionary-small-v3.1", "name": "functionary-small-v3.1", "developer": "meetkai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6275, "hfopenllm_v2/BBH": 0.4982, "hfopenllm_v2/MATH Level 5": 0.1571, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3834, "hfopenllm_v2/MMLU-PRO": 0.3349 } }, { "id": "meraGPT/mera-mix-4x7B", "name": "mera-mix-4x7B", "developer": "meraGPT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4832, "hfopenllm_v2/BBH": 0.4019, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4057, "hfopenllm_v2/MMLU-PRO": 0.2748 } }, { "id": "mergekit-community/diabolic6045_ELN-AOC-CAIN", "name": "diabolic6045_ELN-AOC-CAIN", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0862, "hfopenllm_v2/BBH": 0.3126, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.1191 } }, { "id": "mergekit-community/JAJUKA-WEWILLNEVERFORGETYOU-3B", "name": "JAJUKA-WEWILLNEVERFORGETYOU-3B", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4941, "hfopenllm_v2/BBH": 0.437, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3656, "hfopenllm_v2/MMLU-PRO": 0.3033 } }, { "id": "mergekit-community/mergekit-dare_ties-ajgjgea", "name": "mergekit-dare_ties-ajgjgea", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5263, "hfopenllm_v2/BBH": 0.3495, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1744 } }, { "id": "mergekit-community/mergekit-della-zgowfmf", "name": "mergekit-della-zgowfmf", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4828, "hfopenllm_v2/BBH": 0.6591, "hfopenllm_v2/MATH Level 5": 0.3618, "hfopenllm_v2/GPQA": 0.3901, "hfopenllm_v2/MUSR": 0.4834, "hfopenllm_v2/MMLU-PRO": 0.5415 } }, { "id": "mergekit-community/mergekit-model_stock-azgztvm", "name": "mergekit-model_stock-azgztvm", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5062, "hfopenllm_v2/BBH": 0.6543, "hfopenllm_v2/MATH Level 5": 0.4373, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "mergekit-community/mergekit-slerp-fmrazcr", "name": "mergekit-slerp-fmrazcr", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4174, "hfopenllm_v2/BBH": 0.5342, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "mergekit-community/mergekit-ties-rraxdhv", "name": "mergekit-ties-rraxdhv", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1123, "hfopenllm_v2/BBH": 0.5184, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.391 } }, { "id": "mergekit-community/mergekit-ties-ykqemwr", "name": "mergekit-ties-ykqemwr", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.36, "hfopenllm_v2/BBH": 0.5455, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.3734 } }, { "id": "mergekit-community/sexeh_time_testing", "name": "sexeh_time_testing", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7329, "hfopenllm_v2/BBH": 0.5241, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.3667 } }, { "id": "mergekit-community/SuperQwen-2.5-1.5B", "name": "SuperQwen-2.5-1.5B", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1336, "hfopenllm_v2/BBH": 0.2907, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3355, "hfopenllm_v2/MMLU-PRO": 0.1075 } }, { "id": "mergekit-community/VirtuosoSmall-InstructModelStock", "name": "VirtuosoSmall-InstructModelStock", "developer": "mergekit-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5238, "hfopenllm_v2/BBH": 0.6518, "hfopenllm_v2/MATH Level 5": 0.4094, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.5421 } }, { "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", "name": "ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis", "developer": "MEscriva", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0866, "hfopenllm_v2/BBH": 0.3057, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.1154 } }, { "id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama-2-13b-chat-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3985, "hfopenllm_v2/BBH": 0.3343, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2315, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.1923 } }, { "id": "meta-llama/Llama-2-13b-hf", "name": "Llama-2-13b-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2482, "hfopenllm_v2/BBH": 0.4126, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.2378 } }, { "id": "meta-llama/Llama-2-70b-chat-hf", "name": "Llama-2-70b-chat-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4958, "hfopenllm_v2/BBH": 0.3042, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.2433 } }, { "id": "meta-llama/Llama-2-70b-hf", "name": "Llama-2-70b-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2407, "hfopenllm_v2/BBH": 0.5473, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.3718 } }, { "id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama-2-7b-chat-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3986, "hfopenllm_v2/BBH": 0.3114, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3676, "hfopenllm_v2/MMLU-PRO": 0.1688 } }, { "id": "meta-llama/Llama-2-7b-hf", "name": "Llama-2-7b-hf", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2519, "hfopenllm_v2/BBH": 0.3496, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3701, "hfopenllm_v2/MMLU-PRO": 0.1861 } }, { "id": "meta-llama/Llama-3.1-70B", "name": "Llama-3.1-70B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1684, "hfopenllm_v2/BBH": 0.626, "hfopenllm_v2/MATH Level 5": 0.1843, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4572, "hfopenllm_v2/MMLU-PRO": 0.4654 } }, { "id": "meta-llama/Llama-3.1-70B-Instruct", "name": "Llama-3.1-70B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8669, "hfopenllm_v2/BBH": 0.6917, "hfopenllm_v2/MATH Level 5": 0.3807, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4581, "hfopenllm_v2/MMLU-PRO": 0.5309 } }, { "id": "meta-llama/Llama-3.1-8B", "name": "Llama-3.1-8B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1246, "hfopenllm_v2/BBH": 0.466, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3812, "hfopenllm_v2/MMLU-PRO": 0.3288 } }, { "id": "meta-llama/Llama-3.1-8B-Instruct", "name": "Llama-3.1-8B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4922, "hfopenllm_v2/BBH": 0.5087, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.3972, "hfopenllm_v2/MMLU-PRO": 0.3798 } }, { "id": "meta-llama/Llama-3.2-1B", "name": "Llama-3.2-1B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1478, "hfopenllm_v2/BBH": 0.3115, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2282, "hfopenllm_v2/MUSR": 0.3447, "hfopenllm_v2/MMLU-PRO": 0.1203 } }, { "id": "meta-llama/Llama-3.2-1B-Instruct", "name": "Llama-3.2-1B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5698, "hfopenllm_v2/BBH": 0.3497, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3329, "hfopenllm_v2/MMLU-PRO": 0.1682 } }, { "id": "meta-llama/Llama-3.2-3B", "name": "Llama-3.2-3B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1337, "hfopenllm_v2/BBH": 0.3905, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3577, "hfopenllm_v2/MMLU-PRO": 0.2488 } }, { "id": "meta-llama/Llama-3.2-3B-Instruct", "name": "Llama-3.2-3B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7393, "hfopenllm_v2/BBH": 0.461, "hfopenllm_v2/MATH Level 5": 0.1767, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3529, "hfopenllm_v2/MMLU-PRO": 0.3195 } }, { "id": "meta-llama/Llama-3.3-70B-Instruct", "name": "Llama-3.3-70B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8998, "hfopenllm_v2/BBH": 0.6919, "hfopenllm_v2/MATH Level 5": 0.4834, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4461, "hfopenllm_v2/MMLU-PRO": 0.5332 } }, { "id": "meta-llama/Meta-Llama-3-70B", "name": "Meta-Llama-3-70B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1603, "hfopenllm_v2/BBH": 0.6461, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3977, "hfopenllm_v2/MUSR": 0.4518, "hfopenllm_v2/MMLU-PRO": 0.4709 } }, { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "name": "Meta-Llama-3-70B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8099, "hfopenllm_v2/BBH": 0.6547, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.5207, "reward-bench/Score": 0.7627, "reward-bench/Chat": 0.9763, "reward-bench/Chat Hard": 0.5888, "reward-bench/Safety": 0.7297, "reward-bench/Reasoning": 0.7854, "reward-bench/Prior Sets (0.5 weight)": 0.7035 } }, { "id": "meta-llama/Meta-Llama-3-8B", "name": "Meta-Llama-3-8B", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1455, "hfopenllm_v2/BBH": 0.4598, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3614, "hfopenllm_v2/MMLU-PRO": 0.321 } }, { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "name": "Meta-Llama-3-8B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7408, "hfopenllm_v2/BBH": 0.4989, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.3664, "reward-bench/Score": 0.645, "reward-bench/Chat": 0.8547, "reward-bench/Chat Hard": 0.4156, "reward-bench/Safety": 0.6797, "reward-bench/Reasoning": 0.6482, "reward-bench/Prior Sets (0.5 weight)": 0.6082 } }, { "id": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", "name": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8412, "reward-bench/Chat": 0.9721, "reward-bench/Chat Hard": 0.7456, "reward-bench/Safety": 0.7757, "reward-bench/Reasoning": 0.8715 } }, { "id": "meta-llama/Meta-Llama-3.1-70B-Instruct", "name": "meta-llama/Meta-Llama-3.1-70B-Instruct", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8405, "reward-bench/Chat": 0.9721, "reward-bench/Chat Hard": 0.7018, "reward-bench/Safety": 0.8284, "reward-bench/Reasoning": 0.8599 } }, { "id": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "name": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7808, "reward-bench/Chat": 0.8757, "reward-bench/Chat Hard": 0.6689, "reward-bench/Safety": 0.7507, "reward-bench/Reasoning": 0.828 } }, { "id": "meta-llama/Meta-Llama-3.1-8B", "name": "Meta Llama 3.1 8B", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "la_leaderboard/la_leaderboard": 27.04 } }, { "id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "name": "Meta Llama 3.1 8B Instruct", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "la_leaderboard/la_leaderboard": 30.23 } }, { "id": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "name": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "developer": "meta-llama", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6565, "reward-bench/Chat": 0.8073, "reward-bench/Chat Hard": 0.4978, "reward-bench/Safety": 0.6399, "reward-bench/Reasoning": 0.6811 } }, { "id": "meta-metrics/MetaMetrics-RM-v1.0", "name": "meta-metrics/MetaMetrics-RM-v1.0", "developer": "meta-metrics", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9342, "reward-bench/Chat": 0.9832, "reward-bench/Chat Hard": 0.864, "reward-bench/Safety": 0.9081, "reward-bench/Reasoning": 0.9816 } }, { "id": "meta/LLaMA-13B", "name": "LLaMA 13B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.595, "helm_classic/MMLU": 0.422, "helm_classic/BoolQ": 0.714, "helm_classic/NarrativeQA": 0.711, "helm_classic/NaturalQuestions (open-book)": 0.614, "helm_classic/QuAC": 0.347, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.324, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.928, "helm_classic/CivilComments": 0.6, "helm_classic/RAFT": 0.643 } }, { "id": "meta/llama-2-13b", "name": "Llama 2 13B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.823, "helm_classic/MMLU": 0.507, "helm_classic/BoolQ": 0.811, "helm_classic/NarrativeQA": 0.744, "helm_classic/NaturalQuestions (open-book)": 0.637, "helm_classic/QuAC": 0.424, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.33, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.962, "helm_classic/CivilComments": 0.588, "helm_classic/RAFT": 0.707, "helm_lite/Mean win rate": 0.233, "helm_lite/NarrativeQA": 0.741, "helm_lite/NaturalQuestions (closed-book)": 0.371, "helm_lite/OpenbookQA": 0.634, "helm_lite/MMLU": 0.505, "helm_lite/MATH": 0.102, "helm_lite/GSM8K": 0.266, "helm_lite/LegalBench": 0.591, "helm_lite/MedQA": 0.392, "helm_lite/WMT 2014": 0.167, "helm_mmlu/MMLU All Subjects": 0.554, "helm_mmlu/Abstract Algebra": 0.27, "helm_mmlu/Anatomy": 0.496, "helm_mmlu/College Physics": 0.235, "helm_mmlu/Computer Security": 0.69, "helm_mmlu/Econometrics": 0.307, "helm_mmlu/Global Facts": 0.38, "helm_mmlu/Jurisprudence": 0.704, "helm_mmlu/Philosophy": 0.672, "helm_mmlu/Professional Psychology": 0.567, "helm_mmlu/Us Foreign Policy": 0.83, "helm_mmlu/Astronomy": 0.546, "helm_mmlu/Business Ethics": 0.55, "helm_mmlu/Clinical Knowledge": 0.592, "helm_mmlu/Conceptual Physics": 0.413, "helm_mmlu/Electrical Engineering": 0.49, "helm_mmlu/Elementary Mathematics": 0.307, "helm_mmlu/Formal Logic": 0.381, "helm_mmlu/High School World History": 0.705, "helm_mmlu/Human Sexuality": 0.618, "helm_mmlu/International Law": 0.752, "helm_mmlu/Logical Fallacies": 0.687, "helm_mmlu/Machine Learning": 0.286, "helm_mmlu/Management": 0.738, "helm_mmlu/Marketing": 0.786, "helm_mmlu/Medical Genetics": 0.57, "helm_mmlu/Miscellaneous": 0.748, "helm_mmlu/Moral Scenarios": 0.407, "helm_mmlu/Nutrition": 0.627, "helm_mmlu/Prehistory": 0.654, "helm_mmlu/Public Relations": 0.6, "helm_mmlu/Security Studies": 0.608, "helm_mmlu/Sociology": 0.761, "helm_mmlu/Virology": 0.476, "helm_mmlu/World Religions": 0.76, "helm_mmlu/Mean win rate": 0.502 } }, { "id": "meta/llama-2-70b", "name": "Llama 2 70B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.944, "helm_classic/MMLU": 0.582, "helm_classic/BoolQ": 0.886, "helm_classic/NarrativeQA": 0.77, "helm_classic/NaturalQuestions (open-book)": 0.674, "helm_classic/QuAC": 0.484, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.554, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.961, "helm_classic/CivilComments": 0.652, "helm_classic/RAFT": 0.727, "helm_lite/Mean win rate": 0.482, "helm_lite/NarrativeQA": 0.763, "helm_lite/NaturalQuestions (closed-book)": 0.46, "helm_lite/OpenbookQA": 0.838, "helm_lite/MMLU": 0.58, "helm_lite/MATH": 0.323, "helm_lite/GSM8K": 0.567, "helm_lite/LegalBench": 0.673, "helm_lite/MedQA": 0.618, "helm_lite/WMT 2014": 0.196, "helm_mmlu/MMLU All Subjects": 0.695, "helm_mmlu/Abstract Algebra": 0.31, "helm_mmlu/Anatomy": 0.607, "helm_mmlu/College Physics": 0.363, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.43, "helm_mmlu/Global Facts": 0.47, "helm_mmlu/Jurisprudence": 0.824, "helm_mmlu/Philosophy": 0.791, "helm_mmlu/Professional Psychology": 0.76, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.829, "helm_mmlu/Business Ethics": 0.73, "helm_mmlu/Clinical Knowledge": 0.717, "helm_mmlu/Conceptual Physics": 0.668, "helm_mmlu/Electrical Engineering": 0.634, "helm_mmlu/Elementary Mathematics": 0.421, "helm_mmlu/Formal Logic": 0.468, "helm_mmlu/High School World History": 0.882, "helm_mmlu/Human Sexuality": 0.84, "helm_mmlu/International Law": 0.868, "helm_mmlu/Logical Fallacies": 0.791, "helm_mmlu/Machine Learning": 0.491, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.889, "helm_mmlu/Medical Genetics": 0.72, "helm_mmlu/Miscellaneous": 0.857, "helm_mmlu/Moral Scenarios": 0.45, "helm_mmlu/Nutrition": 0.758, "helm_mmlu/Prehistory": 0.84, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.796, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.53, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.508 } }, { "id": "meta/llama-2-7b", "name": "Llama 2 7B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.607, "helm_classic/MMLU": 0.431, "helm_classic/BoolQ": 0.762, "helm_classic/NarrativeQA": 0.691, "helm_classic/NaturalQuestions (open-book)": 0.611, "helm_classic/QuAC": 0.406, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.272, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.907, "helm_classic/CivilComments": 0.562, "helm_classic/RAFT": 0.643, "helm_lite/Mean win rate": 0.152, "helm_lite/NarrativeQA": 0.686, "helm_lite/NaturalQuestions (closed-book)": 0.333, "helm_lite/OpenbookQA": 0.544, "helm_lite/MMLU": 0.425, "helm_lite/MATH": 0.097, "helm_lite/GSM8K": 0.154, "helm_lite/LegalBench": 0.502, "helm_lite/MedQA": 0.392, "helm_lite/WMT 2014": 0.144, "helm_mmlu/MMLU All Subjects": 0.458, "helm_mmlu/Abstract Algebra": 0.29, "helm_mmlu/Anatomy": 0.452, "helm_mmlu/College Physics": 0.196, "helm_mmlu/Computer Security": 0.59, "helm_mmlu/Econometrics": 0.316, "helm_mmlu/Global Facts": 0.29, "helm_mmlu/Jurisprudence": 0.519, "helm_mmlu/Philosophy": 0.592, "helm_mmlu/Professional Psychology": 0.459, "helm_mmlu/Us Foreign Policy": 0.64, "helm_mmlu/Astronomy": 0.408, "helm_mmlu/Business Ethics": 0.48, "helm_mmlu/Clinical Knowledge": 0.453, "helm_mmlu/Conceptual Physics": 0.434, "helm_mmlu/Electrical Engineering": 0.407, "helm_mmlu/Elementary Mathematics": 0.254, "helm_mmlu/Formal Logic": 0.27, "helm_mmlu/High School World History": 0.662, "helm_mmlu/Human Sexuality": 0.557, "helm_mmlu/International Law": 0.628, "helm_mmlu/Logical Fallacies": 0.466, "helm_mmlu/Machine Learning": 0.402, "helm_mmlu/Management": 0.563, "helm_mmlu/Marketing": 0.697, "helm_mmlu/Medical Genetics": 0.53, "helm_mmlu/Miscellaneous": 0.632, "helm_mmlu/Moral Scenarios": 0.238, "helm_mmlu/Nutrition": 0.497, "helm_mmlu/Prehistory": 0.503, "helm_mmlu/Public Relations": 0.509, "helm_mmlu/Security Studies": 0.433, "helm_mmlu/Sociology": 0.617, "helm_mmlu/Virology": 0.392, "helm_mmlu/World Religions": 0.713, "helm_mmlu/Mean win rate": 0.681 } }, { "id": "meta/llama-3-1-8b-instruct-prompt", "name": "Llama-3.1-8B-Instruct (Prompt)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 85.0, "bfcl/bfcl.overall.overall_accuracy": 25.83, "bfcl/bfcl.overall.total_cost_usd": 7.49, "bfcl/bfcl.overall.latency_mean_s": 5.6, "bfcl/bfcl.overall.latency_std_s": 19.37, "bfcl/bfcl.overall.latency_p95_s": 22.6, "bfcl/bfcl.non_live.ast_accuracy": 84.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 71.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 82.5, "bfcl/bfcl.live.live_accuracy": 70.76, "bfcl/bfcl.live.live_simple_ast_accuracy": 72.87, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.13, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 45.83, "bfcl/bfcl.multi_turn.accuracy": 11.12, "bfcl/bfcl.multi_turn.base_accuracy": 13.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 9.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 9.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 13.0, "bfcl/bfcl.web_search.accuracy": 3.0, "bfcl/bfcl.web_search.base_accuracy": 6.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 10.75, "bfcl/bfcl.memory.kv_accuracy": 7.74, "bfcl/bfcl.memory.vector_accuracy": 5.81, "bfcl/bfcl.memory.recursive_summarization_accuracy": 18.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 42.7, "bfcl/bfcl.format_sensitivity.max_delta": 74.5, "bfcl/bfcl.format_sensitivity.stddev": 29.1 } }, { "id": "meta/llama-3-2-1b-instruct-fc", "name": "Llama-3.2-1B-Instruct (FC)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 107.0, "bfcl/bfcl.overall.overall_accuracy": 10.82, "bfcl/bfcl.overall.total_cost_usd": 1.64, "bfcl/bfcl.overall.latency_mean_s": 3.21, "bfcl/bfcl.overall.latency_std_s": 10.04, "bfcl/bfcl.overall.latency_p95_s": 9.77, "bfcl/bfcl.non_live.ast_accuracy": 38.38, "bfcl/bfcl.non_live.simple_ast_accuracy": 44.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 50.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 44.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 15.0, "bfcl/bfcl.live.live_accuracy": 11.77, "bfcl/bfcl.live.live_simple_ast_accuracy": 31.78, "bfcl/bfcl.live.live_multiple_ast_accuracy": 7.31, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 0.0, "bfcl/bfcl.multi_turn.base_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 3.23, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 2.58, "bfcl/bfcl.memory.recursive_summarization_accuracy": 4.52, "bfcl/bfcl.relevance.relevance_detection_accuracy": 43.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 51.57 } }, { "id": "meta/llama-3-2-3b-instruct-fc", "name": "Llama-3.2-3B-Instruct (FC)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 98.0, "bfcl/bfcl.overall.overall_accuracy": 21.95, "bfcl/bfcl.overall.total_cost_usd": 6.2, "bfcl/bfcl.overall.latency_mean_s": 6.1, "bfcl/bfcl.overall.latency_std_s": 20.07, "bfcl/bfcl.overall.latency_p95_s": 17.27, "bfcl/bfcl.non_live.ast_accuracy": 82.67, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 79.0, "bfcl/bfcl.live.live_accuracy": 58.33, "bfcl/bfcl.live.live_simple_ast_accuracy": 65.12, "bfcl/bfcl.live.live_multiple_ast_accuracy": 57.64, "bfcl/bfcl.live.live_parallel_ast_accuracy": 25.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 4.0, "bfcl/bfcl.multi_turn.base_accuracy": 5.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 3.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 4.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 3.5, "bfcl/bfcl.web_search.accuracy": 1.0, "bfcl/bfcl.web_search.base_accuracy": 2.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 6.24, "bfcl/bfcl.memory.kv_accuracy": 3.23, "bfcl/bfcl.memory.vector_accuracy": 3.23, "bfcl/bfcl.memory.recursive_summarization_accuracy": 12.26, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 52.06 } }, { "id": "meta/llama-3-3-70b-instruct-fc", "name": "Llama-3.3-70B-Instruct (FC)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 62.0, "bfcl/bfcl.overall.overall_accuracy": 31.9, "bfcl/bfcl.overall.total_cost_usd": 29.54, "bfcl/bfcl.overall.latency_mean_s": 26.11, "bfcl/bfcl.overall.latency_std_s": 93.22, "bfcl/bfcl.overall.latency_p95_s": 187.93, "bfcl/bfcl.non_live.ast_accuracy": 88.02, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 90.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.0, "bfcl/bfcl.live.live_accuracy": 76.61, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.4, "bfcl/bfcl.live.live_multiple_ast_accuracy": 75.5, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 21.5, "bfcl/bfcl.multi_turn.base_accuracy": 26.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 19.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 14.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 26.5, "bfcl/bfcl.web_search.accuracy": 10.0, "bfcl/bfcl.web_search.base_accuracy": 14.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 6.0, "bfcl/bfcl.memory.accuracy": 8.17, "bfcl/bfcl.memory.kv_accuracy": 4.52, "bfcl/bfcl.memory.vector_accuracy": 8.39, "bfcl/bfcl.memory.recursive_summarization_accuracy": 11.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 53.53 } }, { "id": "meta/llama-3-70b", "name": "Llama 3 70B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.793, "helm_lite/NarrativeQA": 0.798, "helm_lite/NaturalQuestions (closed-book)": 0.475, "helm_lite/OpenbookQA": 0.934, "helm_lite/MMLU": 0.695, "helm_lite/MATH": 0.663, "helm_lite/GSM8K": 0.805, "helm_lite/LegalBench": 0.733, "helm_lite/MedQA": 0.777, "helm_lite/WMT 2014": 0.225, "helm_mmlu/MMLU All Subjects": 0.793, "helm_mmlu/Abstract Algebra": 0.43, "helm_mmlu/Anatomy": 0.785, "helm_mmlu/College Physics": 0.529, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.693, "helm_mmlu/Global Facts": 0.49, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.865, "helm_mmlu/Professional Psychology": 0.871, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.921, "helm_mmlu/Business Ethics": 0.83, "helm_mmlu/Clinical Knowledge": 0.845, "helm_mmlu/Conceptual Physics": 0.838, "helm_mmlu/Electrical Engineering": 0.766, "helm_mmlu/Elementary Mathematics": 0.632, "helm_mmlu/Formal Logic": 0.651, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.878, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.865, "helm_mmlu/Machine Learning": 0.714, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.89, "helm_mmlu/Miscellaneous": 0.917, "helm_mmlu/Moral Scenarios": 0.598, "helm_mmlu/Nutrition": 0.876, "helm_mmlu/Prehistory": 0.91, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.833, "helm_mmlu/Sociology": 0.93, "helm_mmlu/Virology": 0.59, "helm_mmlu/World Religions": 0.906, "helm_mmlu/Mean win rate": 0.524 } }, { "id": "meta/llama-3-8b", "name": "Llama 3 8B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.387, "helm_lite/NarrativeQA": 0.754, "helm_lite/NaturalQuestions (closed-book)": 0.378, "helm_lite/OpenbookQA": 0.766, "helm_lite/MMLU": 0.602, "helm_lite/MATH": 0.391, "helm_lite/GSM8K": 0.499, "helm_lite/LegalBench": 0.637, "helm_lite/MedQA": 0.581, "helm_lite/WMT 2014": 0.183, "helm_mmlu/MMLU All Subjects": 0.668, "helm_mmlu/Abstract Algebra": 0.33, "helm_mmlu/Anatomy": 0.696, "helm_mmlu/College Physics": 0.451, "helm_mmlu/Computer Security": 0.8, "helm_mmlu/Econometrics": 0.518, "helm_mmlu/Global Facts": 0.34, "helm_mmlu/Jurisprudence": 0.741, "helm_mmlu/Philosophy": 0.743, "helm_mmlu/Professional Psychology": 0.711, "helm_mmlu/Us Foreign Policy": 0.88, "helm_mmlu/Astronomy": 0.711, "helm_mmlu/Business Ethics": 0.65, "helm_mmlu/Clinical Knowledge": 0.751, "helm_mmlu/Conceptual Physics": 0.557, "helm_mmlu/Electrical Engineering": 0.669, "helm_mmlu/Elementary Mathematics": 0.426, "helm_mmlu/Formal Logic": 0.468, "helm_mmlu/High School World History": 0.823, "helm_mmlu/Human Sexuality": 0.748, "helm_mmlu/International Law": 0.843, "helm_mmlu/Logical Fallacies": 0.755, "helm_mmlu/Machine Learning": 0.545, "helm_mmlu/Management": 0.874, "helm_mmlu/Marketing": 0.885, "helm_mmlu/Medical Genetics": 0.83, "helm_mmlu/Miscellaneous": 0.831, "helm_mmlu/Moral Scenarios": 0.416, "helm_mmlu/Nutrition": 0.761, "helm_mmlu/Prehistory": 0.738, "helm_mmlu/Public Relations": 0.736, "helm_mmlu/Security Studies": 0.771, "helm_mmlu/Sociology": 0.866, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.819, "helm_mmlu/Mean win rate": 0.733 } }, { "id": "meta/llama-3.1-405b-instruct-turbo", "name": "Llama 3.1 Instruct Turbo 405B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.618, "helm_capabilities/MMLU-Pro": 0.723, "helm_capabilities/GPQA": 0.522, "helm_capabilities/IFEval": 0.811, "helm_capabilities/WildBench": 0.783, "helm_capabilities/Omni-MATH": 0.249, "helm_lite/Mean win rate": 0.854, "helm_lite/NarrativeQA": 0.749, "helm_lite/NaturalQuestions (closed-book)": 0.456, "helm_lite/OpenbookQA": 0.94, "helm_lite/MMLU": 0.759, "helm_lite/MATH": 0.827, "helm_lite/GSM8K": 0.949, "helm_lite/LegalBench": 0.707, "helm_lite/MedQA": 0.805, "helm_lite/WMT 2014": 0.238, "helm_mmlu/MMLU All Subjects": 0.845, "helm_mmlu/Abstract Algebra": 0.7, "helm_mmlu/Anatomy": 0.822, "helm_mmlu/College Physics": 0.696, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.746, "helm_mmlu/Global Facts": 0.71, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.878, "helm_mmlu/Professional Psychology": 0.861, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.921, "helm_mmlu/Business Ethics": 0.81, "helm_mmlu/Clinical Knowledge": 0.879, "helm_mmlu/Conceptual Physics": 0.877, "helm_mmlu/Electrical Engineering": 0.821, "helm_mmlu/Elementary Mathematics": 0.828, "helm_mmlu/Formal Logic": 0.698, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.855, "helm_mmlu/International Law": 0.95, "helm_mmlu/Logical Fallacies": 0.92, "helm_mmlu/Machine Learning": 0.795, "helm_mmlu/Management": 0.893, "helm_mmlu/Marketing": 0.962, "helm_mmlu/Medical Genetics": 0.93, "helm_mmlu/Miscellaneous": 0.939, "helm_mmlu/Moral Scenarios": 0.876, "helm_mmlu/Nutrition": 0.928, "helm_mmlu/Prehistory": 0.929, "helm_mmlu/Public Relations": 0.818, "helm_mmlu/Security Studies": 0.857, "helm_mmlu/Sociology": 0.94, "helm_mmlu/Virology": 0.572, "helm_mmlu/World Religions": 0.906, "helm_mmlu/Mean win rate": 0.33 } }, { "id": "meta/llama-3.1-70b-instruct-turbo", "name": "Llama 3.1 Instruct Turbo 70B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.574, "helm_capabilities/MMLU-Pro": 0.653, "helm_capabilities/GPQA": 0.426, "helm_capabilities/IFEval": 0.821, "helm_capabilities/WildBench": 0.758, "helm_capabilities/Omni-MATH": 0.21, "helm_lite/Mean win rate": 0.808, "helm_lite/NarrativeQA": 0.772, "helm_lite/NaturalQuestions (closed-book)": 0.452, "helm_lite/OpenbookQA": 0.938, "helm_lite/MMLU": 0.709, "helm_lite/MATH": 0.783, "helm_lite/GSM8K": 0.938, "helm_lite/LegalBench": 0.687, "helm_lite/MedQA": 0.769, "helm_lite/WMT 2014": 0.223, "helm_mmlu/MMLU All Subjects": 0.801, "helm_mmlu/Abstract Algebra": 0.55, "helm_mmlu/Anatomy": 0.8, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.8, "helm_mmlu/Econometrics": 0.675, "helm_mmlu/Global Facts": 0.61, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.833, "helm_mmlu/Professional Psychology": 0.846, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.908, "helm_mmlu/Business Ethics": 0.72, "helm_mmlu/Clinical Knowledge": 0.845, "helm_mmlu/Conceptual Physics": 0.834, "helm_mmlu/Electrical Engineering": 0.745, "helm_mmlu/Elementary Mathematics": 0.701, "helm_mmlu/Formal Logic": 0.675, "helm_mmlu/High School World History": 0.937, "helm_mmlu/Human Sexuality": 0.855, "helm_mmlu/International Law": 0.926, "helm_mmlu/Logical Fallacies": 0.84, "helm_mmlu/Machine Learning": 0.696, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.936, "helm_mmlu/Medical Genetics": 0.93, "helm_mmlu/Miscellaneous": 0.913, "helm_mmlu/Moral Scenarios": 0.834, "helm_mmlu/Nutrition": 0.889, "helm_mmlu/Prehistory": 0.88, "helm_mmlu/Public Relations": 0.709, "helm_mmlu/Security Studies": 0.849, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.895, "helm_mmlu/Mean win rate": 0.021 } }, { "id": "meta/llama-3.1-8b-instruct-turbo", "name": "Llama 3.1 Instruct Turbo 8B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.444, "helm_capabilities/MMLU-Pro": 0.406, "helm_capabilities/GPQA": 0.247, "helm_capabilities/IFEval": 0.743, "helm_capabilities/WildBench": 0.686, "helm_capabilities/Omni-MATH": 0.137, "helm_lite/Mean win rate": 0.303, "helm_lite/NarrativeQA": 0.756, "helm_lite/NaturalQuestions (closed-book)": 0.209, "helm_lite/OpenbookQA": 0.74, "helm_lite/MMLU": 0.5, "helm_lite/MATH": 0.703, "helm_lite/GSM8K": 0.798, "helm_lite/LegalBench": 0.342, "helm_lite/MedQA": 0.245, "helm_lite/WMT 2014": 0.181, "helm_mmlu/MMLU All Subjects": 0.561, "helm_mmlu/Abstract Algebra": 0.26, "helm_mmlu/Anatomy": 0.459, "helm_mmlu/College Physics": 0.363, "helm_mmlu/Computer Security": 0.71, "helm_mmlu/Econometrics": 0.351, "helm_mmlu/Global Facts": 0.26, "helm_mmlu/Jurisprudence": 0.731, "helm_mmlu/Philosophy": 0.64, "helm_mmlu/Professional Psychology": 0.649, "helm_mmlu/Us Foreign Policy": 0.79, "helm_mmlu/Astronomy": 0.645, "helm_mmlu/Business Ethics": 0.65, "helm_mmlu/Clinical Knowledge": 0.615, "helm_mmlu/Conceptual Physics": 0.528, "helm_mmlu/Electrical Engineering": 0.441, "helm_mmlu/Elementary Mathematics": 0.429, "helm_mmlu/Formal Logic": 0.444, "helm_mmlu/High School World History": 0.515, "helm_mmlu/Human Sexuality": 0.733, "helm_mmlu/International Law": 0.694, "helm_mmlu/Logical Fallacies": 0.742, "helm_mmlu/Machine Learning": 0.384, "helm_mmlu/Management": 0.709, "helm_mmlu/Marketing": 0.833, "helm_mmlu/Medical Genetics": 0.66, "helm_mmlu/Miscellaneous": 0.653, "helm_mmlu/Moral Scenarios": 0.368, "helm_mmlu/Nutrition": 0.712, "helm_mmlu/Prehistory": 0.728, "helm_mmlu/Public Relations": 0.664, "helm_mmlu/Security Studies": 0.576, "helm_mmlu/Sociology": 0.701, "helm_mmlu/Virology": 0.446, "helm_mmlu/World Religions": 0.789, "helm_mmlu/Mean win rate": 0.475 } }, { "id": "meta/llama-3.2-11b-vision-instruct-turbo", "name": "Llama 3.2 Vision Instruct Turbo 11B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.325, "helm_lite/NarrativeQA": 0.756, "helm_lite/NaturalQuestions (closed-book)": 0.234, "helm_lite/OpenbookQA": 0.724, "helm_lite/MMLU": 0.511, "helm_lite/MATH": 0.739, "helm_lite/GSM8K": 0.823, "helm_lite/LegalBench": 0.435, "helm_lite/MedQA": 0.27, "helm_lite/WMT 2014": 0.179, "helm_mmlu/MMLU All Subjects": 0.565, "helm_mmlu/Abstract Algebra": 0.28, "helm_mmlu/Anatomy": 0.533, "helm_mmlu/College Physics": 0.333, "helm_mmlu/Computer Security": 0.71, "helm_mmlu/Econometrics": 0.395, "helm_mmlu/Global Facts": 0.25, "helm_mmlu/Jurisprudence": 0.722, "helm_mmlu/Philosophy": 0.646, "helm_mmlu/Professional Psychology": 0.649, "helm_mmlu/Us Foreign Policy": 0.78, "helm_mmlu/Astronomy": 0.671, "helm_mmlu/Business Ethics": 0.64, "helm_mmlu/Clinical Knowledge": 0.638, "helm_mmlu/Conceptual Physics": 0.536, "helm_mmlu/Electrical Engineering": 0.51, "helm_mmlu/Elementary Mathematics": 0.458, "helm_mmlu/Formal Logic": 0.46, "helm_mmlu/High School World History": 0.502, "helm_mmlu/Human Sexuality": 0.763, "helm_mmlu/International Law": 0.711, "helm_mmlu/Logical Fallacies": 0.742, "helm_mmlu/Machine Learning": 0.375, "helm_mmlu/Management": 0.728, "helm_mmlu/Marketing": 0.838, "helm_mmlu/Medical Genetics": 0.7, "helm_mmlu/Miscellaneous": 0.644, "helm_mmlu/Moral Scenarios": 0.328, "helm_mmlu/Nutrition": 0.752, "helm_mmlu/Prehistory": 0.744, "helm_mmlu/Public Relations": 0.645, "helm_mmlu/Security Studies": 0.567, "helm_mmlu/Sociology": 0.627, "helm_mmlu/Virology": 0.446, "helm_mmlu/World Religions": 0.696, "helm_mmlu/Mean win rate": 0.897 } }, { "id": "meta/llama-3.2-90b-vision-instruct-turbo", "name": "Llama 3.2 Vision Instruct Turbo 90B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.819, "helm_lite/NarrativeQA": 0.777, "helm_lite/NaturalQuestions (closed-book)": 0.457, "helm_lite/OpenbookQA": 0.942, "helm_lite/MMLU": 0.703, "helm_lite/MATH": 0.791, "helm_lite/GSM8K": 0.936, "helm_lite/LegalBench": 0.68, "helm_lite/MedQA": 0.769, "helm_lite/WMT 2014": 0.224, "helm_mmlu/MMLU All Subjects": 0.803, "helm_mmlu/Abstract Algebra": 0.52, "helm_mmlu/Anatomy": 0.8, "helm_mmlu/College Physics": 0.539, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.684, "helm_mmlu/Global Facts": 0.6, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.839, "helm_mmlu/Professional Psychology": 0.843, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.921, "helm_mmlu/Business Ethics": 0.76, "helm_mmlu/Clinical Knowledge": 0.845, "helm_mmlu/Conceptual Physics": 0.826, "helm_mmlu/Electrical Engineering": 0.759, "helm_mmlu/Elementary Mathematics": 0.688, "helm_mmlu/Formal Logic": 0.683, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.87, "helm_mmlu/International Law": 0.934, "helm_mmlu/Logical Fallacies": 0.834, "helm_mmlu/Machine Learning": 0.688, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.944, "helm_mmlu/Medical Genetics": 0.92, "helm_mmlu/Miscellaneous": 0.913, "helm_mmlu/Moral Scenarios": 0.841, "helm_mmlu/Nutrition": 0.889, "helm_mmlu/Prehistory": 0.886, "helm_mmlu/Public Relations": 0.718, "helm_mmlu/Security Studies": 0.853, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.901, "helm_mmlu/Mean win rate": 0.773 } }, { "id": "meta/llama-3.3-70b-instruct-turbo", "name": "Llama 3.3 Instruct Turbo 70B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.812, "helm_lite/NarrativeQA": 0.791, "helm_lite/NaturalQuestions (closed-book)": 0.431, "helm_lite/OpenbookQA": 0.928, "helm_lite/MMLU": 0.7, "helm_lite/MATH": 0.808, "helm_lite/GSM8K": 0.942, "helm_lite/LegalBench": 0.725, "helm_lite/MedQA": 0.761, "helm_lite/WMT 2014": 0.219, "helm_mmlu/MMLU All Subjects": 0.791, "helm_mmlu/Abstract Algebra": 0.5, "helm_mmlu/Anatomy": 0.778, "helm_mmlu/College Physics": 0.52, "helm_mmlu/Computer Security": 0.8, "helm_mmlu/Econometrics": 0.719, "helm_mmlu/Global Facts": 0.58, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.83, "helm_mmlu/Professional Psychology": 0.845, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.888, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.83, "helm_mmlu/Conceptual Physics": 0.821, "helm_mmlu/Electrical Engineering": 0.745, "helm_mmlu/Elementary Mathematics": 0.672, "helm_mmlu/Formal Logic": 0.675, "helm_mmlu/High School World History": 0.907, "helm_mmlu/Human Sexuality": 0.855, "helm_mmlu/International Law": 0.884, "helm_mmlu/Logical Fallacies": 0.816, "helm_mmlu/Machine Learning": 0.714, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.927, "helm_mmlu/Medical Genetics": 0.9, "helm_mmlu/Miscellaneous": 0.914, "helm_mmlu/Moral Scenarios": 0.698, "helm_mmlu/Nutrition": 0.882, "helm_mmlu/Prehistory": 0.895, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.845, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.566, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.722 } }, { "id": "meta/LLaMA-30B", "name": "LLaMA 30B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.781, "helm_classic/MMLU": 0.531, "helm_classic/BoolQ": 0.861, "helm_classic/NarrativeQA": 0.752, "helm_classic/NaturalQuestions (open-book)": 0.666, "helm_classic/QuAC": 0.39, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.344, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.927, "helm_classic/CivilComments": 0.549, "helm_classic/RAFT": 0.752 } }, { "id": "meta/llama-4-maverick", "name": "meta/llama-4-maverick", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.09859154929577464 } }, { "id": "meta/llama-4-maverick-17b-128e-instruct-fp8", "name": "Llama 4 Maverick 17Bx128E Instruct FP8", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.718, "helm_capabilities/MMLU-Pro": 0.81, "helm_capabilities/GPQA": 0.65, "helm_capabilities/IFEval": 0.908, "helm_capabilities/WildBench": 0.8, "helm_capabilities/Omni-MATH": 0.422 } }, { "id": "meta/llama-4-maverick-17b-128e-instruct-fp8-fc", "name": "Llama-4-Maverick-17B-128E-Instruct-FP8 (FC)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 50.0, "bfcl/bfcl.overall.overall_accuracy": 37.29, "bfcl/bfcl.overall.total_cost_usd": 18.25, "bfcl/bfcl.overall.latency_mean_s": 18.43, "bfcl/bfcl.overall.latency_std_s": 34.11, "bfcl/bfcl.overall.latency_p95_s": 102.75, "bfcl/bfcl.non_live.ast_accuracy": 88.65, "bfcl/bfcl.non_live.simple_ast_accuracy": 77.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 73.65, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.04, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 20.25, "bfcl/bfcl.multi_turn.base_accuracy": 27.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 22.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 14.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 18.0, "bfcl/bfcl.web_search.accuracy": 28.0, "bfcl/bfcl.web_search.base_accuracy": 39.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 17.0, "bfcl/bfcl.memory.accuracy": 18.92, "bfcl/bfcl.memory.kv_accuracy": 8.39, "bfcl/bfcl.memory.vector_accuracy": 32.9, "bfcl/bfcl.memory.recursive_summarization_accuracy": 15.48, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 55.97 } }, { "id": "meta/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17Bx16E Instruct", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.644, "helm_capabilities/MMLU-Pro": 0.742, "helm_capabilities/GPQA": 0.507, "helm_capabilities/IFEval": 0.818, "helm_capabilities/WildBench": 0.779, "helm_capabilities/Omni-MATH": 0.373 } }, { "id": "meta/llama-4-scout-17b-16e-instruct-fc", "name": "Llama-4-Scout-17B-16E-Instruct (FC)", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 72.0, "bfcl/bfcl.overall.overall_accuracy": 28.13, "bfcl/bfcl.overall.total_cost_usd": 24.68, "bfcl/bfcl.overall.latency_mean_s": 17.86, "bfcl/bfcl.overall.latency_std_s": 50.68, "bfcl/bfcl.overall.latency_p95_s": 166.2, "bfcl/bfcl.non_live.ast_accuracy": 89.38, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 90.5, "bfcl/bfcl.live.live_accuracy": 74.69, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.78, "bfcl/bfcl.live.live_multiple_ast_accuracy": 72.74, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 9.0, "bfcl/bfcl.multi_turn.base_accuracy": 12.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 7.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 7.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 9.5, "bfcl/bfcl.web_search.accuracy": 14.5, "bfcl/bfcl.web_search.base_accuracy": 18.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 11.0, "bfcl/bfcl.memory.accuracy": 8.17, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 2.58, "bfcl/bfcl.memory.recursive_summarization_accuracy": 19.35, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 44.92 } }, { "id": "meta/LLaMA-65B", "name": "LLaMA 65B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.908, "helm_classic/MMLU": 0.584, "helm_classic/BoolQ": 0.871, "helm_classic/NarrativeQA": 0.755, "helm_classic/NaturalQuestions (open-book)": 0.672, "helm_classic/QuAC": 0.401, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.508, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.962, "helm_classic/CivilComments": 0.655, "helm_classic/RAFT": 0.702, "helm_lite/Mean win rate": 0.345, "helm_lite/NarrativeQA": 0.755, "helm_lite/NaturalQuestions (closed-book)": 0.433, "helm_lite/OpenbookQA": 0.754, "helm_lite/MMLU": 0.584, "helm_lite/MATH": 0.257, "helm_lite/GSM8K": 0.489, "helm_lite/LegalBench": 0.48, "helm_lite/MedQA": 0.507, "helm_lite/WMT 2014": 0.189 } }, { "id": "meta/LLaMA-7B", "name": "LLaMA 7B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.533, "helm_classic/MMLU": 0.321, "helm_classic/BoolQ": 0.756, "helm_classic/NarrativeQA": 0.669, "helm_classic/NaturalQuestions (open-book)": 0.589, "helm_classic/QuAC": 0.338, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.28, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.947, "helm_classic/CivilComments": 0.563, "helm_classic/RAFT": 0.573 } }, { "id": "meta/OPT-175B", "name": "OPT 175B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.609, "helm_classic/MMLU": 0.318, "helm_classic/BoolQ": 0.793, "helm_classic/NarrativeQA": 0.671, "helm_classic/NaturalQuestions (open-book)": 0.615, "helm_classic/QuAC": 0.36, "helm_classic/HellaSwag": 0.791, "helm_classic/OpenbookQA": 0.586, "helm_classic/TruthfulQA": 0.25, "helm_classic/MS MARCO (TREC)": 0.448, "helm_classic/CNN/DailyMail": 0.146, "helm_classic/XSUM": 0.155, "helm_classic/IMDB": 0.947, "helm_classic/CivilComments": 0.505, "helm_classic/RAFT": 0.606 } }, { "id": "meta/OPT-66B", "name": "OPT 66B", "developer": "Meta", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.448, "helm_classic/MMLU": 0.276, "helm_classic/BoolQ": 0.76, "helm_classic/NarrativeQA": 0.638, "helm_classic/NaturalQuestions (open-book)": 0.596, "helm_classic/QuAC": 0.357, "helm_classic/HellaSwag": 0.745, "helm_classic/OpenbookQA": 0.534, "helm_classic/TruthfulQA": 0.201, "helm_classic/MS MARCO (TREC)": 0.482, "helm_classic/CNN/DailyMail": 0.136, "helm_classic/XSUM": 0.126, "helm_classic/IMDB": 0.917, "helm_classic/CivilComments": 0.506, "helm_classic/RAFT": 0.557 } }, { "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1", "name": "Qwen2.5-0.5B-cinstruct-stage1", "developer": "mhl1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1482, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.35, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "microsoft/DialoGPT-medium", "name": "DialoGPT-medium", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1479, "hfopenllm_v2/BBH": 0.3014, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.1119 } }, { "id": "microsoft/Orca-2-13b", "name": "Orca-2-13b", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3128, "hfopenllm_v2/BBH": 0.4884, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.513, "hfopenllm_v2/MMLU-PRO": 0.2749 } }, { "id": "microsoft/Orca-2-7b", "name": "Orca-2-7b", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2183, "hfopenllm_v2/BBH": 0.4452, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.5026, "hfopenllm_v2/MMLU-PRO": 0.2319 } }, { "id": "microsoft/phi-1", "name": "phi-1", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2068, "hfopenllm_v2/BBH": 0.3139, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3525, "hfopenllm_v2/MMLU-PRO": 0.1162 } }, { "id": "microsoft/phi-1_5", "name": "phi-1_5", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2033, "hfopenllm_v2/BBH": 0.336, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3404, "hfopenllm_v2/MMLU-PRO": 0.1691 } }, { "id": "microsoft/phi-2", "name": "Phi-2", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.169, "helm_lite/NarrativeQA": 0.703, "helm_lite/NaturalQuestions (closed-book)": 0.155, "helm_lite/OpenbookQA": 0.798, "helm_lite/MMLU": 0.518, "helm_lite/MATH": 0.255, "helm_lite/GSM8K": 0.581, "helm_lite/LegalBench": 0.334, "helm_lite/MedQA": 0.41, "helm_lite/WMT 2014": 0.038, "helm_mmlu/MMLU All Subjects": 0.584, "helm_mmlu/Abstract Algebra": 0.31, "helm_mmlu/Anatomy": 0.437, "helm_mmlu/College Physics": 0.382, "helm_mmlu/Computer Security": 0.73, "helm_mmlu/Econometrics": 0.342, "helm_mmlu/Global Facts": 0.35, "helm_mmlu/Jurisprudence": 0.694, "helm_mmlu/Philosophy": 0.598, "helm_mmlu/Professional Psychology": 0.572, "helm_mmlu/Us Foreign Policy": 0.78, "helm_mmlu/Astronomy": 0.605, "helm_mmlu/Business Ethics": 0.59, "helm_mmlu/Clinical Knowledge": 0.619, "helm_mmlu/Conceptual Physics": 0.519, "helm_mmlu/Electrical Engineering": 0.545, "helm_mmlu/Elementary Mathematics": 0.463, "helm_mmlu/Formal Logic": 0.389, "helm_mmlu/High School World History": 0.73, "helm_mmlu/Human Sexuality": 0.733, "helm_mmlu/International Law": 0.752, "helm_mmlu/Logical Fallacies": 0.767, "helm_mmlu/Machine Learning": 0.5, "helm_mmlu/Management": 0.748, "helm_mmlu/Marketing": 0.833, "helm_mmlu/Medical Genetics": 0.62, "helm_mmlu/Miscellaneous": 0.688, "helm_mmlu/Moral Scenarios": 0.231, "helm_mmlu/Nutrition": 0.627, "helm_mmlu/Prehistory": 0.605, "helm_mmlu/Public Relations": 0.673, "helm_mmlu/Security Studies": 0.702, "helm_mmlu/Sociology": 0.816, "helm_mmlu/Virology": 0.47, "helm_mmlu/World Religions": 0.702, "helm_mmlu/Mean win rate": 0.824, "hfopenllm_v2/IFEval": 0.2739, "hfopenllm_v2/BBH": 0.4881, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4099, "hfopenllm_v2/MMLU-PRO": 0.2628 } }, { "id": "microsoft/Phi-3-medium-128k-instruct", "name": "Phi-3-medium-128k-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.604, "hfopenllm_v2/BBH": 0.6382, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4129, "hfopenllm_v2/MMLU-PRO": 0.4712 } }, { "id": "microsoft/phi-3-medium-4k-instruct", "name": "Phi-3 14B", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.509, "helm_lite/NarrativeQA": 0.724, "helm_lite/NaturalQuestions (closed-book)": 0.278, "helm_lite/OpenbookQA": 0.916, "helm_lite/MMLU": 0.675, "helm_lite/MATH": 0.611, "helm_lite/GSM8K": 0.878, "helm_lite/LegalBench": 0.593, "helm_lite/MedQA": 0.696, "helm_lite/WMT 2014": 0.17, "helm_mmlu/MMLU All Subjects": 0.775, "helm_mmlu/Abstract Algebra": 0.5, "helm_mmlu/Anatomy": 0.719, "helm_mmlu/College Physics": 0.529, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.614, "helm_mmlu/Global Facts": 0.5, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.804, "helm_mmlu/Professional Psychology": 0.835, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.849, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.826, "helm_mmlu/Conceptual Physics": 0.809, "helm_mmlu/Electrical Engineering": 0.683, "helm_mmlu/Elementary Mathematics": 0.709, "helm_mmlu/Formal Logic": 0.587, "helm_mmlu/High School World History": 0.903, "helm_mmlu/Human Sexuality": 0.863, "helm_mmlu/International Law": 0.934, "helm_mmlu/Logical Fallacies": 0.828, "helm_mmlu/Machine Learning": 0.696, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.919, "helm_mmlu/Medical Genetics": 0.91, "helm_mmlu/Miscellaneous": 0.894, "helm_mmlu/Moral Scenarios": 0.639, "helm_mmlu/Nutrition": 0.837, "helm_mmlu/Prehistory": 0.867, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.829, "helm_mmlu/Sociology": 0.891, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.015, "hfopenllm_v2/IFEval": 0.6423, "hfopenllm_v2/BBH": 0.6412, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4258, "hfopenllm_v2/MMLU-PRO": 0.4676 } }, { "id": "microsoft/Phi-3-mini-128k-instruct", "name": "Phi-3-mini-128k-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5976, "hfopenllm_v2/BBH": 0.5575, "hfopenllm_v2/MATH Level 5": 0.1405, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3937, "hfopenllm_v2/MMLU-PRO": 0.3734 } }, { "id": "microsoft/Phi-3-mini-4k-instruct", "name": "Phi-3-mini-4k-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5613, "hfopenllm_v2/BBH": 0.5676, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.395, "hfopenllm_v2/MMLU-PRO": 0.3866 } }, { "id": "microsoft/Phi-3-small-128k-instruct", "name": "Phi-3-small-128k-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6368, "hfopenllm_v2/BBH": 0.6202, "hfopenllm_v2/MATH Level 5": 0.2026, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4378, "hfopenllm_v2/MMLU-PRO": 0.4491 } }, { "id": "microsoft/phi-3-small-8k-instruct", "name": "Phi-3 7B", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.473, "helm_lite/NarrativeQA": 0.754, "helm_lite/NaturalQuestions (closed-book)": 0.324, "helm_lite/OpenbookQA": 0.912, "helm_lite/MMLU": 0.659, "helm_lite/MATH": 0.703, "helm_lite/GSM8K": -1.0, "helm_lite/LegalBench": 0.584, "helm_lite/MedQA": 0.672, "helm_lite/WMT 2014": 0.154, "helm_mmlu/MMLU All Subjects": 0.757, "helm_mmlu/Abstract Algebra": 0.44, "helm_mmlu/Anatomy": 0.726, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.596, "helm_mmlu/Global Facts": 0.52, "helm_mmlu/Jurisprudence": 0.843, "helm_mmlu/Philosophy": 0.82, "helm_mmlu/Professional Psychology": 0.835, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.849, "helm_mmlu/Business Ethics": 0.77, "helm_mmlu/Clinical Knowledge": 0.83, "helm_mmlu/Conceptual Physics": 0.779, "helm_mmlu/Electrical Engineering": 0.69, "helm_mmlu/Elementary Mathematics": 0.619, "helm_mmlu/Formal Logic": 0.595, "helm_mmlu/High School World History": 0.848, "helm_mmlu/Human Sexuality": 0.817, "helm_mmlu/International Law": 0.851, "helm_mmlu/Logical Fallacies": 0.81, "helm_mmlu/Machine Learning": 0.652, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.897, "helm_mmlu/Medical Genetics": 0.84, "helm_mmlu/Miscellaneous": 0.871, "helm_mmlu/Moral Scenarios": 0.711, "helm_mmlu/Nutrition": 0.833, "helm_mmlu/Prehistory": 0.858, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.804, "helm_mmlu/Sociology": 0.886, "helm_mmlu/Virology": 0.548, "helm_mmlu/World Religions": 0.825, "helm_mmlu/Mean win rate": 0.708, "hfopenllm_v2/IFEval": 0.6497, "hfopenllm_v2/BBH": 0.6208, "hfopenllm_v2/MATH Level 5": 0.1887, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.4506 } }, { "id": "microsoft/Phi-3.5-mini-instruct", "name": "Phi-3.5-mini-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5775, "hfopenllm_v2/BBH": 0.5518, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4021, "hfopenllm_v2/MMLU-PRO": 0.3962 } }, { "id": "microsoft/Phi-3.5-MoE-instruct", "name": "Phi-3.5-MoE-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6925, "hfopenllm_v2/BBH": 0.6408, "hfopenllm_v2/MATH Level 5": 0.3119, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4565, "hfopenllm_v2/MMLU-PRO": 0.4658 } }, { "id": "microsoft/phi-4", "name": "phi-4", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0585, "hfopenllm_v2/BBH": 0.6691, "hfopenllm_v2/MATH Level 5": 0.3165, "hfopenllm_v2/GPQA": 0.406, "hfopenllm_v2/MUSR": 0.5034, "hfopenllm_v2/MMLU-PRO": 0.5287 } }, { "id": "microsoft/Phi-4-mini-instruct", "name": "Phi-4-mini-instruct", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7378, "hfopenllm_v2/BBH": 0.5689, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.3932 } }, { "id": "microsoft/phi-4-prompt", "name": "Phi-4 (Prompt)", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 70.0, "bfcl/bfcl.overall.overall_accuracy": 28.79, "bfcl/bfcl.overall.total_cost_usd": 8.72, "bfcl/bfcl.overall.latency_mean_s": 9.49, "bfcl/bfcl.overall.latency_std_s": 26.73, "bfcl/bfcl.overall.latency_p95_s": 23.02, "bfcl/bfcl.non_live.ast_accuracy": 69.56, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 65.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 49.5, "bfcl/bfcl.live.live_accuracy": 60.7, "bfcl/bfcl.live.live_simple_ast_accuracy": 65.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 59.64, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 41.67, "bfcl/bfcl.multi_turn.accuracy": 3.88, "bfcl/bfcl.multi_turn.base_accuracy": 9.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 3.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 3.0, "bfcl/bfcl.web_search.accuracy": 4.5, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 5.0, "bfcl/bfcl.memory.accuracy": 24.73, "bfcl/bfcl.memory.kv_accuracy": 17.42, "bfcl/bfcl.memory.vector_accuracy": 25.16, "bfcl/bfcl.memory.recursive_summarization_accuracy": 31.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 50.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.55, "bfcl/bfcl.format_sensitivity.max_delta": 81.5, "bfcl/bfcl.format_sensitivity.stddev": 23.34 } }, { "id": "microsoft/TNLG-v2-530B", "name": "TNLG v2 530B", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.787, "helm_classic/MMLU": 0.469, "helm_classic/BoolQ": 0.809, "helm_classic/NarrativeQA": 0.722, "helm_classic/NaturalQuestions (open-book)": 0.642, "helm_classic/QuAC": 0.39, "helm_classic/HellaSwag": 0.799, "helm_classic/OpenbookQA": 0.562, "helm_classic/TruthfulQA": 0.251, "helm_classic/MS MARCO (TREC)": 0.643, "helm_classic/CNN/DailyMail": 0.161, "helm_classic/XSUM": 0.169, "helm_classic/IMDB": 0.941, "helm_classic/CivilComments": 0.601, "helm_classic/RAFT": 0.679 } }, { "id": "microsoft/TNLG-v2-6.7B", "name": "TNLG v2 6.7B", "developer": "microsoft", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.309, "helm_classic/MMLU": 0.242, "helm_classic/BoolQ": 0.698, "helm_classic/NarrativeQA": 0.631, "helm_classic/NaturalQuestions (open-book)": 0.561, "helm_classic/QuAC": 0.345, "helm_classic/HellaSwag": 0.704, "helm_classic/OpenbookQA": 0.478, "helm_classic/TruthfulQA": 0.167, "helm_classic/MS MARCO (TREC)": 0.332, "helm_classic/CNN/DailyMail": 0.146, "helm_classic/XSUM": 0.11, "helm_classic/IMDB": 0.927, "helm_classic/CivilComments": 0.532, "helm_classic/RAFT": 0.525 } }, { "id": "mightbe/Better-PairRM", "name": "mightbe/Better-PairRM", "developer": "mightbe", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.673, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.3925, "reward-bench/Safety": 0.8203, "reward-bench/Reasoning": 0.4983, "reward-bench/Prior Sets (0.5 weight)": 0.724 } }, { "id": "migtissera/Llama-3-70B-Synthia-v3.5", "name": "Llama-3-70B-Synthia-v3.5", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6076, "hfopenllm_v2/BBH": 0.6489, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4922, "hfopenllm_v2/MMLU-PRO": 0.4658 } }, { "id": "migtissera/Llama-3-8B-Synthia-v3.5", "name": "Llama-3-8B-Synthia-v3.5", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.507, "hfopenllm_v2/BBH": 0.4888, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4044, "hfopenllm_v2/MMLU-PRO": 0.303 } }, { "id": "migtissera/Tess-3-7B-SFT", "name": "Tess-3-7B-SFT", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3946, "hfopenllm_v2/BBH": 0.4607, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4113, "hfopenllm_v2/MMLU-PRO": 0.3034 } }, { "id": "migtissera/Tess-3-Mistral-Nemo-12B", "name": "Tess-3-Mistral-Nemo-12B", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3355, "hfopenllm_v2/BBH": 0.4899, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.4458, "hfopenllm_v2/MMLU-PRO": 0.2565 } }, { "id": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B", "name": "Tess-v2.5-Phi-3-medium-128k-14B", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4539, "hfopenllm_v2/BBH": 0.6207, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4113, "hfopenllm_v2/MMLU-PRO": 0.3732 } }, { "id": "migtissera/Tess-v2.5.2-Qwen2-72B", "name": "Tess-v2.5.2-Qwen2-72B", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4494, "hfopenllm_v2/BBH": 0.6647, "hfopenllm_v2/MATH Level 5": 0.2938, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4188, "hfopenllm_v2/MMLU-PRO": 0.5561 } }, { "id": "migtissera/Trinity-2-Codestral-22B", "name": "Trinity-2-Codestral-22B", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4202, "hfopenllm_v2/BBH": 0.5593, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4111, "hfopenllm_v2/MMLU-PRO": 0.3308 } }, { "id": "migtissera/Trinity-2-Codestral-22B-v0.2", "name": "Trinity-2-Codestral-22B-v0.2", "developer": "migtissera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.443, "hfopenllm_v2/BBH": 0.5706, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4031, "hfopenllm_v2/MMLU-PRO": 0.3354 } }, { "id": "Minami-su/Amara-o1-7B-Qwen", "name": "Amara-o1-7B-Qwen", "developer": "Minami-su", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.739, "hfopenllm_v2/BBH": 0.5199, "hfopenllm_v2/MATH Level 5": 0.5181, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.4083 } }, { "id": "Minami-su/Amara-o2-7B-Qwen", "name": "Amara-o2-7B-Qwen", "developer": "Minami-su", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7147, "hfopenllm_v2/BBH": 0.5173, "hfopenllm_v2/MATH Level 5": 0.4086, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.4165 } }, { "id": "Minami-su/test-7B-00", "name": "test-7B-00", "developer": "Minami-su", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.669, "hfopenllm_v2/BBH": 0.4466, "hfopenllm_v2/MATH Level 5": 0.4517, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4126, "hfopenllm_v2/MMLU-PRO": 0.3588 } }, { "id": "Minami-su/test-7B-01", "name": "test-7B-01", "developer": "Minami-su", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6736, "hfopenllm_v2/BBH": 0.4422, "hfopenllm_v2/MATH Level 5": 0.4554, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4153, "hfopenllm_v2/MMLU-PRO": 0.3536 } }, { "id": "Minami-su/test-v2-7B-00", "name": "test-v2-7B-00", "developer": "Minami-su", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6747, "hfopenllm_v2/BBH": 0.4416, "hfopenllm_v2/MATH Level 5": 0.4418, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4154, "hfopenllm_v2/MMLU-PRO": 0.3472 } }, { "id": "mindw96/DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", "name": "DeepSeek-llama3.3-Bllossom-8B-DACON-LLM3", "developer": "mindw96", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1388, "hfopenllm_v2/BBH": 0.3068, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3792, "hfopenllm_v2/MMLU-PRO": 0.1106 } }, { "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5", "name": "Qwen1.5-1.8B-OpenHermes-2.5", "developer": "minghaowu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2778, "hfopenllm_v2/BBH": 0.3375, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3529, "hfopenllm_v2/MMLU-PRO": 0.1792 } }, { "id": "minimax/Minimax-2.5", "name": "Minimax-2.5", "developer": "minimax", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.339 } }, { "id": "minimax/minimax-m2", "name": "MiniMax M2", "developer": "MiniMax", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 30.0 } }, { "id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "developer": "MiniMax", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 29.2 } }, { "id": "minimax/minimax-m2.5", "name": "Minimax m2.5", "developer": "Minimax", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 42.2 } }, { "id": "ministral/Ministral-3b-instruct", "name": "Ministral-3b-instruct", "developer": "ministral", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1358, "hfopenllm_v2/BBH": 0.3192, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1093 } }, { "id": "mistral-community/Mistral-7B-v0.2", "name": "Mistral-7B-v0.2", "developer": "mistral-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2266, "hfopenllm_v2/BBH": 0.451, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4032, "hfopenllm_v2/MMLU-PRO": 0.2953 } }, { "id": "mistral-community/Mixtral-8x22B-v0.1", "name": "Mixtral-8x22B-v0.1", "developer": "mistral-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3167, "hfopenllm_v2/BBH": 0.38, "hfopenllm_v2/MATH Level 5": 0.1543, "hfopenllm_v2/GPQA": 0.33, "hfopenllm_v2/MUSR": 0.3533, "hfopenllm_v2/MMLU-PRO": 0.36 } }, { "id": "mistral-community/mixtral-8x22B-v0.3", "name": "mixtral-8x22B-v0.3", "developer": "mistral-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2583, "hfopenllm_v2/BBH": 0.625, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3775, "hfopenllm_v2/MUSR": 0.4037, "hfopenllm_v2/MMLU-PRO": 0.4639 } }, { "id": "mistralai/Codestral-22B-v0.1", "name": "Codestral-22B-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5772, "hfopenllm_v2/BBH": 0.5139, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3156 } }, { "id": "mistralai/Ministral-8B-Instruct-2410", "name": "Ministral-8B-Instruct-2410", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5896, "hfopenllm_v2/BBH": 0.4762, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.3291 } }, { "id": "mistralai/ministral-8b-instruct-2410-fc", "name": "Ministral-8B-Instruct-2410 (FC)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 105.0, "bfcl/bfcl.overall.overall_accuracy": 11.1, "bfcl/bfcl.overall.total_cost_usd": 70.01, "bfcl/bfcl.overall.latency_mean_s": 82.07, "bfcl/bfcl.overall.latency_std_s": 212.99, "bfcl/bfcl.overall.latency_p95_s": 568.59, "bfcl/bfcl.non_live.ast_accuracy": 0.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 0.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_accuracy": 0.0, "bfcl/bfcl.live.live_simple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 0.0, "bfcl/bfcl.multi_turn.base_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.0, "bfcl/bfcl.web_search.accuracy": 1.0, "bfcl/bfcl.web_search.base_accuracy": 2.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 4.52, "bfcl/bfcl.memory.kv_accuracy": 3.87, "bfcl/bfcl.memory.vector_accuracy": 7.1, "bfcl/bfcl.memory.recursive_summarization_accuracy": 2.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 0.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 100.0, "bfcl/bfcl.format_sensitivity.max_delta": 0.0, "bfcl/bfcl.format_sensitivity.stddev": 0.0 } }, { "id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral-7B-Instruct-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4487, "hfopenllm_v2/BBH": 0.3355, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3848, "hfopenllm_v2/MMLU-PRO": 0.2414 } }, { "id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral-7B-Instruct-v0.2", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5496, "hfopenllm_v2/BBH": 0.446, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.2717 } }, { "id": "mistralai/mistral-7b-instruct-v0.3", "name": "Mistral Instruct v0.3 7B", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.376, "helm_capabilities/MMLU-Pro": 0.277, "helm_capabilities/GPQA": 0.303, "helm_capabilities/IFEval": 0.567, "helm_capabilities/WildBench": 0.66, "helm_capabilities/Omni-MATH": 0.072, "helm_lite/Mean win rate": 0.196, "helm_lite/NarrativeQA": 0.716, "helm_lite/NaturalQuestions (closed-book)": 0.253, "helm_lite/OpenbookQA": 0.79, "helm_lite/MMLU": 0.51, "helm_lite/MATH": 0.289, "helm_lite/GSM8K": 0.538, "helm_lite/LegalBench": 0.331, "helm_lite/MedQA": 0.517, "helm_lite/WMT 2014": 0.142, "helm_mmlu/MMLU All Subjects": 0.599, "helm_mmlu/Abstract Algebra": 0.27, "helm_mmlu/Anatomy": 0.585, "helm_mmlu/College Physics": 0.343, "helm_mmlu/Computer Security": 0.7, "helm_mmlu/Econometrics": 0.421, "helm_mmlu/Global Facts": 0.33, "helm_mmlu/Jurisprudence": 0.713, "helm_mmlu/Philosophy": 0.659, "helm_mmlu/Professional Psychology": 0.641, "helm_mmlu/Us Foreign Policy": 0.79, "helm_mmlu/Astronomy": 0.638, "helm_mmlu/Business Ethics": 0.57, "helm_mmlu/Clinical Knowledge": 0.687, "helm_mmlu/Conceptual Physics": 0.549, "helm_mmlu/Electrical Engineering": 0.572, "helm_mmlu/Elementary Mathematics": 0.402, "helm_mmlu/Formal Logic": 0.397, "helm_mmlu/High School World History": 0.759, "helm_mmlu/Human Sexuality": 0.702, "helm_mmlu/International Law": 0.76, "helm_mmlu/Logical Fallacies": 0.712, "helm_mmlu/Machine Learning": 0.455, "helm_mmlu/Management": 0.767, "helm_mmlu/Marketing": 0.842, "helm_mmlu/Medical Genetics": 0.75, "helm_mmlu/Miscellaneous": 0.785, "helm_mmlu/Moral Scenarios": 0.393, "helm_mmlu/Nutrition": 0.676, "helm_mmlu/Prehistory": 0.673, "helm_mmlu/Public Relations": 0.636, "helm_mmlu/Security Studies": 0.682, "helm_mmlu/Sociology": 0.806, "helm_mmlu/Virology": 0.47, "helm_mmlu/World Religions": 0.825, "helm_mmlu/Mean win rate": 0.509, "hfopenllm_v2/IFEval": 0.5465, "hfopenllm_v2/BBH": 0.4722, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.3075 } }, { "id": "mistralai/mistral-7b-v0.1", "name": "Mistral v0.1 7B", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.292, "helm_lite/NarrativeQA": 0.716, "helm_lite/NaturalQuestions (closed-book)": 0.367, "helm_lite/OpenbookQA": 0.776, "helm_lite/MMLU": 0.584, "helm_lite/MATH": 0.297, "helm_lite/GSM8K": 0.377, "helm_lite/LegalBench": 0.58, "helm_lite/MedQA": 0.525, "helm_lite/WMT 2014": 0.16, "helm_mmlu/MMLU All Subjects": 0.566, "helm_mmlu/Abstract Algebra": 0.25, "helm_mmlu/Anatomy": 0.467, "helm_mmlu/College Physics": 0.314, "helm_mmlu/Computer Security": 0.69, "helm_mmlu/Econometrics": 0.351, "helm_mmlu/Global Facts": 0.29, "helm_mmlu/Jurisprudence": 0.667, "helm_mmlu/Philosophy": 0.63, "helm_mmlu/Professional Psychology": 0.578, "helm_mmlu/Us Foreign Policy": 0.79, "helm_mmlu/Astronomy": 0.599, "helm_mmlu/Business Ethics": 0.56, "helm_mmlu/Clinical Knowledge": 0.653, "helm_mmlu/Conceptual Physics": 0.451, "helm_mmlu/Electrical Engineering": 0.538, "helm_mmlu/Elementary Mathematics": 0.32, "helm_mmlu/Formal Logic": 0.365, "helm_mmlu/High School World History": 0.726, "helm_mmlu/Human Sexuality": 0.702, "helm_mmlu/International Law": 0.76, "helm_mmlu/Logical Fallacies": 0.693, "helm_mmlu/Machine Learning": 0.438, "helm_mmlu/Management": 0.709, "helm_mmlu/Marketing": 0.833, "helm_mmlu/Medical Genetics": 0.68, "helm_mmlu/Miscellaneous": 0.72, "helm_mmlu/Moral Scenarios": 0.33, "helm_mmlu/Nutrition": 0.657, "helm_mmlu/Prehistory": 0.642, "helm_mmlu/Public Relations": 0.6, "helm_mmlu/Security Studies": 0.731, "helm_mmlu/Sociology": 0.831, "helm_mmlu/Virology": 0.44, "helm_mmlu/World Religions": 0.789, "helm_mmlu/Mean win rate": 0.213, "hfopenllm_v2/IFEval": 0.2386, "hfopenllm_v2/BBH": 0.4419, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4139, "hfopenllm_v2/MMLU-PRO": 0.3013 } }, { "id": "mistralai/Mistral-7B-v0.3", "name": "Mistral-7B-v0.3", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2266, "hfopenllm_v2/BBH": 0.4517, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4032, "hfopenllm_v2/MMLU-PRO": 0.2953 } }, { "id": "mistralai/mistral-large-2402", "name": "Mistral Large 2402", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.328, "helm_lite/NarrativeQA": 0.454, "helm_lite/NaturalQuestions (closed-book)": 0.311, "helm_lite/OpenbookQA": 0.894, "helm_lite/MMLU": 0.638, "helm_lite/MATH": 0.75, "helm_lite/GSM8K": 0.694, "helm_lite/LegalBench": 0.479, "helm_lite/MedQA": 0.499, "helm_lite/WMT 2014": 0.182, "helm_mmlu/MMLU All Subjects": 0.688, "helm_mmlu/Abstract Algebra": 0.45, "helm_mmlu/Anatomy": 0.674, "helm_mmlu/College Physics": 0.373, "helm_mmlu/Computer Security": 0.8, "helm_mmlu/Econometrics": 0.64, "helm_mmlu/Global Facts": 0.34, "helm_mmlu/Jurisprudence": 0.815, "helm_mmlu/Philosophy": 0.794, "helm_mmlu/Professional Psychology": 0.809, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.842, "helm_mmlu/Business Ethics": 0.67, "helm_mmlu/Clinical Knowledge": 0.751, "helm_mmlu/Conceptual Physics": 0.574, "helm_mmlu/Electrical Engineering": 0.545, "helm_mmlu/Elementary Mathematics": 0.508, "helm_mmlu/Formal Logic": 0.532, "helm_mmlu/High School World History": 0.886, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.868, "helm_mmlu/Logical Fallacies": 0.81, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.854, "helm_mmlu/Marketing": 0.897, "helm_mmlu/Medical Genetics": 0.74, "helm_mmlu/Miscellaneous": 0.9, "helm_mmlu/Moral Scenarios": 0.579, "helm_mmlu/Nutrition": 0.791, "helm_mmlu/Prehistory": 0.904, "helm_mmlu/Public Relations": 0.709, "helm_mmlu/Security Studies": 0.824, "helm_mmlu/Sociology": 0.93, "helm_mmlu/Virology": 0.554, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.464 } }, { "id": "mistralai/mistral-large-2407", "name": "Mistral Large 2 2407", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.744, "helm_lite/NarrativeQA": 0.779, "helm_lite/NaturalQuestions (closed-book)": 0.453, "helm_lite/OpenbookQA": 0.932, "helm_lite/MMLU": 0.725, "helm_lite/MATH": 0.677, "helm_lite/GSM8K": 0.912, "helm_lite/LegalBench": 0.646, "helm_lite/MedQA": 0.775, "helm_lite/WMT 2014": 0.192, "helm_mmlu/MMLU All Subjects": 0.8, "helm_mmlu/Abstract Algebra": 0.7, "helm_mmlu/Anatomy": 0.785, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.693, "helm_mmlu/Global Facts": 0.56, "helm_mmlu/Jurisprudence": 0.861, "helm_mmlu/Philosophy": 0.826, "helm_mmlu/Professional Psychology": 0.861, "helm_mmlu/Us Foreign Policy": 0.9, "helm_mmlu/Astronomy": 0.921, "helm_mmlu/Business Ethics": 0.79, "helm_mmlu/Clinical Knowledge": 0.864, "helm_mmlu/Conceptual Physics": 0.864, "helm_mmlu/Electrical Engineering": 0.793, "helm_mmlu/Elementary Mathematics": 0.799, "helm_mmlu/Formal Logic": 0.579, "helm_mmlu/High School World History": 0.92, "helm_mmlu/Human Sexuality": 0.924, "helm_mmlu/International Law": 0.926, "helm_mmlu/Logical Fallacies": 0.847, "helm_mmlu/Machine Learning": 0.661, "helm_mmlu/Management": 0.883, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.9, "helm_mmlu/Miscellaneous": 0.936, "helm_mmlu/Moral Scenarios": 0.839, "helm_mmlu/Nutrition": 0.827, "helm_mmlu/Prehistory": 0.92, "helm_mmlu/Public Relations": 0.764, "helm_mmlu/Security Studies": 0.865, "helm_mmlu/Sociology": 0.91, "helm_mmlu/Virology": 0.59, "helm_mmlu/World Religions": 0.865, "helm_mmlu/Mean win rate": 0.24 } }, { "id": "mistralai/mistral-large-2411", "name": "Mistral Large 2411", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.598, "helm_capabilities/MMLU-Pro": 0.599, "helm_capabilities/GPQA": 0.435, "helm_capabilities/IFEval": 0.876, "helm_capabilities/WildBench": 0.801, "helm_capabilities/Omni-MATH": 0.281 } }, { "id": "mistralai/mistral-large-2411-fc", "name": "mistral-large-2411 (FC)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 46.0, "bfcl/bfcl.overall.overall_accuracy": 38.37, "bfcl/bfcl.overall.total_cost_usd": 115.98, "bfcl/bfcl.overall.latency_mean_s": 2.04, "bfcl/bfcl.overall.latency_std_s": 4.02, "bfcl/bfcl.overall.latency_p95_s": 4.68, "bfcl/bfcl.non_live.ast_accuracy": 84.65, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 83.5, "bfcl/bfcl.live.live_accuracy": 81.87, "bfcl/bfcl.live.live_simple_ast_accuracy": 87.21, "bfcl/bfcl.live.live_multiple_ast_accuracy": 80.72, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 14.12, "bfcl/bfcl.multi_turn.base_accuracy": 18.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 11.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 13.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 13.5, "bfcl/bfcl.web_search.accuracy": 28.0, "bfcl/bfcl.web_search.base_accuracy": 41.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 15.0, "bfcl/bfcl.memory.accuracy": 24.95, "bfcl/bfcl.memory.kv_accuracy": 18.71, "bfcl/bfcl.memory.vector_accuracy": 29.03, "bfcl/bfcl.memory.recursive_summarization_accuracy": 27.1, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 68.92 } }, { "id": "mistralai/mistral-large-2411-prompt", "name": "mistral-large-2411 (Prompt)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 63.0, "bfcl/bfcl.overall.overall_accuracy": 31.84, "bfcl/bfcl.overall.total_cost_usd": 232.42, "bfcl/bfcl.overall.latency_mean_s": 1.82, "bfcl/bfcl.overall.latency_std_s": 7.15, "bfcl/bfcl.overall.latency_p95_s": 4.08, "bfcl/bfcl.non_live.ast_accuracy": 83.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.0, "bfcl/bfcl.live.live_accuracy": 68.1, "bfcl/bfcl.live.live_simple_ast_accuracy": 83.72, "bfcl/bfcl.live.live_multiple_ast_accuracy": 64.01, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 13.75, "bfcl/bfcl.multi_turn.base_accuracy": 20.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 5.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 11.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 19.0, "bfcl/bfcl.web_search.accuracy": 20.0, "bfcl/bfcl.web_search.base_accuracy": 28.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 12.0, "bfcl/bfcl.memory.accuracy": 23.66, "bfcl/bfcl.memory.kv_accuracy": 16.77, "bfcl/bfcl.memory.vector_accuracy": 30.97, "bfcl/bfcl.memory.recursive_summarization_accuracy": 23.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 38.77, "bfcl/bfcl.format_sensitivity.max_delta": 13.5, "bfcl/bfcl.format_sensitivity.stddev": 3.91 } }, { "id": "mistralai/Mistral-Large-Instruct-2411", "name": "Mistral-Large-Instruct-2411", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8401, "hfopenllm_v2/BBH": 0.6747, "hfopenllm_v2/MATH Level 5": 0.4955, "hfopenllm_v2/GPQA": 0.4371, "hfopenllm_v2/MUSR": 0.454, "hfopenllm_v2/MMLU-PRO": 0.5562 } }, { "id": "mistralai/mistral-medium-2312", "name": "Mistral Medium 2312", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.268, "helm_lite/NarrativeQA": 0.449, "helm_lite/NaturalQuestions (closed-book)": 0.29, "helm_lite/OpenbookQA": 0.83, "helm_lite/MMLU": 0.618, "helm_lite/MATH": 0.565, "helm_lite/GSM8K": 0.706, "helm_lite/LegalBench": 0.452, "helm_lite/MedQA": 0.61, "helm_lite/WMT 2014": 0.169 } }, { "id": "mistralai/mistral-medium-2505", "name": "Mistral-Medium-2505", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 48.0, "bfcl/bfcl.overall.overall_accuracy": 37.69, "bfcl/bfcl.overall.total_cost_usd": 36.51, "bfcl/bfcl.overall.latency_mean_s": 1.21, "bfcl/bfcl.overall.latency_std_s": 3.5, "bfcl/bfcl.overall.latency_p95_s": 2.86, "bfcl/bfcl.non_live.ast_accuracy": 85.33, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 85.5, "bfcl/bfcl.live.live_accuracy": 66.03, "bfcl/bfcl.live.live_simple_ast_accuracy": 80.23, "bfcl/bfcl.live.live_multiple_ast_accuracy": 62.39, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 9.88, "bfcl/bfcl.multi_turn.base_accuracy": 13.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 6.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 6.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 13.5, "bfcl/bfcl.web_search.accuracy": 39.0, "bfcl/bfcl.web_search.base_accuracy": 41.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 37.0, "bfcl/bfcl.memory.accuracy": 21.72, "bfcl/bfcl.memory.kv_accuracy": 16.13, "bfcl/bfcl.memory.vector_accuracy": 14.84, "bfcl/bfcl.memory.recursive_summarization_accuracy": 34.19, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.49, "bfcl/bfcl.format_sensitivity.max_delta": 21.5, "bfcl/bfcl.format_sensitivity.stddev": 5.02 } }, { "id": "mistralai/mistral-medium-2505-fc", "name": "Mistral-Medium-2505 (FC)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 49.0, "bfcl/bfcl.overall.overall_accuracy": 37.56, "bfcl/bfcl.overall.total_cost_usd": 18.8, "bfcl/bfcl.overall.latency_mean_s": 1.6, "bfcl/bfcl.overall.latency_std_s": 4.44, "bfcl/bfcl.overall.latency_p95_s": 4.19, "bfcl/bfcl.non_live.ast_accuracy": 67.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 39.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 78.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 83.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 69.0, "bfcl/bfcl.live.live_accuracy": 67.95, "bfcl/bfcl.live.live_simple_ast_accuracy": 67.05, "bfcl/bfcl.live.live_multiple_ast_accuracy": 68.09, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 10.75, "bfcl/bfcl.multi_turn.base_accuracy": 15.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 7.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 7.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 13.0, "bfcl/bfcl.web_search.accuracy": 35.0, "bfcl/bfcl.web_search.base_accuracy": 36.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 34.0, "bfcl/bfcl.memory.accuracy": 23.01, "bfcl/bfcl.memory.kv_accuracy": 15.48, "bfcl/bfcl.memory.vector_accuracy": 20.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 33.55, "bfcl/bfcl.relevance.relevance_detection_accuracy": 62.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 91.95 } }, { "id": "mistralai/mistral-medium-3", "name": "mistral-medium-3", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.5511, "global-mmlu-lite/Culturally Sensitive": 0.5391, "global-mmlu-lite/Culturally Agnostic": 0.5631, "global-mmlu-lite/Arabic": 0.455, "global-mmlu-lite/English": 0.38, "global-mmlu-lite/Bengali": 0.5175, "global-mmlu-lite/German": 0.4775, "global-mmlu-lite/French": 0.41, "global-mmlu-lite/Hindi": 0.555, "global-mmlu-lite/Indonesian": 0.515, "global-mmlu-lite/Italian": 0.535, "global-mmlu-lite/Japanese": 0.58, "global-mmlu-lite/Korean": 0.595, "global-mmlu-lite/Portuguese": 0.5175, "global-mmlu-lite/Spanish": 0.5375, "global-mmlu-lite/Swahili": 0.7075, "global-mmlu-lite/Yoruba": 0.7675, "global-mmlu-lite/Chinese": 0.535, "global-mmlu-lite/Burmese": 0.7325 } }, { "id": "mistralai/Mistral-Nemo-Base-2407", "name": "Mistral-Nemo-Base-2407", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.163, "hfopenllm_v2/BBH": 0.5035, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3921, "hfopenllm_v2/MMLU-PRO": 0.3472 } }, { "id": "mistralai/Mistral-Nemo-Instruct-2407", "name": "Mistral-Nemo-Instruct-2407", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.638, "hfopenllm_v2/BBH": 0.5037, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.39, "hfopenllm_v2/MMLU-PRO": 0.3517 } }, { "id": "mistralai/mistral-small-2402", "name": "Mistral Small 2402", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.288, "helm_lite/NarrativeQA": 0.519, "helm_lite/NaturalQuestions (closed-book)": 0.304, "helm_lite/OpenbookQA": 0.862, "helm_lite/MMLU": 0.593, "helm_lite/MATH": 0.621, "helm_lite/GSM8K": 0.734, "helm_lite/LegalBench": 0.389, "helm_lite/MedQA": 0.616, "helm_lite/WMT 2014": 0.169, "helm_mmlu/MMLU All Subjects": 0.687, "helm_mmlu/Abstract Algebra": 0.26, "helm_mmlu/Anatomy": 0.674, "helm_mmlu/College Physics": 0.402, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.614, "helm_mmlu/Global Facts": 0.45, "helm_mmlu/Jurisprudence": 0.833, "helm_mmlu/Philosophy": 0.765, "helm_mmlu/Professional Psychology": 0.768, "helm_mmlu/Us Foreign Policy": 0.89, "helm_mmlu/Astronomy": 0.77, "helm_mmlu/Business Ethics": 0.71, "helm_mmlu/Clinical Knowledge": 0.766, "helm_mmlu/Conceptual Physics": 0.685, "helm_mmlu/Electrical Engineering": 0.628, "helm_mmlu/Elementary Mathematics": 0.415, "helm_mmlu/Formal Logic": 0.516, "helm_mmlu/High School World History": 0.857, "helm_mmlu/Human Sexuality": 0.824, "helm_mmlu/International Law": 0.826, "helm_mmlu/Logical Fallacies": 0.804, "helm_mmlu/Machine Learning": 0.562, "helm_mmlu/Management": 0.786, "helm_mmlu/Marketing": 0.906, "helm_mmlu/Medical Genetics": 0.75, "helm_mmlu/Miscellaneous": 0.844, "helm_mmlu/Moral Scenarios": 0.575, "helm_mmlu/Nutrition": 0.761, "helm_mmlu/Prehistory": 0.802, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.788, "helm_mmlu/Sociology": 0.871, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.848, "helm_mmlu/Mean win rate": 0.54 } }, { "id": "mistralai/Mistral-Small-24B-Base-2501", "name": "Mistral-Small-24B-Base-2501", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1672, "hfopenllm_v2/BBH": 0.6442, "hfopenllm_v2/MATH Level 5": 0.1971, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4237, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "mistralai/mistral-small-2503", "name": "mistral-small-2503", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.7852, "global-mmlu-lite/Culturally Sensitive": 0.7537, "global-mmlu-lite/Culturally Agnostic": 0.8166, "global-mmlu-lite/Arabic": 0.7875, "global-mmlu-lite/English": 0.8, "global-mmlu-lite/Bengali": 0.7725, "global-mmlu-lite/German": 0.7975, "global-mmlu-lite/French": 0.8, "global-mmlu-lite/Hindi": 0.795, "global-mmlu-lite/Indonesian": 0.785, "global-mmlu-lite/Italian": 0.805, "global-mmlu-lite/Japanese": 0.77, "global-mmlu-lite/Korean": 0.79, "global-mmlu-lite/Portuguese": 0.7925, "global-mmlu-lite/Spanish": 0.7825, "global-mmlu-lite/Swahili": 0.775, "global-mmlu-lite/Yoruba": 0.735, "global-mmlu-lite/Chinese": 0.7925, "global-mmlu-lite/Burmese": 0.7825, "helm_capabilities/Mean score": 0.558, "helm_capabilities/MMLU-Pro": 0.61, "helm_capabilities/GPQA": 0.392, "helm_capabilities/IFEval": 0.75, "helm_capabilities/WildBench": 0.788, "helm_capabilities/Omni-MATH": 0.248 } }, { "id": "mistralai/mistral-small-2506-fc", "name": "Mistral-small-2506 (FC)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 51.0, "bfcl/bfcl.overall.overall_accuracy": 37.15, "bfcl/bfcl.overall.total_cost_usd": 5.2, "bfcl/bfcl.overall.latency_mean_s": 1.48, "bfcl/bfcl.overall.latency_std_s": 18.25, "bfcl/bfcl.overall.latency_p95_s": 2.5, "bfcl/bfcl.non_live.ast_accuracy": 73.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 38.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 83.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 78.5, "bfcl/bfcl.live.live_accuracy": 77.28, "bfcl/bfcl.live.live_simple_ast_accuracy": 69.38, "bfcl/bfcl.live.live_multiple_ast_accuracy": 79.39, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 11.5, "bfcl/bfcl.multi_turn.base_accuracy": 17.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 6.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 10.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 12.0, "bfcl/bfcl.web_search.accuracy": 31.0, "bfcl/bfcl.web_search.base_accuracy": 37.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 25.0, "bfcl/bfcl.memory.accuracy": 18.06, "bfcl/bfcl.memory.kv_accuracy": 8.39, "bfcl/bfcl.memory.vector_accuracy": 14.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 31.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.94 } }, { "id": "mistralai/mistral-small-2506-prompt", "name": "Mistral-Small-2506 (Prompt)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 59.0, "bfcl/bfcl.overall.overall_accuracy": 32.38, "bfcl/bfcl.overall.total_cost_usd": 6.91, "bfcl/bfcl.overall.latency_mean_s": 0.92, "bfcl/bfcl.overall.latency_std_s": 6.79, "bfcl/bfcl.overall.latency_p95_s": 2.02, "bfcl/bfcl.non_live.ast_accuracy": 89.69, "bfcl/bfcl.non_live.simple_ast_accuracy": 78.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 96.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.5, "bfcl/bfcl.live.live_accuracy": 79.05, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.4, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.54, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 14.75, "bfcl/bfcl.multi_turn.base_accuracy": 20.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 17.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 9.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 12.0, "bfcl/bfcl.web_search.accuracy": 7.5, "bfcl/bfcl.web_search.base_accuracy": 9.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 6.0, "bfcl/bfcl.memory.accuracy": 15.05, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 11.61, "bfcl/bfcl.memory.recursive_summarization_accuracy": 30.97, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 65.73, "bfcl/bfcl.format_sensitivity.max_delta": 50.0, "bfcl/bfcl.format_sensitivity.stddev": 13.57 } }, { "id": "mistralai/Mistral-Small-Instruct-2409", "name": "Mistral-Small-Instruct-2409", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.667, "hfopenllm_v2/BBH": 0.5213, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.3632, "hfopenllm_v2/MMLU-PRO": 0.396 } }, { "id": "mistralai/Mistral-v0.1-7B", "name": "Mistral v0.1 7B", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.884, "helm_classic/MMLU": 0.572, "helm_classic/BoolQ": 0.874, "helm_classic/NarrativeQA": 0.716, "helm_classic/NaturalQuestions (open-book)": 0.687, "helm_classic/QuAC": 0.423, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.422, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.962, "helm_classic/CivilComments": 0.624, "helm_classic/RAFT": 0.707 } }, { "id": "mistralai/mixtral-8x22b", "name": "Mixtral 8x22B", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.705, "helm_lite/NarrativeQA": 0.779, "helm_lite/NaturalQuestions (closed-book)": 0.478, "helm_lite/OpenbookQA": 0.882, "helm_lite/MMLU": 0.701, "helm_lite/MATH": 0.656, "helm_lite/GSM8K": 0.8, "helm_lite/LegalBench": 0.708, "helm_lite/MedQA": 0.704, "helm_lite/WMT 2014": 0.209, "helm_mmlu/MMLU All Subjects": 0.778, "helm_mmlu/Abstract Algebra": 0.48, "helm_mmlu/Anatomy": 0.741, "helm_mmlu/College Physics": 0.569, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.667, "helm_mmlu/Global Facts": 0.56, "helm_mmlu/Jurisprudence": 0.852, "helm_mmlu/Philosophy": 0.842, "helm_mmlu/Professional Psychology": 0.845, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.882, "helm_mmlu/Business Ethics": 0.74, "helm_mmlu/Clinical Knowledge": 0.819, "helm_mmlu/Conceptual Physics": 0.796, "helm_mmlu/Electrical Engineering": 0.766, "helm_mmlu/Elementary Mathematics": 0.622, "helm_mmlu/Formal Logic": 0.627, "helm_mmlu/High School World History": 0.895, "helm_mmlu/Human Sexuality": 0.885, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.877, "helm_mmlu/Machine Learning": 0.661, "helm_mmlu/Management": 0.883, "helm_mmlu/Marketing": 0.915, "helm_mmlu/Medical Genetics": 0.85, "helm_mmlu/Miscellaneous": 0.899, "helm_mmlu/Moral Scenarios": 0.646, "helm_mmlu/Nutrition": 0.866, "helm_mmlu/Prehistory": 0.87, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.865, "helm_mmlu/Sociology": 0.92, "helm_mmlu/Virology": 0.596, "helm_mmlu/World Religions": 0.901, "helm_mmlu/Mean win rate": 0.598 } }, { "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "name": "Mixtral-8x22B-Instruct-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.478, "helm_capabilities/MMLU-Pro": 0.46, "helm_capabilities/GPQA": 0.334, "helm_capabilities/IFEval": 0.724, "helm_capabilities/WildBench": 0.711, "helm_capabilities/Omni-MATH": 0.163, "hfopenllm_v2/IFEval": 0.7184, "hfopenllm_v2/BBH": 0.6125, "hfopenllm_v2/MATH Level 5": 0.1873, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.4483 } }, { "id": "mistralai/Mixtral-8x22B-v0.1", "name": "Mixtral-8x22B-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2583, "hfopenllm_v2/BBH": 0.624, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4037, "hfopenllm_v2/MMLU-PRO": 0.4639 } }, { "id": "mistralai/mixtral-8x7b-32kseqlen", "name": "Mixtral 8x7B 32K seqlen", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.51, "helm_lite/NarrativeQA": 0.767, "helm_lite/NaturalQuestions (closed-book)": 0.427, "helm_lite/OpenbookQA": 0.868, "helm_lite/MMLU": 0.649, "helm_lite/MATH": 0.494, "helm_lite/GSM8K": 0.622, "helm_lite/LegalBench": 0.63, "helm_lite/MedQA": 0.652, "helm_lite/WMT 2014": 0.19, "helm_mmlu/MMLU All Subjects": 0.717, "helm_mmlu/Abstract Algebra": 0.38, "helm_mmlu/Anatomy": 0.696, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.605, "helm_mmlu/Global Facts": 0.46, "helm_mmlu/Jurisprudence": 0.833, "helm_mmlu/Philosophy": 0.797, "helm_mmlu/Professional Psychology": 0.779, "helm_mmlu/Us Foreign Policy": 0.93, "helm_mmlu/Astronomy": 0.829, "helm_mmlu/Business Ethics": 0.72, "helm_mmlu/Clinical Knowledge": 0.785, "helm_mmlu/Conceptual Physics": 0.681, "helm_mmlu/Electrical Engineering": 0.676, "helm_mmlu/Elementary Mathematics": 0.476, "helm_mmlu/Formal Logic": 0.532, "helm_mmlu/High School World History": 0.886, "helm_mmlu/Human Sexuality": 0.87, "helm_mmlu/International Law": 0.86, "helm_mmlu/Logical Fallacies": 0.767, "helm_mmlu/Machine Learning": 0.509, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.923, "helm_mmlu/Medical Genetics": 0.76, "helm_mmlu/Miscellaneous": 0.881, "helm_mmlu/Moral Scenarios": 0.444, "helm_mmlu/Nutrition": 0.83, "helm_mmlu/Prehistory": 0.849, "helm_mmlu/Public Relations": 0.682, "helm_mmlu/Security Studies": 0.792, "helm_mmlu/Sociology": 0.871, "helm_mmlu/Virology": 0.506, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.689 } }, { "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "name": "Mixtral-8x7B-Instruct-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.397, "helm_capabilities/MMLU-Pro": 0.335, "helm_capabilities/GPQA": 0.296, "helm_capabilities/IFEval": 0.575, "helm_capabilities/WildBench": 0.673, "helm_capabilities/Omni-MATH": 0.105, "hfopenllm_v2/IFEval": 0.5599, "hfopenllm_v2/BBH": 0.4962, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3692, "reward-bench/Score": 0.7455, "reward-bench/Chat": 0.9497, "reward-bench/Chat Hard": 0.6404, "reward-bench/Safety": 0.7257, "reward-bench/Reasoning": 0.7872, "reward-bench/Prior Sets (0.5 weight)": 0.5033 } }, { "id": "mistralai/Mixtral-8x7B-v0.1", "name": "Mixtral-8x7B-v0.1", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2326, "hfopenllm_v2/BBH": 0.5098, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4413, "hfopenllm_v2/MMLU-PRO": 0.3871 } }, { "id": "mistralai/open-mistral-nemo-2407", "name": "Mistral NeMo 2402", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.333, "helm_lite/NarrativeQA": 0.731, "helm_lite/NaturalQuestions (closed-book)": 0.265, "helm_lite/OpenbookQA": 0.822, "helm_lite/MMLU": 0.604, "helm_lite/MATH": 0.668, "helm_lite/GSM8K": 0.782, "helm_lite/LegalBench": 0.415, "helm_lite/MedQA": 0.59, "helm_lite/WMT 2014": 0.177, "helm_mmlu/MMLU All Subjects": 0.653, "helm_mmlu/Abstract Algebra": 0.29, "helm_mmlu/Anatomy": 0.607, "helm_mmlu/College Physics": 0.373, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.561, "helm_mmlu/Global Facts": 0.4, "helm_mmlu/Jurisprudence": 0.796, "helm_mmlu/Philosophy": 0.733, "helm_mmlu/Professional Psychology": 0.588, "helm_mmlu/Us Foreign Policy": 0.89, "helm_mmlu/Astronomy": 0.691, "helm_mmlu/Business Ethics": 0.49, "helm_mmlu/Clinical Knowledge": 0.736, "helm_mmlu/Conceptual Physics": 0.647, "helm_mmlu/Electrical Engineering": 0.531, "helm_mmlu/Elementary Mathematics": 0.439, "helm_mmlu/Formal Logic": 0.405, "helm_mmlu/High School World History": 0.848, "helm_mmlu/Human Sexuality": 0.702, "helm_mmlu/International Law": 0.769, "helm_mmlu/Logical Fallacies": 0.791, "helm_mmlu/Machine Learning": 0.402, "helm_mmlu/Management": 0.796, "helm_mmlu/Marketing": 0.889, "helm_mmlu/Medical Genetics": 0.78, "helm_mmlu/Miscellaneous": 0.861, "helm_mmlu/Moral Scenarios": 0.381, "helm_mmlu/Nutrition": 0.709, "helm_mmlu/Prehistory": 0.765, "helm_mmlu/Public Relations": 0.718, "helm_mmlu/Security Studies": 0.771, "helm_mmlu/Sociology": 0.726, "helm_mmlu/Virology": 0.56, "helm_mmlu/World Religions": 0.789, "helm_mmlu/Mean win rate": 0.215 } }, { "id": "mistralai/open-mistral-nemo-2407-fc", "name": "Open-Mistral-Nemo-2407 (FC)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 78.0, "bfcl/bfcl.overall.overall_accuracy": 27.63, "bfcl/bfcl.overall.total_cost_usd": 8.12, "bfcl/bfcl.overall.latency_mean_s": 1.07, "bfcl/bfcl.overall.latency_std_s": 11.93, "bfcl/bfcl.overall.latency_p95_s": 1.39, "bfcl/bfcl.non_live.ast_accuracy": 82.81, "bfcl/bfcl.non_live.simple_ast_accuracy": 65.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_accuracy": 73.8, "bfcl/bfcl.live.live_simple_ast_accuracy": 78.68, "bfcl/bfcl.live.live_multiple_ast_accuracy": 72.84, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 7.75, "bfcl/bfcl.multi_turn.base_accuracy": 12.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 6.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 7.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 4.5, "bfcl/bfcl.web_search.accuracy": 7.0, "bfcl/bfcl.web_search.base_accuracy": 9.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 5.0, "bfcl/bfcl.memory.accuracy": 10.32, "bfcl/bfcl.memory.kv_accuracy": 8.39, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 12.9, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 61.77 } }, { "id": "mistralai/open-mistral-nemo-2407-prompt", "name": "Open-Mistral-Nemo-2407 (Prompt)", "developer": "mistralai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 102.0, "bfcl/bfcl.overall.overall_accuracy": 19.31, "bfcl/bfcl.overall.total_cost_usd": 13.8, "bfcl/bfcl.overall.latency_mean_s": 0.84, "bfcl/bfcl.overall.latency_std_s": 7.05, "bfcl/bfcl.overall.latency_p95_s": 1.32, "bfcl/bfcl.non_live.ast_accuracy": 88.46, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 90.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.5, "bfcl/bfcl.live.live_accuracy": 73.95, "bfcl/bfcl.live.live_simple_ast_accuracy": 78.29, "bfcl/bfcl.live.live_multiple_ast_accuracy": 73.03, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 0.75, "bfcl/bfcl.multi_turn.base_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.5, "bfcl/bfcl.web_search.accuracy": 2.5, "bfcl/bfcl.web_search.base_accuracy": 3.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 8.6, "bfcl/bfcl.memory.kv_accuracy": 9.68, "bfcl/bfcl.memory.vector_accuracy": 9.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 6.45, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 6.28, "bfcl/bfcl.format_sensitivity.max_delta": 14.5, "bfcl/bfcl.format_sensitivity.stddev": 4.6 } }, { "id": "mixtao/MixTAO-7Bx2-MoE-v8.1", "name": "MixTAO-7Bx2-MoE-v8.1", "developer": "mixtao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4162, "hfopenllm_v2/BBH": 0.5189, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4463, "hfopenllm_v2/MMLU-PRO": 0.3123 } }, { "id": "mkurman/llama-3.2-MEDIT-3B-o1", "name": "llama-3.2-MEDIT-3B-o1", "developer": "mkurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4382, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3565, "hfopenllm_v2/MMLU-PRO": 0.2741 } }, { "id": "mkurman/phi-4-MedIT-11B-exp-1", "name": "phi-4-MedIT-11B-exp-1", "developer": "mkurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5948, "hfopenllm_v2/BBH": 0.5414, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3848, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "mkurman/phi4-MedIT-10B-o1", "name": "phi4-MedIT-10B-o1", "developer": "mkurman", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3463, "hfopenllm_v2/BBH": 0.5198, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3968, "hfopenllm_v2/MMLU-PRO": 0.3507 } }, { "id": "mkxu/llama-3-8b-instruct-fpo", "name": "llama-3-8b-instruct-fpo", "developer": "mkxu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.679, "hfopenllm_v2/BBH": 0.4959, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.3605 } }, { "id": "mkxu/llama-3-8b-po1", "name": "llama-3-8b-po1", "developer": "mkxu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4081, "hfopenllm_v2/BBH": 0.4976, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.3562 } }, { "id": "mlabonne/AlphaMonarch-7B", "name": "AlphaMonarch-7B", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4939, "hfopenllm_v2/BBH": 0.4626, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4121, "hfopenllm_v2/MMLU-PRO": 0.2473 } }, { "id": "mlabonne/Beyonder-4x7B-v3", "name": "Beyonder-4x7B-v3", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5608, "hfopenllm_v2/BBH": 0.4671, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.2512 } }, { "id": "mlabonne/BigQwen2.5-52B-Instruct", "name": "BigQwen2.5-52B-Instruct", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7913, "hfopenllm_v2/BBH": 0.7121, "hfopenllm_v2/MATH Level 5": 0.5476, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4113, "hfopenllm_v2/MMLU-PRO": 0.5519 } }, { "id": "mlabonne/BigQwen2.5-Echo-47B-Instruct", "name": "BigQwen2.5-Echo-47B-Instruct", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7357, "hfopenllm_v2/BBH": 0.6125, "hfopenllm_v2/MATH Level 5": 0.4381, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.4734 } }, { "id": "mlabonne/ChimeraLlama-3-8B-v2", "name": "ChimeraLlama-3-8B-v2", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4469, "hfopenllm_v2/BBH": 0.5046, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3791, "hfopenllm_v2/MMLU-PRO": 0.3569 } }, { "id": "mlabonne/ChimeraLlama-3-8B-v3", "name": "ChimeraLlama-3-8B-v3", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4408, "hfopenllm_v2/BBH": 0.4978, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "mlabonne/Daredevil-8B", "name": "Daredevil-8B", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4548, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3939, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "mlabonne/Daredevil-8B-abliterated", "name": "Daredevil-8B-abliterated", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4426, "hfopenllm_v2/BBH": 0.4254, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.407, "hfopenllm_v2/MMLU-PRO": 0.3701 } }, { "id": "mlabonne/Hermes-3-Llama-3.1-70B-lorablated", "name": "Hermes-3-Llama-3.1-70B-lorablated", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3424, "hfopenllm_v2/BBH": 0.6693, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3658, "hfopenllm_v2/MUSR": 0.5029, "hfopenllm_v2/MMLU-PRO": 0.4679 } }, { "id": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", "name": "Meta-Llama-3.1-8B-Instruct-abliterated", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7329, "hfopenllm_v2/BBH": 0.4874, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3649, "hfopenllm_v2/MMLU-PRO": 0.3503 } }, { "id": "mlabonne/NeuralBeagle14-7B", "name": "NeuralBeagle14-7B", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4935, "hfopenllm_v2/BBH": 0.4628, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4319, "hfopenllm_v2/MMLU-PRO": 0.2601 } }, { "id": "mlabonne/NeuralDaredevil-8B-abliterated", "name": "NeuralDaredevil-8B-abliterated", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7561, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.3841 } }, { "id": "mlabonne/OrpoLlama-3-8B", "name": "OrpoLlama-3-8B", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3653, "hfopenllm_v2/BBH": 0.4424, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.2705 } }, { "id": "mlabonne/phixtral-2x2_8", "name": "phixtral-2x2_8", "developer": "mlabonne", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3431, "hfopenllm_v2/BBH": 0.4889, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.2551 } }, { "id": "MLP-KTLim/llama-3-Korean-Bllossom-8B", "name": "llama-3-Korean-Bllossom-8B", "developer": "MLP-KTLim", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5113, "hfopenllm_v2/BBH": 0.49, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3675, "hfopenllm_v2/MMLU-PRO": 0.3594 } }, { "id": "mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", "name": "Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32", "developer": "mlx-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3369, "hfopenllm_v2/BBH": 0.3292, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1638 } }, { "id": "mlx-community/Mistral-Small-24B-Instruct-2501-bf16", "name": "Mistral-Small-24B-Instruct-2501-bf16", "developer": "mlx-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6283, "hfopenllm_v2/BBH": 0.6713, "hfopenllm_v2/MATH Level 5": 0.3225, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4618, "hfopenllm_v2/MMLU-PRO": 0.5395 } }, { "id": "mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", "name": "Llama-3-70B-japanese-suzume-vector-v0.1", "developer": "mmnga", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4649, "hfopenllm_v2/BBH": 0.6542, "hfopenllm_v2/MATH Level 5": 0.2326, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.5224 } }, { "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Llama3-8B-v1.1", "name": "DeepSeek-R1-ReDistill-Llama3-8B-v1.1", "developer": "mobiuslabsgmbh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3704, "hfopenllm_v2/BBH": 0.3473, "hfopenllm_v2/MATH Level 5": 0.3285, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.2198 } }, { "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1", "name": "DeepSeek-R1-ReDistill-Qwen-7B-v1.1", "developer": "mobiuslabsgmbh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3473, "hfopenllm_v2/BBH": 0.3698, "hfopenllm_v2/MATH Level 5": 0.3497, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4009, "hfopenllm_v2/MMLU-PRO": 0.2326 } }, { "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", "name": "Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1", "developer": "ModelCloud", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5269, "hfopenllm_v2/BBH": 0.3253, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1764 } }, { "id": "ModelSpace/GemmaX2-28-9B-v0.1", "name": "GemmaX2-28-9B-v0.1", "developer": "ModelSpace", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0039, "hfopenllm_v2/BBH": 0.3687, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3537, "hfopenllm_v2/MMLU-PRO": 0.2231 } }, { "id": "moeru-ai/L3.1-Moe-2x8B-v0.2", "name": "L3.1-Moe-2x8B-v0.2", "developer": "moeru-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7348, "hfopenllm_v2/BBH": 0.5256, "hfopenllm_v2/MATH Level 5": 0.1699, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.3858 } }, { "id": "moeru-ai/L3.1-Moe-4x8B-v0.1", "name": "L3.1-Moe-4x8B-v0.1", "developer": "moeru-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4332, "hfopenllm_v2/BBH": 0.4939, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3609, "hfopenllm_v2/MMLU-PRO": 0.3454 } }, { "id": "moeru-ai/L3.1-Moe-4x8B-v0.2", "name": "L3.1-Moe-4x8B-v0.2", "developer": "moeru-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5407, "hfopenllm_v2/BBH": 0.4466, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3234, "hfopenllm_v2/MMLU-PRO": 0.2763 } }, { "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct", "name": "gemma-2-2b-LoRA-MonsterInstruct", "developer": "monsterapi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3903, "hfopenllm_v2/BBH": 0.365, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.1987 } }, { "id": "monsterapi/Llama-3_1-8B-Instruct-orca-ORPO", "name": "Llama-3_1-8B-Instruct-orca-ORPO", "developer": "monsterapi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2273, "hfopenllm_v2/BBH": 0.2865, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3445, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "MoonRide/Llama-3.2-3B-Khelavaster", "name": "Llama-3.2-3B-Khelavaster", "developer": "MoonRide", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4925, "hfopenllm_v2/BBH": 0.4516, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.3122 } }, { "id": "moonshot-ai/kimi-k2-instruct", "name": "Kimi K2 Instruct", "developer": "Moonshot AI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 27.8 } }, { "id": "moonshot-ai/kimi-k2-thinking", "name": "Kimi K2 Thinking", "developer": "Moonshot AI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 35.7 } }, { "id": "moonshot-ai/kimi-k2.5", "name": "Kimi K2.5", "developer": "Kimi", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 43.2 } }, { "id": "moonshot/Kimi K2 Thinking", "name": "Kimi K2 Thinking", "developer": "moonshot", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.04, "apex-agents/Overall Pass@8": 0.144, "apex-agents/Overall Mean Score": 0.115, "apex-agents/Investment Banking Pass@1": 0.012, "apex-agents/Management Consulting Pass@1": 0.029, "apex-agents/Corporate Law Pass@1": 0.08, "apex-agents/Corporate Lawyer Mean Score": 0.223 } }, { "id": "moonshot/Kimi K2.5", "name": "Kimi K2.5", "developer": "moonshot", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.402 } }, { "id": "moonshotai/kimi-k2-instruct", "name": "Kimi K2 Instruct", "developer": "moonshotai", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.768, "helm_capabilities/MMLU-Pro": 0.819, "helm_capabilities/GPQA": 0.652, "helm_capabilities/IFEval": 0.85, "helm_capabilities/WildBench": 0.862, "helm_capabilities/Omni-MATH": 0.654 } }, { "id": "moonshotai/moonshotai-kimi-k2-instruct-fc", "name": "Moonshotai-Kimi-K2-Instruct (FC)", "developer": "moonshotai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 11.0, "bfcl/bfcl.overall.overall_accuracy": 59.06, "bfcl/bfcl.overall.total_cost_usd": 6.19, "bfcl/bfcl.overall.latency_mean_s": 6.4, "bfcl/bfcl.overall.latency_std_s": 9.38, "bfcl/bfcl.overall.latency_p95_s": 13.78, "bfcl/bfcl.non_live.ast_accuracy": 81.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 69.42, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 82.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 83.0, "bfcl/bfcl.live.live_accuracy": 78.68, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.78, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.06, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 50.63, "bfcl/bfcl.multi_turn.base_accuracy": 62.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 41.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 44.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 55.0, "bfcl/bfcl.web_search.accuracy": 66.5, "bfcl/bfcl.web_search.base_accuracy": 72.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 61.0, "bfcl/bfcl.memory.accuracy": 29.03, "bfcl/bfcl.memory.kv_accuracy": 21.94, "bfcl/bfcl.memory.vector_accuracy": 20.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 45.16, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.34 } }, { "id": "mosaicml/MPT-30B", "name": "MPT 30B", "developer": "mosaicml", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.714, "helm_classic/MMLU": 0.437, "helm_classic/BoolQ": 0.704, "helm_classic/NarrativeQA": 0.732, "helm_classic/NaturalQuestions (open-book)": 0.673, "helm_classic/QuAC": 0.393, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.231, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.959, "helm_classic/CivilComments": 0.599, "helm_classic/RAFT": 0.723 } }, { "id": "mosaicml/mpt-7b", "name": "mpt-7b", "developer": "mosaicml", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2152, "hfopenllm_v2/BBH": 0.33, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3672, "hfopenllm_v2/MMLU-PRO": 0.1206 } }, { "id": "mosaicml/MPT-Instruct-30B", "name": "MPT-Instruct 30B", "developer": "mosaicml", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.716, "helm_classic/MMLU": 0.444, "helm_classic/BoolQ": 0.85, "helm_classic/NarrativeQA": 0.733, "helm_classic/NaturalQuestions (open-book)": 0.697, "helm_classic/QuAC": 0.327, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.234, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.956, "helm_classic/CivilComments": 0.573, "helm_classic/RAFT": 0.68 } }, { "id": "mosama/Qwen2.5-1.5B-Instruct-CoT-Reflection", "name": "Qwen2.5-1.5B-Instruct-CoT-Reflection", "developer": "mosama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.287, "hfopenllm_v2/BBH": 0.4109, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3212, "hfopenllm_v2/MMLU-PRO": 0.2651 } }, { "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged", "name": "llama-3.2-1b-Insomnia-ChatBot-merged", "developer": "Mostafa8Mehrabi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1321, "hfopenllm_v2/BBH": 0.3004, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "mrdayl/OpenCogito", "name": "OpenCogito", "developer": "mrdayl", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3934, "hfopenllm_v2/BBH": 0.472, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.424, "hfopenllm_v2/MMLU-PRO": 0.3452 } }, { "id": "mrdayl/OpenCognito", "name": "OpenCognito", "developer": "mrdayl", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4062, "hfopenllm_v2/BBH": 0.4706, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.3443 } }, { "id": "mrdayl/OpenCognito-r1", "name": "OpenCognito-r1", "developer": "mrdayl", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4241, "hfopenllm_v2/BBH": 0.4673, "hfopenllm_v2/MATH Level 5": 0.1903, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.3475 } }, { "id": "mrdayl/OpenCognito-r2", "name": "OpenCognito-r2", "developer": "mrdayl", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3959, "hfopenllm_v2/BBH": 0.4688, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.3462 } }, { "id": "mrdayl/OpenThink", "name": "OpenThink", "developer": "mrdayl", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2054, "hfopenllm_v2/BBH": 0.346, "hfopenllm_v2/MATH Level 5": 0.2885, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.185 } }, { "id": "mrm8488/phi-4-14B-grpo-gsm8k-3e", "name": "phi-4-14B-grpo-gsm8k-3e", "developer": "mrm8488", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6885, "hfopenllm_v2/BBH": 0.6805, "hfopenllm_v2/MATH Level 5": 0.4524, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.5268 } }, { "id": "mrm8488/phi-4-14B-grpo-limo", "name": "phi-4-14B-grpo-limo", "developer": "mrm8488", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6812, "hfopenllm_v2/BBH": 0.6785, "hfopenllm_v2/MATH Level 5": 0.4569, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.5261 } }, { "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i", "name": "MrRoboto-ProLong-8b-v4i", "developer": "MrRobotoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3835, "hfopenllm_v2/BBH": 0.4585, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4014, "hfopenllm_v2/MMLU-PRO": 0.3068 } }, { "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b", "name": "MrRoboto-ProLongBASE-pt8-unaligned-8b", "developer": "MrRobotoAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3475, "hfopenllm_v2/BBH": 0.4515, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.2566 } }, { "id": "MTSAIR/Cotype-Nano", "name": "Cotype-Nano", "developer": "MTSAIR", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3748, "hfopenllm_v2/BBH": 0.3865, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.2477 } }, { "id": "MTSAIR/MultiVerse_70B", "name": "MultiVerse_70B", "developer": "MTSAIR", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5249, "hfopenllm_v2/BBH": 0.6183, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.474, "hfopenllm_v2/MMLU-PRO": 0.486 } }, { "id": "mukaj/Llama-3.1-Hawkish-8B", "name": "Llama-3.1-Hawkish-8B", "developer": "mukaj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.672, "hfopenllm_v2/BBH": 0.4884, "hfopenllm_v2/MATH Level 5": 0.2432, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3967, "hfopenllm_v2/MMLU-PRO": 0.3331 } }, { "id": "multiple/multiple", "name": "Multiple", "developer": "Multiple", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 59.1 } }, { "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B", "name": "Gladiator-Mini-Exp-1211-3B", "developer": "MultivexAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6876, "hfopenllm_v2/BBH": 0.4484, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.326, "hfopenllm_v2/MMLU-PRO": 0.3152 } }, { "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct", "name": "Gladiator-Mini-Exp-1221-3B-Instruct", "developer": "MultivexAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6079, "hfopenllm_v2/BBH": 0.437, "hfopenllm_v2/MATH Level 5": 0.1352, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3115, "hfopenllm_v2/MMLU-PRO": 0.3049 } }, { "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2", "name": "Gladiator-Mini-Exp-1221-3B-Instruct-V2", "developer": "MultivexAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6215, "hfopenllm_v2/BBH": 0.4389, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3008, "hfopenllm_v2/MMLU-PRO": 0.3025 } }, { "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct", "name": "Gladiator-Mini-Exp-1222-3B-Instruct", "developer": "MultivexAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6163, "hfopenllm_v2/BBH": 0.4373, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3128, "hfopenllm_v2/MMLU-PRO": 0.3017 } }, { "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", "name": "Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF", "developer": "MultivexAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.144, "hfopenllm_v2/BBH": 0.2908, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3642, "hfopenllm_v2/MMLU-PRO": 0.1109 } }, { "id": "Mxode/NanoLM-0.3B-Instruct-v1", "name": "NanoLM-0.3B-Instruct-v1", "developer": "Mxode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1537, "hfopenllm_v2/BBH": 0.3028, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.1105 } }, { "id": "Mxode/NanoLM-0.3B-Instruct-v1.1", "name": "NanoLM-0.3B-Instruct-v1.1", "developer": "Mxode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1783, "hfopenllm_v2/BBH": 0.3014, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.1121 } }, { "id": "Mxode/NanoLM-0.3B-Instruct-v2", "name": "NanoLM-0.3B-Instruct-v2", "developer": "Mxode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1668, "hfopenllm_v2/BBH": 0.2921, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3955, "hfopenllm_v2/MMLU-PRO": 0.1134 } }, { "id": "Mxode/NanoLM-1B-Instruct-v1.1", "name": "NanoLM-1B-Instruct-v1.1", "developer": "Mxode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2395, "hfopenllm_v2/BBH": 0.3184, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3433, "hfopenllm_v2/MMLU-PRO": 0.1215 } }, { "id": "Mxode/NanoLM-1B-Instruct-v2", "name": "NanoLM-1B-Instruct-v2", "developer": "Mxode", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.263, "hfopenllm_v2/BBH": 0.3123, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3552, "hfopenllm_v2/MMLU-PRO": 0.1238 } }, { "id": "my_model/", "name": "my_model/", "developer": "my_model", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5267, "reward-bench/Chat": 0.4553, "reward-bench/Chat Hard": 0.5592, "reward-bench/Safety": 0.4392, "reward-bench/Reasoning": 0.6532 } }, { "id": "nanbeige/nanbeige3-5-pro-thinking-fc", "name": "Nanbeige3.5-Pro-Thinking (FC)", "developer": "nanbeige", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 32.0, "bfcl/bfcl.overall.overall_accuracy": 47.68, "bfcl/bfcl.overall.total_cost_usd": 23.46, "bfcl/bfcl.overall.latency_mean_s": 21.12, "bfcl/bfcl.overall.latency_std_s": 28.61, "bfcl/bfcl.overall.latency_p95_s": 63.29, "bfcl/bfcl.non_live.ast_accuracy": 38.35, "bfcl/bfcl.non_live.simple_ast_accuracy": 43.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 36.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 53.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 20.0, "bfcl/bfcl.live.live_accuracy": 69.95, "bfcl/bfcl.live.live_simple_ast_accuracy": 63.18, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.42, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 40.0, "bfcl/bfcl.multi_turn.base_accuracy": 56.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 34.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 29.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 41.0, "bfcl/bfcl.web_search.accuracy": 42.0, "bfcl/bfcl.web_search.base_accuracy": 47.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 37.0, "bfcl/bfcl.memory.accuracy": 45.16, "bfcl/bfcl.memory.kv_accuracy": 38.06, "bfcl/bfcl.memory.vector_accuracy": 58.06, "bfcl/bfcl.memory.recursive_summarization_accuracy": 39.35, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.2 } }, { "id": "nanbeige/nanbeige4-3b-thinking-2511-fc", "name": "Nanbeige4-3B-Thinking-2511 (FC)", "developer": "nanbeige", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 25.0, "bfcl/bfcl.overall.overall_accuracy": 51.4, "bfcl/bfcl.overall.total_cost_usd": 14.14, "bfcl/bfcl.overall.latency_mean_s": 13.46, "bfcl/bfcl.overall.latency_std_s": 26.41, "bfcl/bfcl.overall.latency_p95_s": 37.45, "bfcl/bfcl.non_live.ast_accuracy": 81.58, "bfcl/bfcl.non_live.simple_ast_accuracy": 63.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_accuracy": 79.42, "bfcl/bfcl.live.live_simple_ast_accuracy": 86.05, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.06, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 51.12, "bfcl/bfcl.multi_turn.base_accuracy": 58.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 54.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 45.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 47.0, "bfcl/bfcl.web_search.accuracy": 21.5, "bfcl/bfcl.web_search.base_accuracy": 31.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 12.0, "bfcl/bfcl.memory.accuracy": 36.77, "bfcl/bfcl.memory.kv_accuracy": 31.61, "bfcl/bfcl.memory.vector_accuracy": 34.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 44.52, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 83.09 } }, { "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0", "name": "naps-gemma-2-27b-v-0.1.0", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0", "name": "naps-gemma-2-27b-v0.1.0", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3", "name": "naps-llama-3_1-8b-instruct-v0.3", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5391, "hfopenllm_v2/BBH": 0.4901, "hfopenllm_v2/MATH Level 5": 0.1903, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3787, "hfopenllm_v2/MMLU-PRO": 0.3398 } }, { "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4", "name": "naps-llama-3_1-8b-instruct-v0.4", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7344, "hfopenllm_v2/BBH": 0.4862, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4421, "hfopenllm_v2/MMLU-PRO": 0.3475 } }, { "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0", "name": "naps-llama-3_1-instruct-v0.5.0", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.502, "hfopenllm_v2/BBH": 0.4148, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.2614 } }, { "id": "NAPS-ai/naps-llama-3_1_instruct-v0.6.0", "name": "naps-llama-3_1_instruct-v0.6.0", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.328, "hfopenllm_v2/BBH": 0.4528, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.3241 } }, { "id": "NAPS-ai/naps-llama3.1-70B-v0.2-fp16", "name": "naps-llama3.1-70B-v0.2-fp16", "developer": "NAPS-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1845, "hfopenllm_v2/BBH": 0.3041, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3486, "hfopenllm_v2/MMLU-PRO": 0.1099 } }, { "id": "natong19/Mistral-Nemo-Instruct-2407-abliterated", "name": "Mistral-Nemo-Instruct-2407-abliterated", "developer": "natong19", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6392, "hfopenllm_v2/BBH": 0.5048, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4033, "hfopenllm_v2/MMLU-PRO": 0.3518 } }, { "id": "natong19/Qwen2-7B-Instruct-abliterated", "name": "Qwen2-7B-Instruct-abliterated", "developer": "natong19", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5837, "hfopenllm_v2/BBH": 0.5553, "hfopenllm_v2/MATH Level 5": 0.2764, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.3842 } }, { "id": "Naveenpoliasetty/llama3-8B-V2", "name": "llama3-8B-V2", "developer": "Naveenpoliasetty", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4123, "hfopenllm_v2/BBH": 0.5189, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "nazimali/Mistral-Nemo-Kurdish", "name": "Mistral-Nemo-Kurdish", "developer": "nazimali", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3401, "hfopenllm_v2/BBH": 0.5133, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4116, "hfopenllm_v2/MMLU-PRO": 0.3235 } }, { "id": "nazimali/Mistral-Nemo-Kurdish-Instruct", "name": "Mistral-Nemo-Kurdish-Instruct", "developer": "nazimali", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.486, "hfopenllm_v2/BBH": 0.4721, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4006, "hfopenllm_v2/MMLU-PRO": 0.3087 } }, { "id": "NbAiLab/nb-llama-3.1-8B-Instruct", "name": "nb-llama-3.1-8B-Instruct", "developer": "NbAiLab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3625, "hfopenllm_v2/BBH": 0.3247, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3208, "hfopenllm_v2/MMLU-PRO": 0.1197 } }, { "id": "NbAiLab/nb-llama-3.1-8B-sft", "name": "nb-llama-3.1-8B-sft", "developer": "NbAiLab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3616, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3287, "hfopenllm_v2/MMLU-PRO": 0.1222 } }, { "id": "nbeerbower/BigKartoffel-mistral-nemo-20B", "name": "BigKartoffel-mistral-nemo-20B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5857, "hfopenllm_v2/BBH": 0.5515, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.353 } }, { "id": "nbeerbower/DoppelKartoffel-Mistral-Nemo-23B", "name": "DoppelKartoffel-Mistral-Nemo-23B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5191, "hfopenllm_v2/BBH": 0.5218, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3795, "hfopenllm_v2/MMLU-PRO": 0.308 } }, { "id": "nbeerbower/DoublePotato-Mistral-Nemo-13B", "name": "DoublePotato-Mistral-Nemo-13B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6796, "hfopenllm_v2/BBH": 0.5438, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.46, "hfopenllm_v2/MMLU-PRO": 0.3596 } }, { "id": "nbeerbower/Dumpling-Qwen2.5-1.5B", "name": "Dumpling-Qwen2.5-1.5B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3699, "hfopenllm_v2/BBH": 0.416, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3728, "hfopenllm_v2/MMLU-PRO": 0.2772 } }, { "id": "nbeerbower/Dumpling-Qwen2.5-14B", "name": "Dumpling-Qwen2.5-14B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6064, "hfopenllm_v2/BBH": 0.6451, "hfopenllm_v2/MATH Level 5": 0.3097, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.517 } }, { "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16", "name": "Dumpling-Qwen2.5-7B-1k-r16", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.486, "hfopenllm_v2/BBH": 0.5214, "hfopenllm_v2/MATH Level 5": 0.2364, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.423, "hfopenllm_v2/MMLU-PRO": 0.3959 } }, { "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5", "name": "Dumpling-Qwen2.5-7B-1k-r64-2e-5", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4179, "hfopenllm_v2/BBH": 0.5301, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4486, "hfopenllm_v2/MMLU-PRO": 0.4122 } }, { "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B", "name": "EVA-abliterated-TIES-Qwen2.5-1.5B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4115, "hfopenllm_v2/BBH": 0.3997, "hfopenllm_v2/MATH Level 5": 0.1375, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.2712 } }, { "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B", "name": "EVA-abliterated-TIES-Qwen2.5-14B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7836, "hfopenllm_v2/BBH": 0.6372, "hfopenllm_v2/MATH Level 5": 0.5045, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4407, "hfopenllm_v2/MMLU-PRO": 0.5211 } }, { "id": "nbeerbower/Flammades-Mistral-Nemo-12B", "name": "Flammades-Mistral-Nemo-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3842, "hfopenllm_v2/BBH": 0.53, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4806, "hfopenllm_v2/MMLU-PRO": 0.3661 } }, { "id": "nbeerbower/gemma2-gutenberg-27B", "name": "gemma2-gutenberg-27B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2947, "hfopenllm_v2/BBH": 0.3797, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3727, "hfopenllm_v2/MMLU-PRO": 0.1982 } }, { "id": "nbeerbower/gemma2-gutenberg-9B", "name": "gemma2-gutenberg-9B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2796, "hfopenllm_v2/BBH": 0.5951, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4192 } }, { "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B", "name": "Gemma2-Gutenberg-Doppel-9B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7171, "hfopenllm_v2/BBH": 0.587, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4608, "hfopenllm_v2/MMLU-PRO": 0.4127 } }, { "id": "nbeerbower/Gutensuppe-mistral-nemo-12B", "name": "Gutensuppe-mistral-nemo-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2916, "hfopenllm_v2/BBH": 0.5487, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.368 } }, { "id": "nbeerbower/Hermes2-Gutenberg2-Mistral-7B", "name": "Hermes2-Gutenberg2-Mistral-7B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3721, "hfopenllm_v2/BBH": 0.4981, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4623, "hfopenllm_v2/MMLU-PRO": 0.2993 } }, { "id": "nbeerbower/Kartoffel-Deepfry-12B", "name": "Kartoffel-Deepfry-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5022, "hfopenllm_v2/BBH": 0.5365, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4792, "hfopenllm_v2/MMLU-PRO": 0.3582 } }, { "id": "nbeerbower/llama-3-gutenberg-8B", "name": "llama-3-gutenberg-8B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4372, "hfopenllm_v2/BBH": 0.4994, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3831 } }, { "id": "nbeerbower/Llama-3.1-Nemotron-lorablated-70B", "name": "Llama-3.1-Nemotron-lorablated-70B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7229, "hfopenllm_v2/BBH": 0.6825, "hfopenllm_v2/MATH Level 5": 0.3338, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.5343 } }, { "id": "nbeerbower/llama3.1-cc-8B", "name": "llama3.1-cc-8B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5068, "hfopenllm_v2/BBH": 0.4871, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3885, "hfopenllm_v2/MMLU-PRO": 0.3347 } }, { "id": "nbeerbower/Llama3.1-Gutenberg-Doppel-70B", "name": "Llama3.1-Gutenberg-Doppel-70B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7092, "hfopenllm_v2/BBH": 0.6661, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4897, "hfopenllm_v2/MMLU-PRO": 0.4737 } }, { "id": "nbeerbower/llama3.1-kartoffeldes-70B", "name": "llama3.1-kartoffeldes-70B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.823, "hfopenllm_v2/BBH": 0.6894, "hfopenllm_v2/MATH Level 5": 0.3218, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4646, "hfopenllm_v2/MMLU-PRO": 0.4988 } }, { "id": "nbeerbower/Lyra-Gutenberg-mistral-nemo-12B", "name": "Lyra-Gutenberg-mistral-nemo-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3495, "hfopenllm_v2/BBH": 0.5586, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4357, "hfopenllm_v2/MMLU-PRO": 0.3628 } }, { "id": "nbeerbower/Lyra4-Gutenberg-12B", "name": "Lyra4-Gutenberg-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2212, "hfopenllm_v2/BBH": 0.5387, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4038, "hfopenllm_v2/MMLU-PRO": 0.3571 } }, { "id": "nbeerbower/Lyra4-Gutenberg2-12B", "name": "Lyra4-Gutenberg2-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2585, "hfopenllm_v2/BBH": 0.5345, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.3972, "hfopenllm_v2/MMLU-PRO": 0.3565 } }, { "id": "nbeerbower/Mahou-1.5-mistral-nemo-12B-lorablated", "name": "Mahou-1.5-mistral-nemo-12B-lorablated", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6825, "hfopenllm_v2/BBH": 0.5496, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4522, "hfopenllm_v2/MMLU-PRO": 0.3574 } }, { "id": "nbeerbower/Mistral-Gutenberg-Doppel-7B-FFT", "name": "Mistral-Gutenberg-Doppel-7B-FFT", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5717, "hfopenllm_v2/BBH": 0.4076, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4059, "hfopenllm_v2/MMLU-PRO": 0.2729 } }, { "id": "nbeerbower/mistral-nemo-bophades-12B", "name": "mistral-nemo-bophades-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6794, "hfopenllm_v2/BBH": 0.4988, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.3501 } }, { "id": "nbeerbower/mistral-nemo-bophades3-12B", "name": "mistral-nemo-bophades3-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6578, "hfopenllm_v2/BBH": 0.5449, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4604, "hfopenllm_v2/MMLU-PRO": 0.3371 } }, { "id": "nbeerbower/mistral-nemo-cc-12B", "name": "mistral-nemo-cc-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1435, "hfopenllm_v2/BBH": 0.5399, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4424, "hfopenllm_v2/MMLU-PRO": 0.3598 } }, { "id": "nbeerbower/mistral-nemo-gutades-12B", "name": "mistral-nemo-gutades-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3425, "hfopenllm_v2/BBH": 0.5407, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.404, "hfopenllm_v2/MMLU-PRO": 0.3561 } }, { "id": "nbeerbower/mistral-nemo-gutenberg-12B", "name": "mistral-nemo-gutenberg-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3504, "hfopenllm_v2/BBH": 0.5281, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3562 } }, { "id": "nbeerbower/mistral-nemo-gutenberg-12B-v2", "name": "mistral-nemo-gutenberg-12B-v2", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6203, "hfopenllm_v2/BBH": 0.5397, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.3499 } }, { "id": "nbeerbower/mistral-nemo-gutenberg-12B-v3", "name": "mistral-nemo-gutenberg-12B-v3", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2183, "hfopenllm_v2/BBH": 0.5441, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "nbeerbower/mistral-nemo-gutenberg-12B-v4", "name": "mistral-nemo-gutenberg-12B-v4", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2379, "hfopenllm_v2/BBH": 0.5269, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4104, "hfopenllm_v2/MMLU-PRO": 0.3575 } }, { "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B", "name": "Mistral-Nemo-Gutenberg-Doppel-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3567, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4132, "hfopenllm_v2/MMLU-PRO": 0.3579 } }, { "id": "nbeerbower/Mistral-Nemo-Gutenberg-Doppel-12B-v2", "name": "Mistral-Nemo-Gutenberg-Doppel-12B-v2", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6536, "hfopenllm_v2/BBH": 0.5374, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4233, "hfopenllm_v2/MMLU-PRO": 0.3546 } }, { "id": "nbeerbower/mistral-nemo-gutenberg2-12B-test", "name": "mistral-nemo-gutenberg2-12B-test", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3385, "hfopenllm_v2/BBH": 0.5255, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4157, "hfopenllm_v2/MMLU-PRO": 0.3555 } }, { "id": "nbeerbower/mistral-nemo-kartoffel-12B", "name": "mistral-nemo-kartoffel-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7032, "hfopenllm_v2/BBH": 0.5484, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4653, "hfopenllm_v2/MMLU-PRO": 0.3585 } }, { "id": "nbeerbower/Mistral-Nemo-Moderne-12B-FFT-experimental", "name": "Mistral-Nemo-Moderne-12B-FFT-experimental", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3352, "hfopenllm_v2/BBH": 0.5234, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3715, "hfopenllm_v2/MMLU-PRO": 0.3455 } }, { "id": "nbeerbower/mistral-nemo-narwhal-12B", "name": "mistral-nemo-narwhal-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5549, "hfopenllm_v2/BBH": 0.5057, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.3483 } }, { "id": "nbeerbower/Mistral-Nemo-Prism-12B", "name": "Mistral-Nemo-Prism-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6858, "hfopenllm_v2/BBH": 0.5475, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4626, "hfopenllm_v2/MMLU-PRO": 0.3581 } }, { "id": "nbeerbower/Mistral-Nemo-Prism-12B-v2", "name": "Mistral-Nemo-Prism-12B-v2", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6974, "hfopenllm_v2/BBH": 0.5492, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.46, "hfopenllm_v2/MMLU-PRO": 0.3567 } }, { "id": "nbeerbower/Mistral-Nemo-Prism-12B-v7", "name": "Mistral-Nemo-Prism-12B-v7", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6962, "hfopenllm_v2/BBH": 0.5521, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4639, "hfopenllm_v2/MMLU-PRO": 0.359 } }, { "id": "nbeerbower/mistral-nemo-wissenschaft-12B", "name": "mistral-nemo-wissenschaft-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.652, "hfopenllm_v2/BBH": 0.504, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.3532 } }, { "id": "nbeerbower/Mistral-Small-Drummer-22B", "name": "Mistral-Small-Drummer-22B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6331, "hfopenllm_v2/BBH": 0.5793, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4064, "hfopenllm_v2/MMLU-PRO": 0.4095 } }, { "id": "nbeerbower/Mistral-Small-Gutenberg-Doppel-22B", "name": "Mistral-Small-Gutenberg-Doppel-22B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4893, "hfopenllm_v2/BBH": 0.5859, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.3971, "hfopenllm_v2/MMLU-PRO": 0.4124 } }, { "id": "nbeerbower/Nemo-Loony-12B-experimental", "name": "Nemo-Loony-12B-experimental", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3734, "hfopenllm_v2/BBH": 0.3822, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1589 } }, { "id": "nbeerbower/Nemoties-ChatML-12B", "name": "Nemoties-ChatML-12B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6382, "hfopenllm_v2/BBH": 0.547, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4509, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B", "name": "Qwen2.5-Gutenberg-Doppel-14B", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8091, "hfopenllm_v2/BBH": 0.6382, "hfopenllm_v2/MATH Level 5": 0.5415, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.4921 } }, { "id": "nbeerbower/SmolNemo-12B-FFT-experimental", "name": "SmolNemo-12B-FFT-experimental", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3348, "hfopenllm_v2/BBH": 0.3336, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3847, "hfopenllm_v2/MMLU-PRO": 0.1217 } }, { "id": "nbeerbower/Stella-mistral-nemo-12B-v2", "name": "Stella-mistral-nemo-12B-v2", "developer": "nbeerbower", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3274, "hfopenllm_v2/BBH": 0.5484, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4304, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "nbrahme/IndusQ", "name": "IndusQ", "developer": "nbrahme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.244, "hfopenllm_v2/BBH": 0.3062, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3366, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "NCSOFT/Llama-3-OffsetBias-8B", "name": "NCSOFT/Llama-3-OffsetBias-8B", "developer": "NCSOFT", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8397, "reward-bench/Chat": 0.9246, "reward-bench/Chat Hard": 0.8026, "reward-bench/Safety": 0.8676, "reward-bench/Reasoning": 0.7639 } }, { "id": "NCSOFT/Llama-3-OffsetBias-RM-8B", "name": "NCSOFT/Llama-3-OffsetBias-RM-8B", "developer": "NCSOFT", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8942, "reward-bench/Factuality": 0.6084, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.5191, "reward-bench/Safety": 0.8676, "reward-bench/Focus": 0.9596, "reward-bench/Ties": 0.6786, "reward-bench/Chat": 0.9721, "reward-bench/Chat Hard": 0.818, "reward-bench/Reasoning": 0.9192 } }, { "id": "NCSOFT/Llama-VARCO-8B-Instruct", "name": "Llama-VARCO-8B-Instruct", "developer": "NCSOFT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.447, "hfopenllm_v2/BBH": 0.5023, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3841, "hfopenllm_v2/MMLU-PRO": 0.319 } }, { "id": "necva/IE-cont-Llama3.1-8B", "name": "IE-cont-Llama3.1-8B", "developer": "necva", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2049, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "necva/replica-IEPile", "name": "replica-IEPile", "developer": "necva", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4678, "hfopenllm_v2/BBH": 0.4779, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3561 } }, { "id": "Nekochu/Llama-3.1-8B-french-DPO", "name": "Llama-3.1-8B-french-DPO", "developer": "Nekochu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4656, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4216, "hfopenllm_v2/MMLU-PRO": 0.3414 } }, { "id": "Nekochu/Llama-3.1-8B-German-ORPO", "name": "Llama-3.1-8B-German-ORPO", "developer": "Nekochu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4611, "hfopenllm_v2/BBH": 0.4983, "hfopenllm_v2/MATH Level 5": 0.1171, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4647, "hfopenllm_v2/MMLU-PRO": 0.3393 } }, { "id": "Nekochu/Luminia-13B-v3", "name": "Luminia-13B-v3", "developer": "Nekochu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2523, "hfopenllm_v2/BBH": 0.4112, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3983, "hfopenllm_v2/MMLU-PRO": 0.2215 } }, { "id": "Nekochu/Luminia-8B-RP", "name": "Luminia-8B-RP", "developer": "Nekochu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5574, "hfopenllm_v2/BBH": 0.5218, "hfopenllm_v2/MATH Level 5": 0.136, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3631 } }, { "id": "neopolita/jessi-v0.1-bf16-falcon3-7b-instruct", "name": "jessi-v0.1-bf16-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7527, "hfopenllm_v2/BBH": 0.5516, "hfopenllm_v2/MATH Level 5": 0.3807, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4825, "hfopenllm_v2/MMLU-PRO": 0.3924 } }, { "id": "neopolita/jessi-v0.1-falcon3-10b-instruct", "name": "jessi-v0.1-falcon3-10b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7552, "hfopenllm_v2/BBH": 0.5953, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.4188 } }, { "id": "neopolita/jessi-v0.1-qwen2.5-7b-instruct", "name": "jessi-v0.1-qwen2.5-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7327, "hfopenllm_v2/BBH": 0.5292, "hfopenllm_v2/MATH Level 5": 0.4086, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.4228 } }, { "id": "neopolita/jessi-v0.1-virtuoso-small", "name": "jessi-v0.1-virtuoso-small", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7959, "hfopenllm_v2/BBH": 0.6443, "hfopenllm_v2/MATH Level 5": 0.3399, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4362, "hfopenllm_v2/MMLU-PRO": 0.513 } }, { "id": "neopolita/jessi-v0.2-falcon3-10b-instruct", "name": "jessi-v0.2-falcon3-10b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7768, "hfopenllm_v2/BBH": 0.6205, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4281, "hfopenllm_v2/MMLU-PRO": 0.4354 } }, { "id": "neopolita/jessi-v0.2-falcon3-7b-instruct", "name": "jessi-v0.2-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5771, "hfopenllm_v2/BBH": 0.5363, "hfopenllm_v2/MATH Level 5": 0.2538, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.3905 } }, { "id": "neopolita/jessi-v0.3-falcon3-7b-instruct", "name": "jessi-v0.3-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.5388, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4692, "hfopenllm_v2/MMLU-PRO": 0.397 } }, { "id": "neopolita/jessi-v0.4-falcon3-7b-instruct", "name": "jessi-v0.4-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7604, "hfopenllm_v2/BBH": 0.5522, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4971, "hfopenllm_v2/MMLU-PRO": 0.4004 } }, { "id": "neopolita/jessi-v0.5-falcon3-7b-instruct", "name": "jessi-v0.5-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7412, "hfopenllm_v2/BBH": 0.559, "hfopenllm_v2/MATH Level 5": 0.3739, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4865, "hfopenllm_v2/MMLU-PRO": 0.3966 } }, { "id": "neopolita/jessi-v0.6-falcon3-7b-instruct", "name": "jessi-v0.6-falcon3-7b-instruct", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7402, "hfopenllm_v2/BBH": 0.5509, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4904, "hfopenllm_v2/MMLU-PRO": 0.3957 } }, { "id": "neopolita/loki-v0.1-virtuoso", "name": "loki-v0.1-virtuoso", "developer": "neopolita", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7819, "hfopenllm_v2/BBH": 0.6467, "hfopenllm_v2/MATH Level 5": 0.3391, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.5129 } }, { "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", "name": "DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.115, "hfopenllm_v2/BBH": 0.2877, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3724, "hfopenllm_v2/MMLU-PRO": 0.109 } }, { "id": "netcat420/DeepSeek-R1-MFANN-TIES-unretrained-7b", "name": "DeepSeek-R1-MFANN-TIES-unretrained-7b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2587, "hfopenllm_v2/BBH": 0.3086, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.1145 } }, { "id": "netcat420/Llama3.1-MFANN-8b", "name": "Llama3.1-MFANN-8b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.297, "hfopenllm_v2/BBH": 0.4281, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3379, "hfopenllm_v2/MMLU-PRO": 0.2725 } }, { "id": "netcat420/MFANN-abliterated-phi2-merge-unretrained", "name": "MFANN-abliterated-phi2-merge-unretrained", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3005, "hfopenllm_v2/BBH": 0.4104, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3183, "hfopenllm_v2/MMLU-PRO": 0.1478 } }, { "id": "netcat420/MFANN-llama3.1-Abliterated-SLERP", "name": "MFANN-llama3.1-Abliterated-SLERP", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2591, "hfopenllm_v2/BBH": 0.4574, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3809, "hfopenllm_v2/MMLU-PRO": 0.2928 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-TIES", "name": "MFANN-Llama3.1-Abliterated-Slerp-TIES", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4293, "hfopenllm_v2/BBH": 0.4968, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.3531 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", "name": "MFANN-Llama3.1-Abliterated-SLERP-TIES-V2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.421, "hfopenllm_v2/BBH": 0.4924, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3728, "hfopenllm_v2/MMLU-PRO": 0.3522 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", "name": "MFANN-Llama3.1-Abliterated-SLERP-TIES-V3", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4238, "hfopenllm_v2/BBH": 0.4914, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3741, "hfopenllm_v2/MMLU-PRO": 0.349 } }, { "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3", "name": "MFANN-llama3.1-abliterated-SLERP-v3", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3799, "hfopenllm_v2/BBH": 0.4931, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.3531 } }, { "id": "netcat420/MFANN-llama3.1-abliterated-SLERP-v3.1", "name": "MFANN-llama3.1-abliterated-SLERP-v3.1", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4202, "hfopenllm_v2/BBH": 0.4921, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3686, "hfopenllm_v2/MMLU-PRO": 0.3543 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-Slerp-V3.2", "name": "MFANN-Llama3.1-Abliterated-Slerp-V3.2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4128, "hfopenllm_v2/BBH": 0.4978, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3527 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V4", "name": "MFANN-Llama3.1-Abliterated-SLERP-V4", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4169, "hfopenllm_v2/BBH": 0.4909, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3821, "hfopenllm_v2/MMLU-PRO": 0.3516 } }, { "id": "netcat420/MFANN-Llama3.1-Abliterated-SLERP-V5", "name": "MFANN-Llama3.1-Abliterated-SLERP-V5", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4329, "hfopenllm_v2/BBH": 0.4952, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.3445 } }, { "id": "netcat420/MFANN-llama3.1-abliterated-v2", "name": "MFANN-llama3.1-abliterated-v2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4429, "hfopenllm_v2/BBH": 0.4941, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.3491 } }, { "id": "netcat420/MFANN-phigments-slerp-V2", "name": "MFANN-phigments-slerp-V2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3232, "hfopenllm_v2/BBH": 0.4827, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4037, "hfopenllm_v2/MMLU-PRO": 0.2717 } }, { "id": "netcat420/MFANN-phigments-slerp-V3.2", "name": "MFANN-phigments-slerp-V3.2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3524, "hfopenllm_v2/BBH": 0.4809, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3708, "hfopenllm_v2/MMLU-PRO": 0.2705 } }, { "id": "netcat420/MFANN-phigments-slerp-V3.3", "name": "MFANN-phigments-slerp-V3.3", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3691, "hfopenllm_v2/BBH": 0.4895, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3892, "hfopenllm_v2/MMLU-PRO": 0.2803 } }, { "id": "netcat420/MFANN-SFT", "name": "MFANN-SFT", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3682, "hfopenllm_v2/BBH": 0.4852, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.3725, "hfopenllm_v2/MMLU-PRO": 0.3336 } }, { "id": "netcat420/MFANN3b", "name": "MFANN3b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2524, "hfopenllm_v2/BBH": 0.4433, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3606, "hfopenllm_v2/MMLU-PRO": 0.2306 } }, { "id": "netcat420/MFANN3bv0.15", "name": "MFANN3bv0.15", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2012, "hfopenllm_v2/BBH": 0.4539, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3958, "hfopenllm_v2/MMLU-PRO": 0.2468 } }, { "id": "netcat420/MFANN3bv0.18", "name": "MFANN3bv0.18", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2206, "hfopenllm_v2/BBH": 0.4514, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.4024, "hfopenllm_v2/MMLU-PRO": 0.25 } }, { "id": "netcat420/MFANN3bv0.19", "name": "MFANN3bv0.19", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2258, "hfopenllm_v2/BBH": 0.4516, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.4024, "hfopenllm_v2/MMLU-PRO": 0.252 } }, { "id": "netcat420/MFANN3bv0.20", "name": "MFANN3bv0.20", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2193, "hfopenllm_v2/BBH": 0.4493, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4077, "hfopenllm_v2/MMLU-PRO": 0.25 } }, { "id": "netcat420/MFANN3bv0.21", "name": "MFANN3bv0.21", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1909, "hfopenllm_v2/BBH": 0.447, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3759, "hfopenllm_v2/MMLU-PRO": 0.2393 } }, { "id": "netcat420/MFANN3bv0.22", "name": "MFANN3bv0.22", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1979, "hfopenllm_v2/BBH": 0.4485, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3521, "hfopenllm_v2/MMLU-PRO": 0.2517 } }, { "id": "netcat420/MFANN3bv0.23", "name": "MFANN3bv0.23", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2048, "hfopenllm_v2/BBH": 0.4495, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3427, "hfopenllm_v2/MMLU-PRO": 0.2418 } }, { "id": "netcat420/MFANN3bv0.24", "name": "MFANN3bv0.24", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.22, "hfopenllm_v2/BBH": 0.4407, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3521, "hfopenllm_v2/MMLU-PRO": 0.2352 } }, { "id": "netcat420/MFANN3bv1.1", "name": "MFANN3bv1.1", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2507, "hfopenllm_v2/BBH": 0.3397, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3223, "hfopenllm_v2/MMLU-PRO": 0.1159 } }, { "id": "netcat420/MFANN3bv1.2", "name": "MFANN3bv1.2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2686, "hfopenllm_v2/BBH": 0.366, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3156, "hfopenllm_v2/MMLU-PRO": 0.145 } }, { "id": "netcat420/MFANN3bv1.3", "name": "MFANN3bv1.3", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2547, "hfopenllm_v2/BBH": 0.4456, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3299, "hfopenllm_v2/MMLU-PRO": 0.2276 } }, { "id": "netcat420/MFANN3bv1.4", "name": "MFANN3bv1.4", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3524, "hfopenllm_v2/BBH": 0.4809, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3708, "hfopenllm_v2/MMLU-PRO": 0.2705 } }, { "id": "netcat420/MFANNv0.19", "name": "MFANNv0.19", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3057, "hfopenllm_v2/BBH": 0.4731, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.2473 } }, { "id": "netcat420/MFANNv0.20", "name": "MFANNv0.20", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3479, "hfopenllm_v2/BBH": 0.4574, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.3202 } }, { "id": "netcat420/MFANNv0.21", "name": "MFANNv0.21", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3233, "hfopenllm_v2/BBH": 0.4576, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3993, "hfopenllm_v2/MMLU-PRO": 0.3031 } }, { "id": "netcat420/MFANNv0.22.1", "name": "MFANNv0.22.1", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3089, "hfopenllm_v2/BBH": 0.4661, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3753, "hfopenllm_v2/MMLU-PRO": 0.3343 } }, { "id": "netcat420/MFANNv0.23", "name": "MFANNv0.23", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3127, "hfopenllm_v2/BBH": 0.4898, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3768, "hfopenllm_v2/MMLU-PRO": 0.3388 } }, { "id": "netcat420/MFANNv0.24", "name": "MFANNv0.24", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3162, "hfopenllm_v2/BBH": 0.479, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3348 } }, { "id": "netcat420/MFANNv0.25", "name": "MFANNv0.25", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3467, "hfopenllm_v2/BBH": 0.4794, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3688, "hfopenllm_v2/MMLU-PRO": 0.3343 } }, { "id": "netcat420/Qwen2.5-7b-MFANN-slerp", "name": "Qwen2.5-7b-MFANN-slerp", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6532, "hfopenllm_v2/BBH": 0.5089, "hfopenllm_v2/MATH Level 5": 0.287, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3417 } }, { "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp", "name": "Qwen2.5-7b-nerd-uncensored-MFANN-slerp", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1564, "hfopenllm_v2/BBH": 0.292, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3792, "hfopenllm_v2/MMLU-PRO": 0.11 } }, { "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", "name": "Qwen2.5-7B-nerd-uncensored-v0.9-MFANN", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5878, "hfopenllm_v2/BBH": 0.5237, "hfopenllm_v2/MATH Level 5": 0.3376, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3926, "hfopenllm_v2/MMLU-PRO": 0.3904 } }, { "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", "name": "Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5742, "hfopenllm_v2/BBH": 0.5071, "hfopenllm_v2/MATH Level 5": 0.2568, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4058, "hfopenllm_v2/MMLU-PRO": 0.3157 } }, { "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", "name": "Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6486, "hfopenllm_v2/BBH": 0.5066, "hfopenllm_v2/MATH Level 5": 0.2991, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4152, "hfopenllm_v2/MMLU-PRO": 0.3432 } }, { "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", "name": "Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2676, "hfopenllm_v2/BBH": 0.3789, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2324, "hfopenllm_v2/MUSR": 0.3528, "hfopenllm_v2/MMLU-PRO": 0.1677 } }, { "id": "netcat420/Qwen2.5-MFANN-7b", "name": "Qwen2.5-MFANN-7b", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6097, "hfopenllm_v2/BBH": 0.5054, "hfopenllm_v2/MATH Level 5": 0.2787, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4021, "hfopenllm_v2/MMLU-PRO": 0.3233 } }, { "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2", "name": "qwen2.5-MFANN-7b-SLERP-V1.2", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6606, "hfopenllm_v2/BBH": 0.5111, "hfopenllm_v2/MATH Level 5": 0.287, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4259, "hfopenllm_v2/MMLU-PRO": 0.3438 } }, { "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1", "name": "qwen2.5-MFANN-7b-SLERPv1.1", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6555, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.2968, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4126, "hfopenllm_v2/MMLU-PRO": 0.3448 } }, { "id": "netcat420/qwen2.5-MFANN-7b-v1.1", "name": "qwen2.5-MFANN-7b-v1.1", "developer": "netcat420", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6088, "hfopenllm_v2/BBH": 0.4967, "hfopenllm_v2/MATH Level 5": 0.2825, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.3248 } }, { "id": "netease-youdao/Confucius-o1-14B", "name": "Confucius-o1-14B", "developer": "netease-youdao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6378, "hfopenllm_v2/BBH": 0.63, "hfopenllm_v2/MATH Level 5": 0.4313, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.5265 } }, { "id": "NeverSleep/Lumimaid-v0.2-12B", "name": "Lumimaid-v0.2-12B", "developer": "NeverSleep", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1099, "hfopenllm_v2/BBH": 0.5396, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4821, "hfopenllm_v2/MMLU-PRO": 0.3511 } }, { "id": "NeverSleep/Lumimaid-v0.2-8B", "name": "Lumimaid-v0.2-8B", "developer": "NeverSleep", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5038, "hfopenllm_v2/BBH": 0.5238, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4303, "hfopenllm_v2/MMLU-PRO": 0.3636 } }, { "id": "newsbang/Homer-7B-v0.1", "name": "Homer-7B-v0.1", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6109, "hfopenllm_v2/BBH": 0.5601, "hfopenllm_v2/MATH Level 5": 0.386, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4357, "hfopenllm_v2/MMLU-PRO": 0.4475 } }, { "id": "newsbang/Homer-7B-v0.2", "name": "Homer-7B-v0.2", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7494, "hfopenllm_v2/BBH": 0.5517, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4298, "hfopenllm_v2/MMLU-PRO": 0.441 } }, { "id": "newsbang/Homer-v0.3-Qwen2.5-7B", "name": "Homer-v0.3-Qwen2.5-7B", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5154, "hfopenllm_v2/BBH": 0.5481, "hfopenllm_v2/MATH Level 5": 0.3089, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.4456 } }, { "id": "newsbang/Homer-v0.4-Qwen2.5-7B", "name": "Homer-v0.4-Qwen2.5-7B", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7999, "hfopenllm_v2/BBH": 0.5533, "hfopenllm_v2/MATH Level 5": 0.2779, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.4363 } }, { "id": "newsbang/Homer-v0.5-Qwen2.5-7B", "name": "Homer-v0.5-Qwen2.5-7B", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7881, "hfopenllm_v2/BBH": 0.554, "hfopenllm_v2/MATH Level 5": 0.3724, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4193, "hfopenllm_v2/MMLU-PRO": 0.4369 } }, { "id": "newsbang/Homer-v1.0-Qwen2.5-72B", "name": "Homer-v1.0-Qwen2.5-72B", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7628, "hfopenllm_v2/BBH": 0.731, "hfopenllm_v2/MATH Level 5": 0.4902, "hfopenllm_v2/GPQA": 0.4161, "hfopenllm_v2/MUSR": 0.4677, "hfopenllm_v2/MMLU-PRO": 0.6145 } }, { "id": "newsbang/Homer-v1.0-Qwen2.5-7B", "name": "Homer-v1.0-Qwen2.5-7B", "developer": "newsbang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6393, "hfopenllm_v2/BBH": 0.5655, "hfopenllm_v2/MATH Level 5": 0.3323, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.4535 } }, { "id": "Nexesenex/Dolphin3.0-Llama3.1-1B-abliterated", "name": "Dolphin3.0-Llama3.1-1B-abliterated", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5312, "hfopenllm_v2/BBH": 0.3241, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3237, "hfopenllm_v2/MMLU-PRO": 0.1373 } }, { "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_Prev_v1.0", "name": "Llama_3.1_8b_DeepDive_3_Prev_v1.0", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6809, "hfopenllm_v2/BBH": 0.5155, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3666, "hfopenllm_v2/MMLU-PRO": 0.3438 } }, { "id": "Nexesenex/Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", "name": "Llama_3.1_8b_DeepDive_3_R1_Prev_v1.0", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7101, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3758, "hfopenllm_v2/MMLU-PRO": 0.3441 } }, { "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.01", "name": "Llama_3.1_8b_DoberWild_v2.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7996, "hfopenllm_v2/BBH": 0.5251, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4012, "hfopenllm_v2/MMLU-PRO": 0.3791 } }, { "id": "Nexesenex/Llama_3.1_8b_DoberWild_v2.03", "name": "Llama_3.1_8b_DoberWild_v2.03", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7764, "hfopenllm_v2/BBH": 0.5294, "hfopenllm_v2/MATH Level 5": 0.2077, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3906, "hfopenllm_v2/MMLU-PRO": 0.3722 } }, { "id": "Nexesenex/Llama_3.1_8b_DobHerWild_R1_v1.1R", "name": "Llama_3.1_8b_DobHerWild_R1_v1.1R", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.76, "hfopenllm_v2/BBH": 0.5257, "hfopenllm_v2/MATH Level 5": 0.2319, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3852, "hfopenllm_v2/MMLU-PRO": 0.3688 } }, { "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.01", "name": "Llama_3.1_8b_DodoWild_v2.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7978, "hfopenllm_v2/BBH": 0.5253, "hfopenllm_v2/MATH Level 5": 0.1986, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.409, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.02", "name": "Llama_3.1_8b_DodoWild_v2.02", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8017, "hfopenllm_v2/BBH": 0.5262, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3971, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.03", "name": "Llama_3.1_8b_DodoWild_v2.03", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7941, "hfopenllm_v2/BBH": 0.5308, "hfopenllm_v2/MATH Level 5": 0.2221, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3959, "hfopenllm_v2/MMLU-PRO": 0.3786 } }, { "id": "Nexesenex/Llama_3.1_8b_DodoWild_v2.10", "name": "Llama_3.1_8b_DodoWild_v2.10", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8054, "hfopenllm_v2/BBH": 0.5278, "hfopenllm_v2/MATH Level 5": 0.1971, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4157, "hfopenllm_v2/MMLU-PRO": 0.3855 } }, { "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.01", "name": "Llama_3.1_8b_Dolermed_R1_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7534, "hfopenllm_v2/BBH": 0.5312, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3747, "hfopenllm_v2/MMLU-PRO": 0.3733 } }, { "id": "Nexesenex/Llama_3.1_8b_Dolermed_R1_V1.03", "name": "Llama_3.1_8b_Dolermed_R1_V1.03", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7564, "hfopenllm_v2/BBH": 0.5316, "hfopenllm_v2/MATH Level 5": 0.2092, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.38, "hfopenllm_v2/MMLU-PRO": 0.372 } }, { "id": "Nexesenex/Llama_3.1_8b_Dolermed_V1.01", "name": "Llama_3.1_8b_Dolermed_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5087, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3945, "hfopenllm_v2/MMLU-PRO": 0.357 } }, { "id": "Nexesenex/Llama_3.1_8b_Dolerstormed_V1.04", "name": "Llama_3.1_8b_Dolerstormed_V1.04", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.5195, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.403, "hfopenllm_v2/MMLU-PRO": 0.3889 } }, { "id": "Nexesenex/Llama_3.1_8b_Hermedash_R1_V1.04", "name": "Llama_3.1_8b_Hermedash_R1_V1.04", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7872, "hfopenllm_v2/BBH": 0.5192, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4111, "hfopenllm_v2/MMLU-PRO": 0.3882 } }, { "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.01", "name": "Llama_3.1_8b_Hermedive_R1_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5001, "hfopenllm_v2/BBH": 0.5171, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4008, "hfopenllm_v2/MMLU-PRO": 0.3427 } }, { "id": "Nexesenex/Llama_3.1_8b_Hermedive_R1_V1.03", "name": "Llama_3.1_8b_Hermedive_R1_V1.03", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6648, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3613, "hfopenllm_v2/MMLU-PRO": 0.3488 } }, { "id": "Nexesenex/Llama_3.1_8b_Hermedive_V1.01", "name": "Llama_3.1_8b_Hermedive_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5062, "hfopenllm_v2/BBH": 0.4918, "hfopenllm_v2/MATH Level 5": 0.1647, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3697, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "Nexesenex/Llama_3.1_8b_Mediver_V1.01", "name": "Llama_3.1_8b_Mediver_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1885, "hfopenllm_v2/BBH": 0.4415, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3898, "hfopenllm_v2/MMLU-PRO": 0.2994 } }, { "id": "Nexesenex/Llama_3.1_8b_Medusa_v1.01", "name": "Llama_3.1_8b_Medusa_v1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7685, "hfopenllm_v2/BBH": 0.5018, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4067, "hfopenllm_v2/MMLU-PRO": 0.3531 } }, { "id": "Nexesenex/Llama_3.1_8b_Smarteaz_0.2_R1", "name": "Llama_3.1_8b_Smarteaz_0.2_R1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6346, "hfopenllm_v2/BBH": 0.5113, "hfopenllm_v2/MATH Level 5": 0.2606, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4188, "hfopenllm_v2/MMLU-PRO": 0.3645 } }, { "id": "Nexesenex/Llama_3.1_8b_Smarteaz_V1.01", "name": "Llama_3.1_8b_Smarteaz_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8151, "hfopenllm_v2/BBH": 0.5241, "hfopenllm_v2/MATH Level 5": 0.2341, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3789, "hfopenllm_v2/MMLU-PRO": 0.3736 } }, { "id": "Nexesenex/Llama_3.1_8b_Stormeder_v1.04", "name": "Llama_3.1_8b_Stormeder_v1.04", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7853, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.185, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.3949, "hfopenllm_v2/MMLU-PRO": 0.3852 } }, { "id": "Nexesenex/Llama_3.1_8b_Typhoon_v1.03", "name": "Llama_3.1_8b_Typhoon_v1.03", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8078, "hfopenllm_v2/BBH": 0.5314, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3815, "hfopenllm_v2/MMLU-PRO": 0.3842 } }, { "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.1", "name": "Llama_3.2_1b_AquaSyn_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2741, "hfopenllm_v2/BBH": 0.3284, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.1378 } }, { "id": "Nexesenex/Llama_3.2_1b_AquaSyn_0.11", "name": "Llama_3.2_1b_AquaSyn_0.11", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2431, "hfopenllm_v2/BBH": 0.3112, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1116 } }, { "id": "Nexesenex/Llama_3.2_1b_Dolto_0.1", "name": "Llama_3.2_1b_Dolto_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5434, "hfopenllm_v2/BBH": 0.335, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.3421, "hfopenllm_v2/MMLU-PRO": 0.1364 } }, { "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1", "name": "Llama_3.2_1b_Odyssea_V1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2553, "hfopenllm_v2/BBH": 0.301, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3394, "hfopenllm_v2/MMLU-PRO": 0.1153 } }, { "id": "Nexesenex/Llama_3.2_1b_Odyssea_V1.01", "name": "Llama_3.2_1b_Odyssea_V1.01", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2495, "hfopenllm_v2/BBH": 0.3045, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1152 } }, { "id": "Nexesenex/Llama_3.2_1b_OpenTree_R1_0.1", "name": "Llama_3.2_1b_OpenTree_R1_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5366, "hfopenllm_v2/BBH": 0.328, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3131, "hfopenllm_v2/MMLU-PRO": 0.1675 } }, { "id": "Nexesenex/Llama_3.2_1b_OrcaSun_V1", "name": "Llama_3.2_1b_OrcaSun_V1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5949, "hfopenllm_v2/BBH": 0.355, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.1904 } }, { "id": "Nexesenex/Llama_3.2_1b_RandomLego_RP_R1_0.1", "name": "Llama_3.2_1b_RandomLego_RP_R1_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5543, "hfopenllm_v2/BBH": 0.3428, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1563 } }, { "id": "Nexesenex/Llama_3.2_1b_SunOrca_V1", "name": "Llama_3.2_1b_SunOrca_V1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.543, "hfopenllm_v2/BBH": 0.3431, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1884 } }, { "id": "Nexesenex/Llama_3.2_1b_Sydonia_0.1", "name": "Llama_3.2_1b_Sydonia_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2197, "hfopenllm_v2/BBH": 0.3121, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2282, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1224 } }, { "id": "Nexesenex/Llama_3.2_1b_Syneridol_0.2", "name": "Llama_3.2_1b_Syneridol_0.2", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2157, "hfopenllm_v2/BBH": 0.3139, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2349, "hfopenllm_v2/MUSR": 0.3343, "hfopenllm_v2/MMLU-PRO": 0.1227 } }, { "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.1", "name": "Llama_3.2_1b_Synopsys_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1764, "hfopenllm_v2/BBH": 0.3162, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3461, "hfopenllm_v2/MMLU-PRO": 0.1231 } }, { "id": "Nexesenex/Llama_3.2_1b_Synopsys_0.11", "name": "Llama_3.2_1b_Synopsys_0.11", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2842, "hfopenllm_v2/BBH": 0.3102, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3513, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "Nexesenex/Llama_3.2_3b_Kermes_v1", "name": "Llama_3.2_3b_Kermes_v1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4852, "hfopenllm_v2/BBH": 0.441, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.407, "hfopenllm_v2/MMLU-PRO": 0.2547 } }, { "id": "Nexesenex/Llama_3.2_3b_Kermes_v2", "name": "Llama_3.2_3b_Kermes_v2", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5754, "hfopenllm_v2/BBH": 0.4455, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.2734 } }, { "id": "Nexesenex/Llama_3.2_3b_Kermes_v2.1", "name": "Llama_3.2_3b_Kermes_v2.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5584, "hfopenllm_v2/BBH": 0.4464, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.2692 } }, { "id": "Nexesenex/Nemotron_W_4b_Halo_0.1", "name": "Nemotron_W_4b_Halo_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3627, "hfopenllm_v2/BBH": 0.4135, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4165, "hfopenllm_v2/MMLU-PRO": 0.2505 } }, { "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1", "name": "Nemotron_W_4b_MagLight_0.1", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.423, "hfopenllm_v2/BBH": 0.4231, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4112, "hfopenllm_v2/MMLU-PRO": 0.2545 } }, { "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", "name": "pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.589, "hfopenllm_v2/BBH": 0.3562, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.1803 } }, { "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a", "name": "Qwen_2.5_3b_Smarteaz_0.01a", "developer": "Nexesenex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4012, "hfopenllm_v2/BBH": 0.4637, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.286 } }, { "id": "Nexusflow/NexusRaven-V2-13B", "name": "NexusRaven-V2-13B", "developer": "Nexusflow", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1791, "hfopenllm_v2/BBH": 0.3949, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3737, "hfopenllm_v2/MMLU-PRO": 0.1872 } }, { "id": "Nexusflow/Starling-RM-34B", "name": "Nexusflow/Starling-RM-34B", "developer": "Nexusflow", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8133, "reward-bench/Factuality": 0.4589, "reward-bench/Precise IF": 0.3187, "reward-bench/Math": 0.6175, "reward-bench/Safety": 0.877, "reward-bench/Focus": 0.4808, "reward-bench/Ties": 0.1004, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.5724, "reward-bench/Reasoning": 0.8845, "reward-bench/Prior Sets (0.5 weight)": 0.7137 } }, { "id": "nguyentd/FinancialAdvice-Qwen2.5-7B", "name": "FinancialAdvice-Qwen2.5-7B", "developer": "nguyentd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4496, "hfopenllm_v2/BBH": 0.4731, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4025, "hfopenllm_v2/MMLU-PRO": 0.3752 } }, { "id": "ngxson/MiniThinky-1B-Llama-3.2", "name": "MiniThinky-1B-Llama-3.2", "developer": "ngxson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2771, "hfopenllm_v2/BBH": 0.3142, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "ngxson/MiniThinky-v2-1B-Llama-3.2", "name": "MiniThinky-v2-1B-Llama-3.2", "developer": "ngxson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2963, "hfopenllm_v2/BBH": 0.3205, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.1116 } }, { "id": "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314", "name": "merge_Qwen2.5-7B-Instruct_20241023_0314", "developer": "nhyha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5695, "hfopenllm_v2/BBH": 0.5559, "hfopenllm_v2/MATH Level 5": 0.3542, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4251, "hfopenllm_v2/MMLU-PRO": 0.4542 } }, { "id": "nhyha/N3N_Delirium-v1_1030_0227", "name": "N3N_Delirium-v1_1030_0227", "developer": "nhyha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8023, "hfopenllm_v2/BBH": 0.5891, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.415 } }, { "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532", "name": "N3N_gemma-2-9b-it_20241029_1532", "developer": "nhyha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6752, "hfopenllm_v2/BBH": 0.5863, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4594, "hfopenllm_v2/MMLU-PRO": 0.4122 } }, { "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026", "name": "N3N_gemma-2-9b-it_20241110_2026", "developer": "nhyha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6283, "hfopenllm_v2/BBH": 0.5867, "hfopenllm_v2/MATH Level 5": 0.1609, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.402 } }, { "id": "nhyha/N3N_Llama-3.1-8B-Instruct_1028_0216", "name": "N3N_Llama-3.1-8B-Instruct_1028_0216", "developer": "nhyha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4796, "hfopenllm_v2/BBH": 0.5054, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.405, "hfopenllm_v2/MMLU-PRO": 0.3638 } }, { "id": "nicolinho/QRM-Gemma-2-27B", "name": "nicolinho/QRM-Gemma-2-27B", "developer": "nicolinho", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9444, "reward-bench/Factuality": 0.7853, "reward-bench/Precise IF": 0.3719, "reward-bench/Math": 0.6995, "reward-bench/Safety": 0.927, "reward-bench/Focus": 0.9535, "reward-bench/Ties": 0.8321, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.9013, "reward-bench/Reasoning": 0.9826 } }, { "id": "nicolinho/QRM-Llama3-8B", "name": "nicolinho/QRM-Llama3-8B", "developer": "nicolinho", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.911, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.8114, "reward-bench/Safety": 0.8986, "reward-bench/Reasoning": 0.9758 } }, { "id": "nicolinho/QRM-Llama3.1-8B", "name": "nicolinho/QRM-Llama3.1-8B", "developer": "nicolinho", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9306, "reward-bench/Chat": 0.9441, "reward-bench/Chat Hard": 0.8969, "reward-bench/Safety": 0.923, "reward-bench/Reasoning": 0.9583 } }, { "id": "nicolinho/QRM-Llama3.1-8B-v2", "name": "nicolinho/QRM-Llama3.1-8B-v2", "developer": "nicolinho", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7074, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.8684, "reward-bench/Safety": 0.9467, "reward-bench/Reasoning": 0.9677, "reward-bench/Factuality": 0.6653, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.612, "reward-bench/Focus": 0.8909, "reward-bench/Ties": 0.7234 } }, { "id": "nidum/Nidum-Limitless-Gemma-2B", "name": "Nidum-Limitless-Gemma-2B", "developer": "nidum", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2424, "hfopenllm_v2/BBH": 0.3079, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.1174 } }, { "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch", "name": "AceMath-1.5B-Instruct-1epoch", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2849, "hfopenllm_v2/BBH": 0.4263, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3925, "hfopenllm_v2/MMLU-PRO": 0.2376 } }, { "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200", "name": "AceMath-1.5B-Instruct-dolphin-r1-200", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1808, "hfopenllm_v2/BBH": 0.2815, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.1143 } }, { "id": "NikolaSigmoid/acemath-200", "name": "acemath-200", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2849, "hfopenllm_v2/BBH": 0.4263, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3925, "hfopenllm_v2/MMLU-PRO": 0.2376 } }, { "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500", "name": "DeepSeek-R1-Distill-Qwen-1.5B-500", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1749, "hfopenllm_v2/BBH": 0.2602, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "NikolaSigmoid/phi-4-14b", "name": "phi-4-14b", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0561, "hfopenllm_v2/BBH": 0.6695, "hfopenllm_v2/MATH Level 5": 0.2938, "hfopenllm_v2/GPQA": 0.4035, "hfopenllm_v2/MUSR": 0.5047, "hfopenllm_v2/MMLU-PRO": 0.5278 } }, { "id": "NikolaSigmoid/phi-4-1steps", "name": "phi-4-1steps", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0528, "hfopenllm_v2/BBH": 0.6707, "hfopenllm_v2/MATH Level 5": 0.2983, "hfopenllm_v2/GPQA": 0.4018, "hfopenllm_v2/MUSR": 0.5021, "hfopenllm_v2/MMLU-PRO": 0.5273 } }, { "id": "NikolaSigmoid/phi-4-300steps", "name": "phi-4-300steps", "developer": "NikolaSigmoid", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0561, "hfopenllm_v2/BBH": 0.6701, "hfopenllm_v2/MATH Level 5": 0.2946, "hfopenllm_v2/GPQA": 0.4052, "hfopenllm_v2/MUSR": 0.5034, "hfopenllm_v2/MMLU-PRO": 0.5288 } }, { "id": "nisten/franqwenstein-35b", "name": "franqwenstein-35b", "developer": "nisten", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3914, "hfopenllm_v2/BBH": 0.6591, "hfopenllm_v2/MATH Level 5": 0.3044, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4681, "hfopenllm_v2/MMLU-PRO": 0.5611 } }, { "id": "nisten/tqwendo-36b", "name": "tqwendo-36b", "developer": "nisten", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6778, "hfopenllm_v2/BBH": 0.6432, "hfopenllm_v2/MATH Level 5": 0.4154, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.4381 } }, { "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420", "name": "Captain-Eris-BMO_Violent-GRPO-v0.420", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6313, "hfopenllm_v2/BBH": 0.5079, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.3596 } }, { "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B", "name": "Captain-Eris_BMO-Violent-12B", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6152, "hfopenllm_v2/BBH": 0.5104, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4255, "hfopenllm_v2/MMLU-PRO": 0.3571 } }, { "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420", "name": "Captain-Eris_Violet-GRPO-v0.420", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6262, "hfopenllm_v2/BBH": 0.5159, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4279, "hfopenllm_v2/MMLU-PRO": 0.3535 } }, { "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B", "name": "Captain-Eris_Violet-V0.420-12B", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4339, "hfopenllm_v2/BBH": 0.5478, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4331, "hfopenllm_v2/MMLU-PRO": 0.3723 } }, { "id": "Nitral-AI/Captain_BMO-12B", "name": "Captain_BMO-12B", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4751, "hfopenllm_v2/BBH": 0.5286, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.3748, "hfopenllm_v2/MMLU-PRO": 0.3569 } }, { "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B", "name": "Hathor_Stable-v0.2-L3-8B", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7175, "hfopenllm_v2/BBH": 0.5286, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.3696 } }, { "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85", "name": "Hathor_Tahsin-L3-8B-v0.85", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.711, "hfopenllm_v2/BBH": 0.5279, "hfopenllm_v2/MATH Level 5": 0.1005, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.372 } }, { "id": "Nitral-AI/Nera_Noctis-12B", "name": "Nera_Noctis-12B", "developer": "Nitral-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4562, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3979, "hfopenllm_v2/MMLU-PRO": 0.3468 } }, { "id": "NJS26/NJS_777", "name": "NJS_777", "developer": "NJS26", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1881, "hfopenllm_v2/BBH": 0.2178, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2064, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.1163 } }, { "id": "NLPark/AnFeng_v3.1-Avocet", "name": "AnFeng_v3.1-Avocet", "developer": "NLPark", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5096, "hfopenllm_v2/BBH": 0.5829, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4476, "hfopenllm_v2/MMLU-PRO": 0.4438 } }, { "id": "NLPark/B-and-W_Flycatcher-3AD1E", "name": "B-and-W_Flycatcher-3AD1E", "developer": "NLPark", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4908, "hfopenllm_v2/BBH": 0.6065, "hfopenllm_v2/MATH Level 5": 0.2379, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4423, "hfopenllm_v2/MMLU-PRO": 0.4741 } }, { "id": "NLPark/Shi-Ci-Robin-Test_3AD80", "name": "Shi-Ci-Robin-Test_3AD80", "developer": "NLPark", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7227, "hfopenllm_v2/BBH": 0.6705, "hfopenllm_v2/MATH Level 5": 0.3157, "hfopenllm_v2/GPQA": 0.3599, "hfopenllm_v2/MUSR": 0.4696, "hfopenllm_v2/MMLU-PRO": 0.5121 } }, { "id": "nlpguy/Lion-Lamarck-v.1.0.8", "name": "Lion-Lamarck-v.1.0.8", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4509, "hfopenllm_v2/BBH": 0.5869, "hfopenllm_v2/MATH Level 5": 0.5544, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4673, "hfopenllm_v2/MMLU-PRO": 0.4643 } }, { "id": "nlpguy/Lion-Lamarck-v.1.0.9", "name": "Lion-Lamarck-v.1.0.9", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3409, "hfopenllm_v2/BBH": 0.5918, "hfopenllm_v2/MATH Level 5": 0.5642, "hfopenllm_v2/GPQA": 0.3901, "hfopenllm_v2/MUSR": 0.53, "hfopenllm_v2/MMLU-PRO": 0.4704 } }, { "id": "nlpguy/Lion-Lamarck-v.1.1.0", "name": "Lion-Lamarck-v.1.1.0", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3658, "hfopenllm_v2/BBH": 0.5962, "hfopenllm_v2/MATH Level 5": 0.5755, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.5325, "hfopenllm_v2/MMLU-PRO": 0.4631 } }, { "id": "nlpguy/Miisce-one", "name": "Miisce-one", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6066, "hfopenllm_v2/BBH": 0.6505, "hfopenllm_v2/MATH Level 5": 0.4169, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.5412 } }, { "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v1", "name": "Mistral-NeMo-Minitron-Upscale-v1", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1648, "hfopenllm_v2/BBH": 0.4468, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.2537 } }, { "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v2", "name": "Mistral-NeMo-Minitron-Upscale-v2", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1573, "hfopenllm_v2/BBH": 0.395, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3791, "hfopenllm_v2/MMLU-PRO": 0.1927 } }, { "id": "nlpguy/Mistral-NeMo-Minitron-Upscale-v3", "name": "Mistral-NeMo-Minitron-Upscale-v3", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1412, "hfopenllm_v2/BBH": 0.3052, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.1171 } }, { "id": "nlpguy/StableProse", "name": "StableProse", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1972, "hfopenllm_v2/BBH": 0.5117, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4067, "hfopenllm_v2/MMLU-PRO": 0.3468 } }, { "id": "nlpguy/StarFusion-alpha1", "name": "StarFusion-alpha1", "developer": "nlpguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.566, "hfopenllm_v2/BBH": 0.4429, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.3191 } }, { "id": "Nohobby/MS-Schisandra-22B-v0.1", "name": "MS-Schisandra-22B-v0.1", "developer": "Nohobby", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6331, "hfopenllm_v2/BBH": 0.579, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.4096 } }, { "id": "Nohobby/MS-Schisandra-22B-v0.2", "name": "MS-Schisandra-22B-v0.2", "developer": "Nohobby", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6383, "hfopenllm_v2/BBH": 0.5841, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4075, "hfopenllm_v2/MMLU-PRO": 0.4136 } }, { "id": "noname0202/gemma-2-2b-it-ties", "name": "gemma-2-2b-it-ties", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1266, "hfopenllm_v2/BBH": 0.4206, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3929, "hfopenllm_v2/MMLU-PRO": 0.2561 } }, { "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v1", "name": "gemma-2-9b-sft-jp-en-zh-v1", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2988, "hfopenllm_v2/BBH": 0.4519, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.3125 } }, { "id": "noname0202/gemma-2-9b-sft-jp-en-zh-v2", "name": "gemma-2-9b-sft-jp-en-zh-v2", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3993, "hfopenllm_v2/BBH": 0.4515, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3612, "hfopenllm_v2/MMLU-PRO": 0.3675 } }, { "id": "noname0202/Llama-3.2-4x3B-Instruct", "name": "Llama-3.2-4x3B-Instruct", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7067, "hfopenllm_v2/BBH": 0.4647, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3674, "hfopenllm_v2/MMLU-PRO": 0.3285 } }, { "id": "noname0202/llama-math-1b-r16-0to512tokens-test", "name": "llama-math-1b-r16-0to512tokens-test", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.547, "hfopenllm_v2/BBH": 0.3488, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3143, "hfopenllm_v2/MMLU-PRO": 0.1728 } }, { "id": "noname0202/llama-math-1b-r32-0to512tokens-test", "name": "llama-math-1b-r32-0to512tokens-test", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5683, "hfopenllm_v2/BBH": 0.3495, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.176 } }, { "id": "noname0202/llama-math-1b-r32-test", "name": "llama-math-1b-r32-test", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5819, "hfopenllm_v2/BBH": 0.3486, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3156, "hfopenllm_v2/MMLU-PRO": 0.1781 } }, { "id": "noname0202/llama-math-1b-r8-512tokens-test", "name": "llama-math-1b-r8-512tokens-test", "developer": "noname0202", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5792, "hfopenllm_v2/BBH": 0.3496, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3169, "hfopenllm_v2/MMLU-PRO": 0.1753 } }, { "id": "Norquinal/Alpha", "name": "Alpha", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2803, "hfopenllm_v2/BBH": 0.3374, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.3003 } }, { "id": "Norquinal/Bravo", "name": "Bravo", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3025, "hfopenllm_v2/BBH": 0.3558, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.3127 } }, { "id": "Norquinal/Charlie", "name": "Charlie", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3061, "hfopenllm_v2/BBH": 0.3515, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3737, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "Norquinal/Delta", "name": "Delta", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2538, "hfopenllm_v2/BBH": 0.3435, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3777, "hfopenllm_v2/MMLU-PRO": 0.2959 } }, { "id": "Norquinal/Echo", "name": "Echo", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3158, "hfopenllm_v2/BBH": 0.353, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.3095 } }, { "id": "Norquinal/Foxtrot", "name": "Foxtrot", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3012, "hfopenllm_v2/BBH": 0.3558, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.305 } }, { "id": "Norquinal/Golf", "name": "Golf", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3534, "hfopenllm_v2/BBH": 0.3533, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.3056 } }, { "id": "Norquinal/Hotel", "name": "Hotel", "developer": "Norquinal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3215, "hfopenllm_v2/BBH": 0.3679, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3288, "hfopenllm_v2/MMLU-PRO": 0.3157 } }, { "id": "NotASI/FineTome-Llama3.2-1B-0929", "name": "FineTome-Llama3.2-1B-0929", "developer": "NotASI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3991, "hfopenllm_v2/BBH": 0.3246, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3488, "hfopenllm_v2/MMLU-PRO": 0.1429 } }, { "id": "NotASI/FineTome-Llama3.2-3B-1002", "name": "FineTome-Llama3.2-3B-1002", "developer": "NotASI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5474, "hfopenllm_v2/BBH": 0.4319, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3685, "hfopenllm_v2/MMLU-PRO": 0.2437 } }, { "id": "NotASI/FineTome-v1.5-Llama3.2-1B-1007", "name": "FineTome-v1.5-Llama3.2-1B-1007", "developer": "NotASI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3924, "hfopenllm_v2/BBH": 0.3241, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3475, "hfopenllm_v2/MMLU-PRO": 0.1427 } }, { "id": "NotASI/FineTome-v1.5-Llama3.2-3B-1007", "name": "FineTome-v1.5-Llama3.2-3B-1007", "developer": "NotASI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5508, "hfopenllm_v2/BBH": 0.4312, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3645, "hfopenllm_v2/MMLU-PRO": 0.2448 } }, { "id": "notbdq/Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", "name": "Qwen2.5-14B-Instruct-1M-GRPO-Reasoning", "developer": "notbdq", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8414, "hfopenllm_v2/BBH": 0.6198, "hfopenllm_v2/MATH Level 5": 0.5302, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.485 } }, { "id": "nothingiisreal/L3.1-8B-Celeste-V1.5", "name": "L3.1-8B-Celeste-V1.5", "developer": "nothingiisreal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7327, "hfopenllm_v2/BBH": 0.5012, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3749, "hfopenllm_v2/MMLU-PRO": 0.3704 } }, { "id": "nothingiisreal/MN-12B-Starcannon-v2", "name": "MN-12B-Starcannon-v2", "developer": "nothingiisreal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3925, "hfopenllm_v2/BBH": 0.5004, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3978, "hfopenllm_v2/MMLU-PRO": 0.3128 } }, { "id": "nothingiisreal/MN-12B-Starcannon-v3", "name": "MN-12B-Starcannon-v3", "developer": "nothingiisreal", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3807, "hfopenllm_v2/BBH": 0.5171, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.3265 } }, { "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", "name": "DeepHermes-3-Mistral-24B-Preview", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4536, "hfopenllm_v2/BBH": 0.6488, "hfopenllm_v2/MATH Level 5": 0.2576, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4503, "hfopenllm_v2/MMLU-PRO": 0.459 } }, { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "name": "Hermes-2-Pro-Llama-3-8B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5362, "hfopenllm_v2/BBH": 0.5071, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4262, "hfopenllm_v2/MMLU-PRO": 0.3052 } }, { "id": "NousResearch/Hermes-2-Pro-Mistral-7B", "name": "Hermes-2-Pro-Mistral-7B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5668, "hfopenllm_v2/BBH": 0.4995, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4376, "hfopenllm_v2/MMLU-PRO": 0.2946 } }, { "id": "NousResearch/Hermes-2-Theta-Llama-3-8B", "name": "Hermes-2-Theta-Llama-3-8B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6518, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3949, "hfopenllm_v2/MMLU-PRO": 0.3369 } }, { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "name": "NousResearch/Hermes-3-Llama-3.1-70B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7661, "hfopenllm_v2/BBH": 0.6756, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4949, "hfopenllm_v2/MMLU-PRO": 0.4727, "reward-bench/Score": 0.7847, "reward-bench/Chat": 0.9623, "reward-bench/Chat Hard": 0.5669, "reward-bench/Safety": 0.823, "reward-bench/Reasoning": 0.7867 } }, { "id": "NousResearch/Hermes-3-Llama-3.1-8B", "name": "Hermes-3-Llama-3.1-8B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.617, "hfopenllm_v2/BBH": 0.5177, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4369, "hfopenllm_v2/MMLU-PRO": 0.3139 } }, { "id": "NousResearch/Hermes-3-Llama-3.2-3B", "name": "Hermes-3-Llama-3.2-3B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3825, "hfopenllm_v2/BBH": 0.4352, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.403, "hfopenllm_v2/MMLU-PRO": 0.2544 } }, { "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5763, "hfopenllm_v2/BBH": 0.4853, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4, "hfopenllm_v2/MMLU-PRO": 0.3015, "reward-bench/Score": 0.7481, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.6053, "reward-bench/Safety": 0.8243, "reward-bench/Reasoning": 0.7375, "reward-bench/Prior Sets (0.5 weight)": 0.555 } }, { "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5897, "hfopenllm_v2/BBH": 0.5539, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.3666, "reward-bench/Score": 0.7138, "reward-bench/Chat": 0.9162, "reward-bench/Chat Hard": 0.6053, "reward-bench/Safety": 0.8149, "reward-bench/Reasoning": 0.6126, "reward-bench/Prior Sets (0.5 weight)": 0.5266 } }, { "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", "name": "Nous-Hermes-2-Mixtral-8x7B-SFT", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5731, "hfopenllm_v2/BBH": 0.5058, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4214, "hfopenllm_v2/MMLU-PRO": 0.3066 } }, { "id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B", "name": "Nous-Hermes-2-SOLAR-10.7B", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5279, "hfopenllm_v2/BBH": 0.5414, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4373, "hfopenllm_v2/MMLU-PRO": 0.3458 } }, { "id": "NousResearch/Nous-Hermes-llama-2-7b", "name": "Nous-Hermes-llama-2-7b", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1729, "hfopenllm_v2/BBH": 0.3824, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4257, "hfopenllm_v2/MMLU-PRO": 0.194 } }, { "id": "NousResearch/Yarn-Llama-2-13b-128k", "name": "Yarn-Llama-2-13b-128k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1655, "hfopenllm_v2/BBH": 0.3827, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3458, "hfopenllm_v2/MMLU-PRO": 0.232 } }, { "id": "NousResearch/Yarn-Llama-2-7b-128k", "name": "Yarn-Llama-2-7b-128k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1485, "hfopenllm_v2/BBH": 0.3248, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3967, "hfopenllm_v2/MMLU-PRO": 0.1791 } }, { "id": "NousResearch/Yarn-Llama-2-7b-64k", "name": "Yarn-Llama-2-7b-64k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.17, "hfopenllm_v2/BBH": 0.3326, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3939, "hfopenllm_v2/MMLU-PRO": 0.1799 } }, { "id": "NousResearch/Yarn-Mistral-7b-128k", "name": "Yarn-Mistral-7b-128k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1934, "hfopenllm_v2/BBH": 0.4314, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.2893 } }, { "id": "NousResearch/Yarn-Mistral-7b-64k", "name": "Yarn-Mistral-7b-64k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.208, "hfopenllm_v2/BBH": 0.4293, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.2914 } }, { "id": "NousResearch/Yarn-Solar-10b-32k", "name": "Yarn-Solar-10b-32k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1942, "hfopenllm_v2/BBH": 0.4987, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4146, "hfopenllm_v2/MMLU-PRO": 0.3272 } }, { "id": "NousResearch/Yarn-Solar-10b-64k", "name": "Yarn-Solar-10b-64k", "developer": "NousResearch", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1989, "hfopenllm_v2/BBH": 0.4922, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4014, "hfopenllm_v2/MMLU-PRO": 0.3148 } }, { "id": "Novaciano/ASTAROTH-3.2-1B", "name": "ASTAROTH-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5613, "hfopenllm_v2/BBH": 0.3543, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3142, "hfopenllm_v2/MMLU-PRO": 0.1909 } }, { "id": "Novaciano/BLAST_PROCESSING-3.2-1B", "name": "BLAST_PROCESSING-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3922, "hfopenllm_v2/BBH": 0.346, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3351, "hfopenllm_v2/MMLU-PRO": 0.1941 } }, { "id": "Novaciano/Cerberus-3.2-1B", "name": "Cerberus-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5017, "hfopenllm_v2/BBH": 0.4165, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1663 } }, { "id": "Novaciano/Cultist-3.2-1B", "name": "Cultist-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5295, "hfopenllm_v2/BBH": 0.3399, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.333, "hfopenllm_v2/MMLU-PRO": 0.1714 } }, { "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP", "name": "FuseChat-3.2-1B-GRPO_Creative_RP", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5598, "hfopenllm_v2/BBH": 0.3488, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3329, "hfopenllm_v2/MMLU-PRO": 0.1735 } }, { "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative", "name": "Fusetrix-3.2-1B-GRPO_RP_Creative", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5366, "hfopenllm_v2/BBH": 0.3435, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3209, "hfopenllm_v2/MMLU-PRO": 0.1758 } }, { "id": "Novaciano/Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", "name": "Fusetrix-Dolphin-3.2-1B-GRPO_Creative_RP", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5343, "hfopenllm_v2/BBH": 0.3502, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3183, "hfopenllm_v2/MMLU-PRO": 0.1823 } }, { "id": "Novaciano/HarmfulProject-3.2-1B", "name": "HarmfulProject-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3874, "hfopenllm_v2/BBH": 0.3274, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3419, "hfopenllm_v2/MMLU-PRO": 0.1823 } }, { "id": "Novaciano/La_Mejor_Mezcla-3.2-1B", "name": "La_Mejor_Mezcla-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.551, "hfopenllm_v2/BBH": 0.3488, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1829 } }, { "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B", "name": "LEWD-Mental-Cultist-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5309, "hfopenllm_v2/BBH": 0.3513, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3223, "hfopenllm_v2/MMLU-PRO": 0.1769 } }, { "id": "Novaciano/Sigil-Of-Satan-3.2-1B", "name": "Sigil-Of-Satan-3.2-1B", "developer": "Novaciano", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5494, "hfopenllm_v2/BBH": 0.3546, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3276, "hfopenllm_v2/MMLU-PRO": 0.1855 } }, { "id": "NTQAI/Nxcode-CQ-7B-orpo", "name": "Nxcode-CQ-7B-orpo", "developer": "NTQAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4007, "hfopenllm_v2/BBH": 0.4143, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.394, "hfopenllm_v2/MMLU-PRO": 0.1612 } }, { "id": "NTQAI/NxMobileLM-1.5B-SFT", "name": "NxMobileLM-1.5B-SFT", "developer": "NTQAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6392, "hfopenllm_v2/BBH": 0.3957, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.2817 } }, { "id": "NucleusAI/nucleus-22B-token-500B", "name": "nucleus-22B-token-500B", "developer": "NucleusAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0257, "hfopenllm_v2/BBH": 0.292, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3511, "hfopenllm_v2/MMLU-PRO": 0.1162 } }, { "id": "nvidia/AceInstruct-1.5B", "name": "AceInstruct-1.5B", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3948, "hfopenllm_v2/BBH": 0.3932, "hfopenllm_v2/MATH Level 5": 0.3127, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.2574 } }, { "id": "nvidia/AceInstruct-72B", "name": "AceInstruct-72B", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7119, "hfopenllm_v2/BBH": 0.6139, "hfopenllm_v2/MATH Level 5": 0.6261, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4206, "hfopenllm_v2/MMLU-PRO": 0.4874 } }, { "id": "nvidia/AceInstruct-7B", "name": "AceInstruct-7B", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5422, "hfopenllm_v2/BBH": 0.5501, "hfopenllm_v2/MATH Level 5": 0.5295, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4255, "hfopenllm_v2/MMLU-PRO": 0.4177 } }, { "id": "nvidia/AceMath-1.5B-Instruct", "name": "AceMath-1.5B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3212, "hfopenllm_v2/BBH": 0.4024, "hfopenllm_v2/MATH Level 5": 0.5287, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3607, "hfopenllm_v2/MMLU-PRO": 0.2064 } }, { "id": "nvidia/AceMath-72B-Instruct", "name": "AceMath-72B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.495, "hfopenllm_v2/BBH": 0.6402, "hfopenllm_v2/MATH Level 5": 0.7145, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4062, "hfopenllm_v2/MMLU-PRO": 0.4411 } }, { "id": "nvidia/AceMath-72B-RM", "name": "AceMath-72B-RM", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1413, "hfopenllm_v2/BBH": 0.2717, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2341, "hfopenllm_v2/MUSR": 0.3351, "hfopenllm_v2/MMLU-PRO": 0.1179 } }, { "id": "nvidia/AceMath-7B-Instruct", "name": "AceMath-7B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4532, "hfopenllm_v2/BBH": 0.4994, "hfopenllm_v2/MATH Level 5": 0.6337, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4193, "hfopenllm_v2/MMLU-PRO": 0.3383 } }, { "id": "nvidia/AceMath-7B-RM", "name": "AceMath-7B-RM", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1494, "hfopenllm_v2/BBH": 0.2423, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "nvidia/Hymba-1.5B-Base", "name": "Hymba-1.5B-Base", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2295, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.1922 } }, { "id": "nvidia/Hymba-1.5B-Instruct", "name": "Hymba-1.5B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6009, "hfopenllm_v2/BBH": 0.3067, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3316, "hfopenllm_v2/MMLU-PRO": 0.204 } }, { "id": "nvidia/llama-3-1-nemotron-ultra-253b-v1-fc", "name": "Llama-3.1-Nemotron-Ultra-253B-v1 (FC)", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 108.0, "bfcl/bfcl.overall.overall_accuracy": 10.0, "bfcl/bfcl.overall.total_cost_usd": 0.72, "bfcl/bfcl.overall.latency_mean_s": 1.42, "bfcl/bfcl.overall.latency_std_s": 1.84, "bfcl/bfcl.overall.latency_p95_s": 2.4, "bfcl/bfcl.non_live.ast_accuracy": 0.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 0.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_accuracy": 0.0, "bfcl/bfcl.live.live_simple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 0.0, "bfcl/bfcl.multi_turn.base_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 0.0, "bfcl/bfcl.memory.kv_accuracy": 0.0, "bfcl/bfcl.memory.vector_accuracy": 0.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 0.0, "bfcl/bfcl.relevance.relevance_detection_accuracy": 0.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 100.0 } }, { "id": "nvidia/Llama-3.1-Minitron-4B-Depth-Base", "name": "Llama-3.1-Minitron-4B-Depth-Base", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1607, "hfopenllm_v2/BBH": 0.4171, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4011, "hfopenllm_v2/MMLU-PRO": 0.2798 } }, { "id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", "name": "Llama-3.1-Nemotron-70B-Instruct-HF", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7381, "hfopenllm_v2/BBH": 0.6316, "hfopenllm_v2/MATH Level 5": 0.4267, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.4919 } }, { "id": "nvidia/Llama-3.1-Nemotron-70B-Reward", "name": "nvidia/Llama-3.1-Nemotron-70B-Reward", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9411, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.8575, "reward-bench/Safety": 0.9514, "reward-bench/Reasoning": 0.9807 } }, { "id": "nvidia/Llama3-70B-SteerLM-RM", "name": "nvidia/Llama3-70B-SteerLM-RM", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8877, "reward-bench/Chat": 0.9134, "reward-bench/Chat Hard": 0.8026, "reward-bench/Safety": 0.9284, "reward-bench/Reasoning": 0.9064 } }, { "id": "nvidia/Minitron-4B-Base", "name": "Minitron-4B-Base", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2218, "hfopenllm_v2/BBH": 0.4084, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.4134, "hfopenllm_v2/MMLU-PRO": 0.262 } }, { "id": "nvidia/Minitron-8B-Base", "name": "Minitron-8B-Base", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2424, "hfopenllm_v2/BBH": 0.4395, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4026, "hfopenllm_v2/MMLU-PRO": 0.3181 } }, { "id": "nvidia/Mistral-NeMo-Minitron-8B-Base", "name": "Mistral-NeMo-Minitron-8B-Base", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1946, "hfopenllm_v2/BBH": 0.5219, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4092, "hfopenllm_v2/MMLU-PRO": 0.3796 } }, { "id": "nvidia/Mistral-NeMo-Minitron-8B-Instruct", "name": "Mistral-NeMo-Minitron-8B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5004, "hfopenllm_v2/BBH": 0.5321, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3886, "hfopenllm_v2/MMLU-PRO": 0.3991 } }, { "id": "nvidia/Nemotron-4-340B-Reward", "name": "nvidia/Nemotron-4-340B-Reward", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.92, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.8706, "reward-bench/Safety": 0.9149, "reward-bench/Reasoning": 0.9363 } }, { "id": "nvidia/Nemotron-Mini-4B-Instruct", "name": "Nemotron-Mini-4B-Instruct", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6669, "hfopenllm_v2/BBH": 0.3865, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3767, "hfopenllm_v2/MMLU-PRO": 0.2626 } }, { "id": "nvidia/OpenMath2-Llama3.1-8B", "name": "OpenMath2-Llama3.1-8B", "developer": "nvidia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2331, "hfopenllm_v2/BBH": 0.4096, "hfopenllm_v2/MATH Level 5": 0.2674, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3436, "hfopenllm_v2/MMLU-PRO": 0.1553 } }, { "id": "nxmwxm/Beast-Soul-new", "name": "Beast-Soul-new", "developer": "nxmwxm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4869, "hfopenllm_v2/BBH": 0.5227, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.3102 } }, { "id": "NYTK/PULI-GPTrio", "name": "PULI-GPTrio", "developer": "NYTK", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.218, "hfopenllm_v2/BBH": 0.306, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3819, "hfopenllm_v2/MMLU-PRO": 0.1137 } }, { "id": "NYTK/PULI-LlumiX-32K", "name": "PULI-LlumiX-32K", "developer": "NYTK", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.17, "hfopenllm_v2/BBH": 0.3189, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.1681 } }, { "id": "NyxKrage/Microsoft_Phi-4", "name": "Microsoft_Phi-4", "developer": "NyxKrage", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0585, "hfopenllm_v2/BBH": 0.6691, "hfopenllm_v2/MATH Level 5": 0.2991, "hfopenllm_v2/GPQA": 0.406, "hfopenllm_v2/MUSR": 0.5034, "hfopenllm_v2/MMLU-PRO": 0.5287 } }, { "id": "occiglot/occiglot-7b-es-en-instruct", "name": "occiglot-7b-es-en-instruct", "developer": "occiglot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3485, "hfopenllm_v2/BBH": 0.4111, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.2311 } }, { "id": "odyssey-labs/Astral-1-10B", "name": "Astral-1-10B", "developer": "odyssey-labs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3878, "hfopenllm_v2/BBH": 0.4873, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.2985 } }, { "id": "OEvortex/Emotional-llama-8B", "name": "Emotional-llama-8B", "developer": "OEvortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3516, "hfopenllm_v2/BBH": 0.4839, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3659, "hfopenllm_v2/MMLU-PRO": 0.3535 } }, { "id": "OEvortex/HelpingAI-15B", "name": "HelpingAI-15B", "developer": "OEvortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.203, "hfopenllm_v2/BBH": 0.2936, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "OEvortex/HelpingAI-3B-reloaded", "name": "HelpingAI-3B-reloaded", "developer": "OEvortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4647, "hfopenllm_v2/BBH": 0.4129, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3524, "hfopenllm_v2/MMLU-PRO": 0.2595 } }, { "id": "OEvortex/HelpingAI2-9B", "name": "HelpingAI2-9B", "developer": "OEvortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4413, "hfopenllm_v2/BBH": 0.4845, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3711, "hfopenllm_v2/MMLU-PRO": 0.29 } }, { "id": "OEvortex/HelpingAI2.5-10B", "name": "HelpingAI2.5-10B", "developer": "OEvortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3277, "hfopenllm_v2/BBH": 0.4496, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.2575 } }, { "id": "olabs-ai/reflection_model", "name": "reflection_model", "developer": "olabs-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1599, "hfopenllm_v2/BBH": 0.4713, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.3508, "hfopenllm_v2/MMLU-PRO": 0.3311 } }, { "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1", "name": "Sagui-7B-Instruct-v0.1", "developer": "OliveiraJLT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2892, "hfopenllm_v2/BBH": 0.3111, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.1485 } }, { "id": "Omkar1102/code-yi", "name": "code-yi", "developer": "Omkar1102", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2254, "hfopenllm_v2/BBH": 0.275, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3762, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b", "name": "NeuralStar_FusionWriter_4x7b", "developer": "OmnicromsBrain", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5964, "hfopenllm_v2/BBH": 0.4776, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.2606 } }, { "id": "OnlyCheeini/greesychat-turbo", "name": "greesychat-turbo", "developer": "OnlyCheeini", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0233, "hfopenllm_v2/BBH": 0.3092, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3314, "hfopenllm_v2/MMLU-PRO": 0.1138 } }, { "id": "ontocord/Llama_3.2_1b-autoredteam_helpfulness-train", "name": "Llama_3.2_1b-autoredteam_helpfulness-train", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2765, "hfopenllm_v2/BBH": 0.3115, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3459, "hfopenllm_v2/MMLU-PRO": 0.1132 } }, { "id": "ontocord/merged_0.2_expert_0.8", "name": "merged_0.2_expert_0.8", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1743, "hfopenllm_v2/BBH": 0.3046, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "ontocord/merged_0.2_expert_0.8-stack_2x", "name": "merged_0.2_expert_0.8-stack_2x", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1796, "hfopenllm_v2/BBH": 0.3006, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.1103 } }, { "id": "ontocord/merged_0.5_expert_0.5", "name": "merged_0.5_expert_0.5", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1787, "hfopenllm_v2/BBH": 0.3017, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3542, "hfopenllm_v2/MMLU-PRO": 0.1108 } }, { "id": "ontocord/ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", "name": "ontocord_wide_3b-stage1_shuf_sample1_jsonl-pretrained-autoredteam_helpful-0.25_helpful", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1318, "hfopenllm_v2/BBH": 0.3004, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "ontocord/ontocord_wide_7b-stacked-stage1", "name": "ontocord_wide_7b-stacked-stage1", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1485, "hfopenllm_v2/BBH": 0.2897, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3604, "hfopenllm_v2/MMLU-PRO": 0.1105 } }, { "id": "ontocord/ontocord_wide_7b-stacked-stage1-instruct", "name": "ontocord_wide_7b-stacked-stage1-instruct", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.153, "hfopenllm_v2/BBH": 0.2854, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.1117 } }, { "id": "ontocord/RedPajama-3B-v1-AutoRedteam", "name": "RedPajama-3B-v1-AutoRedteam", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1343, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.1108 } }, { "id": "ontocord/RedPajama-3B-v1-AutoRedteam-Harmless-only", "name": "RedPajama-3B-v1-AutoRedteam-Harmless-only", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1525, "hfopenllm_v2/BBH": 0.3124, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2315, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.11 } }, { "id": "ontocord/RedPajama3b_v1-autoredteam_helpfulness-train", "name": "RedPajama3b_v1-autoredteam_helpfulness-train", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2848, "hfopenllm_v2/BBH": 0.3093, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1107 } }, { "id": "ontocord/starcoder2-29b-ls", "name": "starcoder2-29b-ls", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2149, "hfopenllm_v2/BBH": 0.3735, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.1869 } }, { "id": "ontocord/starcoder2_3b-AutoRedteam", "name": "starcoder2_3b-AutoRedteam", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1574, "hfopenllm_v2/BBH": 0.3498, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3646, "hfopenllm_v2/MMLU-PRO": 0.1336 } }, { "id": "ontocord/wide_3b-merge_test", "name": "wide_3b-merge_test", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1763, "hfopenllm_v2/BBH": 0.3011, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.342, "hfopenllm_v2/MMLU-PRO": 0.1066 } }, { "id": "ontocord/wide_3b-stage1_shuf_sample1_jsonl-pretrained", "name": "wide_3b-stage1_shuf_sample1_jsonl-pretrained", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1395, "hfopenllm_v2/BBH": 0.3004, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3632, "hfopenllm_v2/MMLU-PRO": 0.114 } }, { "id": "ontocord/wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", "name": "wide_3b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1664, "hfopenllm_v2/BBH": 0.3031, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "ontocord/wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", "name": "wide_3b_sft_stag1.2-lyrical_news_software_howto_formattedtext-merge", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1697, "hfopenllm_v2/BBH": 0.2975, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", "name": "wide_3b_sft_stage1.1-ss1-no_redteam_skg_poem.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.148, "hfopenllm_v2/BBH": 0.3095, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.1108 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1237, "hfopenllm_v2/BBH": 0.306, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3673, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1192, "hfopenllm_v2/BBH": 0.2956, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3553, "hfopenllm_v2/MMLU-PRO": 0.1183 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1128, "hfopenllm_v2/BBH": 0.3171, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.346, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_math_stories_no_orig_instr.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1317, "hfopenllm_v2/BBH": 0.3064, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3446, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_intr_stories.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1182, "hfopenllm_v2/BBH": 0.3037, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3567, "hfopenllm_v2/MMLU-PRO": 0.1162 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_generics_math.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.124, "hfopenllm_v2/BBH": 0.3032, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3487, "hfopenllm_v2/MMLU-PRO": 0.1128 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_math.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_math.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1298, "hfopenllm_v2/BBH": 0.3052, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "ontocord/wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", "name": "wide_3b_sft_stage1.1-ss1-with_r1_generics_intr_math_stories.no_issue", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2049, "hfopenllm_v2/BBH": 0.2912, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", "name": "wide_3b_sft_stage1.2-ss1-expert_fictional_lyrical", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1461, "hfopenllm_v2/BBH": 0.2998, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3926, "hfopenllm_v2/MMLU-PRO": 0.1141 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_formatted_text", "name": "wide_3b_sft_stage1.2-ss1-expert_formatted_text", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1487, "hfopenllm_v2/BBH": 0.3069, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3474, "hfopenllm_v2/MMLU-PRO": 0.1146 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_how-to", "name": "wide_3b_sft_stage1.2-ss1-expert_how-to", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1245, "hfopenllm_v2/BBH": 0.3047, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.1153 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_math", "name": "wide_3b_sft_stage1.2-ss1-expert_math", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1915, "hfopenllm_v2/BBH": 0.306, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.1092 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_news", "name": "wide_3b_sft_stage1.2-ss1-expert_news", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1658, "hfopenllm_v2/BBH": 0.2926, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "ontocord/wide_3b_sft_stage1.2-ss1-expert_software", "name": "wide_3b_sft_stage1.2-ss1-expert_software", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1734, "hfopenllm_v2/BBH": 0.298, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3569, "hfopenllm_v2/MMLU-PRO": 0.114 } }, { "id": "ontocord/wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", "name": "wide_6.6b_sft_stag1.2-lyrical_law_news_software_howto_formattedtext_math_wiki-merge-stacked", "developer": "ontocord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1244, "hfopenllm_v2/BBH": 0.3026, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3686, "hfopenllm_v2/MMLU-PRO": 0.1115 } }, { "id": "oobabooga/CodeBooga-34B-v0.1", "name": "CodeBooga-34B-v0.1", "developer": "oobabooga", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.525, "hfopenllm_v2/BBH": 0.3427, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.236 } }, { "id": "oopere/Llama-FinSent-S", "name": "Llama-FinSent-S", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2164, "hfopenllm_v2/BBH": 0.3169, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3832, "hfopenllm_v2/MMLU-PRO": 0.1134 } }, { "id": "oopere/pruned10-llama-3.2-3B", "name": "pruned10-llama-3.2-3B", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1776, "hfopenllm_v2/BBH": 0.334, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3722, "hfopenllm_v2/MMLU-PRO": 0.164 } }, { "id": "oopere/pruned20-llama-1b", "name": "pruned20-llama-1b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1994, "hfopenllm_v2/BBH": 0.3031, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "oopere/pruned20-llama-3.2-3b", "name": "pruned20-llama-3.2-3b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1789, "hfopenllm_v2/BBH": 0.3248, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3418, "hfopenllm_v2/MMLU-PRO": 0.128 } }, { "id": "oopere/pruned40-llama-1b", "name": "pruned40-llama-1b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2284, "hfopenllm_v2/BBH": 0.2969, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.1082 } }, { "id": "oopere/pruned40-llama-3.2-1B", "name": "pruned40-llama-3.2-1B", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2266, "hfopenllm_v2/BBH": 0.2982, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.4352, "hfopenllm_v2/MMLU-PRO": 0.1115 } }, { "id": "oopere/pruned40-llama-3.2-3b", "name": "pruned40-llama-3.2-3b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2183, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2299, "hfopenllm_v2/MUSR": 0.3539, "hfopenllm_v2/MMLU-PRO": 0.1177 } }, { "id": "oopere/pruned60-llama-1b", "name": "pruned60-llama-1b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1829, "hfopenllm_v2/BBH": 0.3016, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.4088, "hfopenllm_v2/MMLU-PRO": 0.1173 } }, { "id": "oopere/pruned60-llama-3.2-3b", "name": "pruned60-llama-3.2-3b", "developer": "oopere", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1825, "hfopenllm_v2/BBH": 0.3166, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "open-atlas/Atlas-Flash-1.5B-Preview", "name": "Atlas-Flash-1.5B-Preview", "developer": "open-atlas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.327, "hfopenllm_v2/BBH": 0.3215, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3488, "hfopenllm_v2/MMLU-PRO": 0.1374 } }, { "id": "open-atlas/Atlas-Flash-7B-Preview", "name": "Atlas-Flash-7B-Preview", "developer": "open-atlas", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3908, "hfopenllm_v2/BBH": 0.3542, "hfopenllm_v2/MATH Level 5": 0.2576, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3836, "hfopenllm_v2/MMLU-PRO": 0.2784 } }, { "id": "open-neo/Kyro-n1-3B", "name": "Kyro-n1-3B", "developer": "open-neo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4595, "hfopenllm_v2/BBH": 0.4685, "hfopenllm_v2/MATH Level 5": 0.2855, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4088, "hfopenllm_v2/MMLU-PRO": 0.3423 } }, { "id": "open-neo/Kyro-n1-7B", "name": "Kyro-n1-7B", "developer": "open-neo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5573, "hfopenllm_v2/BBH": 0.5387, "hfopenllm_v2/MATH Level 5": 0.3897, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3884, "hfopenllm_v2/MMLU-PRO": 0.4333 } }, { "id": "Open-Orca/Mistral-7B-OpenOrca", "name": "Mistral-7B-OpenOrca", "developer": "Open-Orca", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4978, "hfopenllm_v2/BBH": 0.4768, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3858, "hfopenllm_v2/MMLU-PRO": 0.2653 } }, { "id": "open-thoughts/OpenThinker-7B", "name": "OpenThinker-7B", "developer": "open-thoughts", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4089, "hfopenllm_v2/BBH": 0.5343, "hfopenllm_v2/MATH Level 5": 0.426, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.4165 } }, { "id": "openai-community/gpt2", "name": "gpt2", "developer": "openai-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.178, "hfopenllm_v2/BBH": 0.3017, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.439, "hfopenllm_v2/MMLU-PRO": 0.1165 } }, { "id": "openai-community/gpt2-large", "name": "gpt2-large", "developer": "openai-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2048, "hfopenllm_v2/BBH": 0.3069, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3789, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "openai-community/gpt2-medium", "name": "gpt2-medium", "developer": "openai-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2208, "hfopenllm_v2/BBH": 0.305, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3884, "hfopenllm_v2/MMLU-PRO": 0.1182 } }, { "id": "openai-community/gpt2-xl", "name": "gpt2-xl", "developer": "openai-community", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2039, "hfopenllm_v2/BBH": 0.3009, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.371, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "openai/ada-350M", "name": "ada 350M", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.108, "helm_classic/MMLU": 0.243, "helm_classic/BoolQ": 0.581, "helm_classic/NarrativeQA": 0.326, "helm_classic/NaturalQuestions (open-book)": 0.365, "helm_classic/QuAC": 0.242, "helm_classic/HellaSwag": 0.435, "helm_classic/OpenbookQA": 0.38, "helm_classic/TruthfulQA": 0.215, "helm_classic/MS MARCO (TREC)": 0.29, "helm_classic/CNN/DailyMail": 0.09, "helm_classic/XSUM": 0.022, "helm_classic/IMDB": 0.849, "helm_classic/CivilComments": 0.517, "helm_classic/RAFT": 0.423 } }, { "id": "openai/babbage-1.3B", "name": "babbage 1.3B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.114, "helm_classic/MMLU": 0.235, "helm_classic/BoolQ": 0.574, "helm_classic/NarrativeQA": 0.491, "helm_classic/NaturalQuestions (open-book)": 0.451, "helm_classic/QuAC": 0.273, "helm_classic/HellaSwag": 0.555, "helm_classic/OpenbookQA": 0.438, "helm_classic/TruthfulQA": 0.188, "helm_classic/MS MARCO (TREC)": 0.317, "helm_classic/CNN/DailyMail": 0.079, "helm_classic/XSUM": 0.045, "helm_classic/IMDB": 0.597, "helm_classic/CivilComments": 0.519, "helm_classic/RAFT": 0.455 } }, { "id": "openai/curie-6.7B", "name": "curie 6.7B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.247, "helm_classic/MMLU": 0.243, "helm_classic/BoolQ": 0.656, "helm_classic/NarrativeQA": 0.604, "helm_classic/NaturalQuestions (open-book)": 0.552, "helm_classic/QuAC": 0.321, "helm_classic/HellaSwag": 0.682, "helm_classic/OpenbookQA": 0.502, "helm_classic/TruthfulQA": 0.232, "helm_classic/MS MARCO (TREC)": 0.3, "helm_classic/CNN/DailyMail": 0.113, "helm_classic/XSUM": 0.091, "helm_classic/IMDB": 0.889, "helm_classic/CivilComments": 0.539, "helm_classic/RAFT": 0.49 } }, { "id": "openai/davinci-175B", "name": "davinci 175B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.538, "helm_classic/MMLU": 0.422, "helm_classic/BoolQ": 0.722, "helm_classic/NarrativeQA": 0.687, "helm_classic/NaturalQuestions (open-book)": 0.625, "helm_classic/QuAC": 0.36, "helm_classic/HellaSwag": 0.775, "helm_classic/OpenbookQA": 0.586, "helm_classic/TruthfulQA": 0.194, "helm_classic/MS MARCO (TREC)": 0.378, "helm_classic/CNN/DailyMail": 0.127, "helm_classic/XSUM": 0.126, "helm_classic/IMDB": 0.933, "helm_classic/CivilComments": 0.532, "helm_classic/RAFT": 0.642 } }, { "id": "openai/GPT 4o", "name": "GPT 4o", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-v1/Overall Score": 0.359 } }, { "id": "openai/GPT 5", "name": "GPT 5", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.183, "apex-agents/Overall Pass@8": 0.31, "apex-agents/Overall Mean Score": 0.329, "apex-agents/Investment Banking Pass@1": 0.273, "apex-agents/Management Consulting Pass@1": 0.123, "apex-agents/Corporate Law Pass@1": 0.153, "apex-agents/Corporate Lawyer Mean Score": 0.382, "ace/Overall Score": 0.561, "ace/DIY Score": 0.55, "ace/Food Score": 0.7, "ace/Gaming Score": 0.575, "apex-v1/Overall Score": 0.67, "apex-v1/Big Law Score": 0.78, "apex-v1/Medicine (MD) Score": 0.66, "apex-v1/Investment Banking Score": 0.61 } }, { "id": "openai/GPT 5 Codex", "name": "GPT 5 Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.362 } }, { "id": "openai/GPT 5.1", "name": "GPT 5.1", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.376, "ace/Overall Score": 0.551, "ace/DIY Score": 0.56, "ace/Gaming Score": 0.61, "ace/Shopping Score": 0.45, "apex-v1/Big Law Score": 0.77 } }, { "id": "openai/GPT 5.1 Codex", "name": "GPT 5.1 Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.366 } }, { "id": "openai/GPT 5.2", "name": "GPT 5.2", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.515, "ace/Food Score": 0.65, "ace/Gaming Score": 0.578, "apex-agents/Overall Pass@1": 0.23, "apex-agents/Overall Pass@8": 0.4, "apex-agents/Overall Mean Score": 0.387, "apex-agents/Investment Banking Pass@1": 0.273, "apex-agents/Management Consulting Pass@1": 0.227, "apex-agents/Corporate Law Pass@1": 0.189, "apex-agents/Corporate Lawyer Mean Score": 0.443 } }, { "id": "openai/GPT 5.2 Codex", "name": "GPT 5.2 Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.276, "apex-agents/Corporate Lawyer Mean Score": 0.394 } }, { "id": "openai/GPT 5.2 Pro", "name": "GPT 5.2 Pro", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-v1/Overall Score": 0.668, "apex-v1/Consulting Score": 0.64, "apex-v1/Medicine (MD) Score": 0.65, "apex-v1/Investment Banking Score": 0.64 } }, { "id": "openai/GPT 5.3 Codex", "name": "GPT 5.3 Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.317 } }, { "id": "openai/GPT OSS 120B", "name": "GPT OSS 120B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.047, "apex-agents/Overall Pass@8": 0.115, "apex-agents/Overall Mean Score": 0.145, "apex-agents/Investment Banking Pass@1": 0.027, "apex-agents/Management Consulting Pass@1": 0.035, "apex-agents/Corporate Law Pass@1": 0.078, "apex-agents/Corporate Lawyer Mean Score": 0.269 } }, { "id": "openai/gpt-3.5-turbo-0125", "name": "GPT-3.5 Turbo 0125", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_mmlu/MMLU All Subjects": 0.673, "helm_mmlu/Abstract Algebra": 0.31, "helm_mmlu/Anatomy": 0.696, "helm_mmlu/College Physics": 0.471, "helm_mmlu/Computer Security": 0.78, "helm_mmlu/Econometrics": 0.474, "helm_mmlu/Global Facts": 0.39, "helm_mmlu/Jurisprudence": 0.806, "helm_mmlu/Philosophy": 0.746, "helm_mmlu/Professional Psychology": 0.722, "helm_mmlu/Us Foreign Policy": 0.89, "helm_mmlu/Astronomy": 0.75, "helm_mmlu/Business Ethics": 0.75, "helm_mmlu/Clinical Knowledge": 0.755, "helm_mmlu/Conceptual Physics": 0.634, "helm_mmlu/Electrical Engineering": 0.669, "helm_mmlu/Elementary Mathematics": 0.534, "helm_mmlu/Formal Logic": 0.444, "helm_mmlu/High School World History": 0.819, "helm_mmlu/Human Sexuality": 0.779, "helm_mmlu/International Law": 0.81, "helm_mmlu/Logical Fallacies": 0.779, "helm_mmlu/Machine Learning": 0.455, "helm_mmlu/Management": 0.835, "helm_mmlu/Marketing": 0.91, "helm_mmlu/Medical Genetics": 0.73, "helm_mmlu/Miscellaneous": 0.89, "helm_mmlu/Moral Scenarios": 0.355, "helm_mmlu/Nutrition": 0.748, "helm_mmlu/Prehistory": 0.735, "helm_mmlu/Public Relations": 0.727, "helm_mmlu/Security Studies": 0.751, "helm_mmlu/Sociology": 0.861, "helm_mmlu/Virology": 0.536, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.493, "reward-bench/Score": 0.6534, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.4452, "reward-bench/Safety": 0.6547, "reward-bench/Reasoning": 0.5912, "reward-bench/Prior Sets (0.5 weight)": 0.6548 } }, { "id": "openai/gpt-3.5-turbo-0301", "name": "gpt-3.5-turbo-0301", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.76, "helm_classic/MMLU": 0.59, "helm_classic/BoolQ": 0.74, "helm_classic/NarrativeQA": 0.663, "helm_classic/NaturalQuestions (open-book)": 0.624, "helm_classic/QuAC": 0.512, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.609, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.899, "helm_classic/CivilComments": 0.674, "helm_classic/RAFT": 0.768 } }, { "id": "openai/gpt-3.5-turbo-0613", "name": "GPT-3.5 Turbo 0613", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.783, "helm_classic/MMLU": 0.391, "helm_classic/BoolQ": 0.87, "helm_classic/NarrativeQA": 0.625, "helm_classic/NaturalQuestions (open-book)": 0.675, "helm_classic/QuAC": 0.485, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.339, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.943, "helm_classic/CivilComments": 0.696, "helm_classic/RAFT": 0.748, "helm_instruct/Mean win rate": 0.689, "helm_instruct/Anthropic RLHF dataset": 4.964, "helm_instruct/Best ChatGPT Prompts": 4.986, "helm_instruct/Koala test dataset": 4.987, "helm_instruct/Open Assistant": 4.987, "helm_instruct/Self Instruct": 4.99, "helm_instruct/Vicuna": 4.992, "helm_lite/Mean win rate": 0.358, "helm_lite/NarrativeQA": 0.655, "helm_lite/NaturalQuestions (closed-book)": 0.335, "helm_lite/OpenbookQA": 0.838, "helm_lite/MMLU": 0.614, "helm_lite/MATH": 0.667, "helm_lite/GSM8K": 0.501, "helm_lite/LegalBench": 0.528, "helm_lite/MedQA": 0.622, "helm_lite/WMT 2014": 0.187, "helm_mmlu/MMLU All Subjects": 0.689, "helm_mmlu/Abstract Algebra": 0.38, "helm_mmlu/Anatomy": 0.659, "helm_mmlu/College Physics": 0.461, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.5, "helm_mmlu/Global Facts": 0.37, "helm_mmlu/Jurisprudence": 0.806, "helm_mmlu/Philosophy": 0.759, "helm_mmlu/Professional Psychology": 0.732, "helm_mmlu/Us Foreign Policy": 0.88, "helm_mmlu/Astronomy": 0.763, "helm_mmlu/Business Ethics": 0.75, "helm_mmlu/Clinical Knowledge": 0.777, "helm_mmlu/Conceptual Physics": 0.613, "helm_mmlu/Electrical Engineering": 0.648, "helm_mmlu/Elementary Mathematics": 0.5, "helm_mmlu/Formal Logic": 0.397, "helm_mmlu/High School World History": 0.857, "helm_mmlu/Human Sexuality": 0.786, "helm_mmlu/International Law": 0.843, "helm_mmlu/Logical Fallacies": 0.791, "helm_mmlu/Machine Learning": 0.455, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.91, "helm_mmlu/Medical Genetics": 0.8, "helm_mmlu/Miscellaneous": 0.893, "helm_mmlu/Moral Scenarios": 0.404, "helm_mmlu/Nutrition": 0.758, "helm_mmlu/Prehistory": 0.787, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.8, "helm_mmlu/Sociology": 0.871, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.836, "helm_mmlu/Mean win rate": 0.589 } }, { "id": "openai/gpt-4-0125-preview", "name": "openai/gpt-4-0125-preview", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8434, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.7434, "reward-bench/Safety": 0.8757, "reward-bench/Reasoning": 0.8692, "reward-bench/Prior Sets (0.5 weight)": 0.7085 } }, { "id": "openai/gpt-4-0314", "name": "GPT-4 0314", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_instruct/Mean win rate": 0.611, "helm_instruct/Anthropic RLHF dataset": 4.934, "helm_instruct/Best ChatGPT Prompts": 4.973, "helm_instruct/Koala test dataset": 4.966, "helm_instruct/Open Assistant": 4.986, "helm_instruct/Self Instruct": 4.976, "helm_instruct/Vicuna": 4.995 } }, { "id": "openai/gpt-4-0613", "name": "GPT-4 0613", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.867, "helm_lite/NarrativeQA": 0.768, "helm_lite/NaturalQuestions (closed-book)": 0.457, "helm_lite/OpenbookQA": 0.96, "helm_lite/MMLU": 0.735, "helm_lite/MATH": 0.802, "helm_lite/GSM8K": 0.932, "helm_lite/LegalBench": 0.713, "helm_lite/MedQA": 0.815, "helm_lite/WMT 2014": 0.211, "helm_mmlu/MMLU All Subjects": 0.824, "helm_mmlu/Abstract Algebra": 0.63, "helm_mmlu/Anatomy": 0.8, "helm_mmlu/College Physics": 0.627, "helm_mmlu/Computer Security": 0.86, "helm_mmlu/Econometrics": 0.684, "helm_mmlu/Global Facts": 0.62, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.859, "helm_mmlu/Professional Psychology": 0.891, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.934, "helm_mmlu/Business Ethics": 0.79, "helm_mmlu/Clinical Knowledge": 0.845, "helm_mmlu/Conceptual Physics": 0.868, "helm_mmlu/Electrical Engineering": 0.786, "helm_mmlu/Elementary Mathematics": 0.807, "helm_mmlu/Formal Logic": 0.643, "helm_mmlu/High School World History": 0.945, "helm_mmlu/Human Sexuality": 0.908, "helm_mmlu/International Law": 0.917, "helm_mmlu/Logical Fallacies": 0.871, "helm_mmlu/Machine Learning": 0.759, "helm_mmlu/Management": 0.932, "helm_mmlu/Marketing": 0.962, "helm_mmlu/Medical Genetics": 0.94, "helm_mmlu/Miscellaneous": 0.949, "helm_mmlu/Moral Scenarios": 0.902, "helm_mmlu/Nutrition": 0.892, "helm_mmlu/Prehistory": 0.926, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.861, "helm_mmlu/Sociology": 0.93, "helm_mmlu/Virology": 0.596, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.517 } }, { "id": "openai/gpt-4-1-2025-04-14-fc", "name": "GPT-4.1-2025-04-14 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 20.0, "bfcl/bfcl.overall.overall_accuracy": 53.96, "bfcl/bfcl.overall.total_cost_usd": 100.75, "bfcl/bfcl.overall.latency_mean_s": 1.63, "bfcl/bfcl.overall.latency_std_s": 3.05, "bfcl/bfcl.overall.latency_p95_s": 4.01, "bfcl/bfcl.non_live.ast_accuracy": 82.79, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 81.5, "bfcl/bfcl.live.live_accuracy": 69.95, "bfcl/bfcl.live.live_simple_ast_accuracy": 69.38, "bfcl/bfcl.live.live_multiple_ast_accuracy": 70.28, "bfcl/bfcl.live.live_parallel_ast_accuracy": 56.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 38.88, "bfcl/bfcl.multi_turn.base_accuracy": 47.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 32.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 32.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 43.0, "bfcl/bfcl.web_search.accuracy": 68.0, "bfcl/bfcl.web_search.base_accuracy": 67.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 69.0, "bfcl/bfcl.memory.accuracy": 23.87, "bfcl/bfcl.memory.kv_accuracy": 16.13, "bfcl/bfcl.memory.vector_accuracy": 18.06, "bfcl/bfcl.memory.recursive_summarization_accuracy": 37.42, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.52 } }, { "id": "openai/gpt-4-1-2025-04-14-prompt", "name": "GPT-4.1-2025-04-14 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 45.0, "bfcl/bfcl.overall.overall_accuracy": 39.38, "bfcl/bfcl.overall.total_cost_usd": 145.85, "bfcl/bfcl.overall.latency_mean_s": 1.2, "bfcl/bfcl.overall.latency_std_s": 3.23, "bfcl/bfcl.overall.latency_p95_s": 2.53, "bfcl/bfcl.non_live.ast_accuracy": 88.69, "bfcl/bfcl.non_live.simple_ast_accuracy": 78.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.0, "bfcl/bfcl.live.live_accuracy": 78.9, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.88, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.4, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 9.75, "bfcl/bfcl.multi_turn.base_accuracy": 10.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 11.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 8.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 9.5, "bfcl/bfcl.web_search.accuracy": 35.0, "bfcl/bfcl.web_search.base_accuracy": 40.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 30.0, "bfcl/bfcl.memory.accuracy": 21.51, "bfcl/bfcl.memory.kv_accuracy": 9.68, "bfcl/bfcl.memory.vector_accuracy": 19.35, "bfcl/bfcl.memory.recursive_summarization_accuracy": 35.48, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 83.99, "bfcl/bfcl.format_sensitivity.max_delta": 23.5, "bfcl/bfcl.format_sensitivity.stddev": 6.18 } }, { "id": "openai/gpt-4-1-mini-2025-04-14-fc", "name": "GPT-4.1-mini-2025-04-14 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 27.0, "bfcl/bfcl.overall.overall_accuracy": 50.45, "bfcl/bfcl.overall.total_cost_usd": 19.25, "bfcl/bfcl.overall.latency_mean_s": 1.32, "bfcl/bfcl.overall.latency_std_s": 3.65, "bfcl/bfcl.overall.latency_p95_s": 2.4, "bfcl/bfcl.non_live.ast_accuracy": 83.83, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 82.0, "bfcl/bfcl.live.live_accuracy": 68.84, "bfcl/bfcl.live.live_simple_ast_accuracy": 67.05, "bfcl/bfcl.live.live_multiple_ast_accuracy": 69.8, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 34.13, "bfcl/bfcl.multi_turn.base_accuracy": 43.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 22.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 30.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 40.0, "bfcl/bfcl.web_search.accuracy": 57.0, "bfcl/bfcl.web_search.base_accuracy": 62.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 52.0, "bfcl/bfcl.memory.accuracy": 26.88, "bfcl/bfcl.memory.kv_accuracy": 22.58, "bfcl/bfcl.memory.vector_accuracy": 16.13, "bfcl/bfcl.memory.recursive_summarization_accuracy": 41.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 81.69 } }, { "id": "openai/gpt-4-1-mini-2025-04-14-prompt", "name": "GPT-4.1-mini-2025-04-14 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 67.0, "bfcl/bfcl.overall.overall_accuracy": 29.73, "bfcl/bfcl.overall.total_cost_usd": 20.52, "bfcl/bfcl.overall.latency_mean_s": 1.36, "bfcl/bfcl.overall.latency_std_s": 4.5, "bfcl/bfcl.overall.latency_p95_s": 3.38, "bfcl/bfcl.non_live.ast_accuracy": 84.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 83.5, "bfcl/bfcl.live.live_accuracy": 74.76, "bfcl/bfcl.live.live_simple_ast_accuracy": 80.62, "bfcl/bfcl.live.live_multiple_ast_accuracy": 73.31, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 2.5, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.5, "bfcl/bfcl.web_search.accuracy": 4.0, "bfcl/bfcl.web_search.base_accuracy": 7.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 24.3, "bfcl/bfcl.memory.kv_accuracy": 20.65, "bfcl/bfcl.memory.vector_accuracy": 13.55, "bfcl/bfcl.memory.recursive_summarization_accuracy": 38.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 73.88, "bfcl/bfcl.format_sensitivity.max_delta": 45.0, "bfcl/bfcl.format_sensitivity.stddev": 13.33 } }, { "id": "openai/gpt-4-1-nano-2025-04-14-fc", "name": "GPT-4.1-nano-2025-04-14 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 58.0, "bfcl/bfcl.overall.overall_accuracy": 33.05, "bfcl/bfcl.overall.total_cost_usd": 5.66, "bfcl/bfcl.overall.latency_mean_s": 1.44, "bfcl/bfcl.overall.latency_std_s": 10.84, "bfcl/bfcl.overall.latency_p95_s": 2.26, "bfcl/bfcl.non_live.ast_accuracy": 72.98, "bfcl/bfcl.non_live.simple_ast_accuracy": 59.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 79.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 68.5, "bfcl/bfcl.live.live_accuracy": 60.77, "bfcl/bfcl.live.live_simple_ast_accuracy": 58.14, "bfcl/bfcl.live.live_multiple_ast_accuracy": 61.44, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 23.62, "bfcl/bfcl.multi_turn.base_accuracy": 39.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 7.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 17.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 30.0, "bfcl/bfcl.web_search.accuracy": 11.0, "bfcl/bfcl.web_search.base_accuracy": 13.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 9.0, "bfcl/bfcl.memory.accuracy": 18.92, "bfcl/bfcl.memory.kv_accuracy": 10.32, "bfcl/bfcl.memory.vector_accuracy": 19.35, "bfcl/bfcl.memory.recursive_summarization_accuracy": 27.1, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 66.0 } }, { "id": "openai/gpt-4-1-nano-2025-04-14-prompt", "name": "GPT-4.1-nano-2025-04-14 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 90.0, "bfcl/bfcl.overall.overall_accuracy": 24.88, "bfcl/bfcl.overall.total_cost_usd": 7.42, "bfcl/bfcl.overall.latency_mean_s": 1.02, "bfcl/bfcl.overall.latency_std_s": 7.3, "bfcl/bfcl.overall.latency_p95_s": 1.88, "bfcl/bfcl.non_live.ast_accuracy": 72.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 68.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 63.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 85.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 72.5, "bfcl/bfcl.live.live_accuracy": 50.33, "bfcl/bfcl.live.live_simple_ast_accuracy": 63.18, "bfcl/bfcl.live.live_multiple_ast_accuracy": 46.53, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 2.0, "bfcl/bfcl.multi_turn.base_accuracy": 2.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.0, "bfcl/bfcl.web_search.accuracy": 1.5, "bfcl/bfcl.web_search.base_accuracy": 2.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 16.77, "bfcl/bfcl.memory.kv_accuracy": 9.03, "bfcl/bfcl.memory.vector_accuracy": 14.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 27.1, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 83.44, "bfcl/bfcl.format_sensitivity.max_delta": 73.0, "bfcl/bfcl.format_sensitivity.stddev": 17.08 } }, { "id": "openai/gpt-4-1106-preview", "name": "GPT-4 Turbo 1106 preview", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.698, "helm_lite/NarrativeQA": 0.727, "helm_lite/NaturalQuestions (closed-book)": 0.435, "helm_lite/OpenbookQA": 0.95, "helm_lite/MMLU": 0.699, "helm_lite/MATH": 0.857, "helm_lite/GSM8K": 0.668, "helm_lite/LegalBench": 0.626, "helm_lite/MedQA": 0.817, "helm_lite/WMT 2014": 0.205, "helm_mmlu/MMLU All Subjects": 0.796, "helm_mmlu/Abstract Algebra": 0.53, "helm_mmlu/Anatomy": 0.807, "helm_mmlu/College Physics": 0.402, "helm_mmlu/Computer Security": 0.86, "helm_mmlu/Econometrics": 0.675, "helm_mmlu/Global Facts": 0.58, "helm_mmlu/Jurisprudence": 0.889, "helm_mmlu/Philosophy": 0.852, "helm_mmlu/Professional Psychology": 0.887, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.941, "helm_mmlu/Business Ethics": 0.78, "helm_mmlu/Clinical Knowledge": 0.864, "helm_mmlu/Conceptual Physics": 0.894, "helm_mmlu/Electrical Engineering": 0.772, "helm_mmlu/Elementary Mathematics": 0.638, "helm_mmlu/Formal Logic": 0.651, "helm_mmlu/High School World History": 0.958, "helm_mmlu/Human Sexuality": 0.908, "helm_mmlu/International Law": 0.926, "helm_mmlu/Logical Fallacies": 0.865, "helm_mmlu/Machine Learning": 0.723, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.932, "helm_mmlu/Medical Genetics": 0.93, "helm_mmlu/Miscellaneous": 0.946, "helm_mmlu/Moral Scenarios": 0.816, "helm_mmlu/Nutrition": 0.879, "helm_mmlu/Prehistory": 0.917, "helm_mmlu/Public Relations": 0.782, "helm_mmlu/Security Studies": 0.841, "helm_mmlu/Sociology": 0.925, "helm_mmlu/Virology": 0.59, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.416 } }, { "id": "openai/gpt-4-turbo-2024-04-09", "name": "GPT-4 Turbo 2024-04-09", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.864, "helm_lite/NarrativeQA": 0.761, "helm_lite/NaturalQuestions (closed-book)": 0.482, "helm_lite/OpenbookQA": 0.97, "helm_lite/MMLU": 0.711, "helm_lite/MATH": 0.833, "helm_lite/GSM8K": 0.824, "helm_lite/LegalBench": 0.727, "helm_lite/MedQA": 0.783, "helm_lite/WMT 2014": 0.218, "helm_mmlu/MMLU All Subjects": 0.813, "helm_mmlu/Abstract Algebra": 0.56, "helm_mmlu/Anatomy": 0.822, "helm_mmlu/College Physics": 0.539, "helm_mmlu/Computer Security": 0.83, "helm_mmlu/Econometrics": 0.675, "helm_mmlu/Global Facts": 0.58, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.868, "helm_mmlu/Professional Psychology": 0.873, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.941, "helm_mmlu/Business Ethics": 0.82, "helm_mmlu/Clinical Knowledge": 0.83, "helm_mmlu/Conceptual Physics": 0.894, "helm_mmlu/Electrical Engineering": 0.752, "helm_mmlu/Elementary Mathematics": 0.72, "helm_mmlu/Formal Logic": 0.706, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.901, "helm_mmlu/International Law": 0.942, "helm_mmlu/Logical Fallacies": 0.871, "helm_mmlu/Machine Learning": 0.741, "helm_mmlu/Management": 0.883, "helm_mmlu/Marketing": 0.949, "helm_mmlu/Medical Genetics": 0.92, "helm_mmlu/Miscellaneous": 0.945, "helm_mmlu/Moral Scenarios": 0.803, "helm_mmlu/Nutrition": 0.892, "helm_mmlu/Prehistory": 0.92, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.8, "helm_mmlu/Sociology": 0.915, "helm_mmlu/Virology": 0.602, "helm_mmlu/World Religions": 0.848, "helm_mmlu/Mean win rate": 0.351, "reward-bench/Score": 0.8395, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.7544, "reward-bench/Safety": 0.8757, "reward-bench/Reasoning": 0.827, "reward-bench/Prior Sets (0.5 weight)": 0.7363 } }, { "id": "openai/gpt-4.1", "name": "openai/gpt-4.1", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.19718309859154928 } }, { "id": "openai/gpt-4.1-2025-04-14", "name": "gpt-4.1-2025-04-14", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8755, "global-mmlu-lite/Culturally Sensitive": 0.8541, "global-mmlu-lite/Culturally Agnostic": 0.8969, "global-mmlu-lite/Arabic": 0.88, "global-mmlu-lite/English": 0.8825, "global-mmlu-lite/Bengali": 0.8625, "global-mmlu-lite/German": 0.875, "global-mmlu-lite/French": 0.8875, "global-mmlu-lite/Hindi": 0.8775, "global-mmlu-lite/Indonesian": 0.885, "global-mmlu-lite/Italian": 0.88, "global-mmlu-lite/Japanese": 0.8725, "global-mmlu-lite/Korean": 0.87, "global-mmlu-lite/Portuguese": 0.875, "global-mmlu-lite/Spanish": 0.885, "global-mmlu-lite/Swahili": 0.8725, "global-mmlu-lite/Yoruba": 0.875, "global-mmlu-lite/Chinese": 0.87, "global-mmlu-lite/Burmese": 0.8575, "helm_capabilities/Mean score": 0.727, "helm_capabilities/MMLU-Pro": 0.811, "helm_capabilities/GPQA": 0.659, "helm_capabilities/IFEval": 0.838, "helm_capabilities/WildBench": 0.854, "helm_capabilities/Omni-MATH": 0.471, "reward-bench/Score": 0.7232, "reward-bench/Factuality": 0.8289, "reward-bench/Precise IF": 0.3974, "reward-bench/Math": 0.6521, "reward-bench/Safety": 0.8726, "reward-bench/Focus": 0.7338, "reward-bench/Ties": 0.8542 } }, { "id": "openai/gpt-4.1-mini-2025-04-14", "name": "GPT-4.1 mini 2025-04-14", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.726, "helm_capabilities/MMLU-Pro": 0.783, "helm_capabilities/GPQA": 0.614, "helm_capabilities/IFEval": 0.904, "helm_capabilities/WildBench": 0.838, "helm_capabilities/Omni-MATH": 0.491, "reward-bench/Score": 0.6573, "reward-bench/Factuality": 0.6084, "reward-bench/Precise IF": 0.4125, "reward-bench/Math": 0.7213, "reward-bench/Safety": 0.7265, "reward-bench/Focus": 0.7354, "reward-bench/Ties": 0.74 } }, { "id": "openai/gpt-4.1-nano-2025-04-14", "name": "GPT-4.1 nano 2025-04-14", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.616, "helm_capabilities/MMLU-Pro": 0.55, "helm_capabilities/GPQA": 0.507, "helm_capabilities/IFEval": 0.843, "helm_capabilities/WildBench": 0.811, "helm_capabilities/Omni-MATH": 0.367, "reward-bench/Score": 0.4849, "reward-bench/Factuality": 0.4646, "reward-bench/Precise IF": 0.2578, "reward-bench/Math": 0.5041, "reward-bench/Safety": 0.7156, "reward-bench/Focus": 0.466, "reward-bench/Ties": 0.5015 } }, { "id": "openai/gpt-4o-2024-05-13", "name": "GPT-4o 2024-05-13", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.938, "helm_lite/NarrativeQA": 0.804, "helm_lite/NaturalQuestions (closed-book)": 0.501, "helm_lite/OpenbookQA": 0.966, "helm_lite/MMLU": 0.748, "helm_lite/MATH": 0.829, "helm_lite/GSM8K": 0.905, "helm_lite/LegalBench": 0.733, "helm_lite/MedQA": 0.857, "helm_lite/WMT 2014": 0.231, "helm_mmlu/MMLU All Subjects": 0.842, "helm_mmlu/Abstract Algebra": 0.66, "helm_mmlu/Anatomy": 0.911, "helm_mmlu/College Physics": 0.686, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.693, "helm_mmlu/Global Facts": 0.64, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.9, "helm_mmlu/Professional Psychology": 0.905, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.941, "helm_mmlu/Business Ethics": 0.85, "helm_mmlu/Clinical Knowledge": 0.894, "helm_mmlu/Conceptual Physics": 0.911, "helm_mmlu/Electrical Engineering": 0.807, "helm_mmlu/Elementary Mathematics": 0.741, "helm_mmlu/Formal Logic": 0.683, "helm_mmlu/High School World History": 0.945, "helm_mmlu/Human Sexuality": 0.908, "helm_mmlu/International Law": 0.934, "helm_mmlu/Logical Fallacies": 0.883, "helm_mmlu/Machine Learning": 0.768, "helm_mmlu/Management": 0.942, "helm_mmlu/Marketing": 0.936, "helm_mmlu/Medical Genetics": 0.96, "helm_mmlu/Miscellaneous": 0.954, "helm_mmlu/Moral Scenarios": 0.841, "helm_mmlu/Nutrition": 0.899, "helm_mmlu/Prehistory": 0.938, "helm_mmlu/Public Relations": 0.809, "helm_mmlu/Security Studies": 0.837, "helm_mmlu/Sociology": 0.94, "helm_mmlu/Virology": 0.596, "helm_mmlu/World Religions": 0.889, "helm_mmlu/Mean win rate": 0.671, "reward-bench/Score": 0.8327, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.7039, "reward-bench/Safety": 0.8649, "reward-bench/Reasoning": 0.8487, "reward-bench/Prior Sets (0.5 weight)": 0.7262 } }, { "id": "openai/gpt-4o-2024-08-06", "name": "GPT-4o 2024-08-06", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.928, "helm_lite/NarrativeQA": 0.795, "helm_lite/NaturalQuestions (closed-book)": 0.496, "helm_lite/OpenbookQA": 0.968, "helm_lite/MMLU": 0.738, "helm_lite/MATH": 0.853, "helm_lite/GSM8K": 0.909, "helm_lite/LegalBench": 0.721, "helm_lite/MedQA": 0.863, "helm_lite/WMT 2014": 0.225, "helm_mmlu/MMLU All Subjects": 0.843, "helm_mmlu/Abstract Algebra": 0.58, "helm_mmlu/Anatomy": 0.911, "helm_mmlu/College Physics": 0.686, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.711, "helm_mmlu/Global Facts": 0.69, "helm_mmlu/Jurisprudence": 0.907, "helm_mmlu/Philosophy": 0.894, "helm_mmlu/Professional Psychology": 0.899, "helm_mmlu/Us Foreign Policy": 0.95, "helm_mmlu/Astronomy": 0.947, "helm_mmlu/Business Ethics": 0.89, "helm_mmlu/Clinical Knowledge": 0.894, "helm_mmlu/Conceptual Physics": 0.923, "helm_mmlu/Electrical Engineering": 0.793, "helm_mmlu/Elementary Mathematics": 0.775, "helm_mmlu/Formal Logic": 0.675, "helm_mmlu/High School World History": 0.941, "helm_mmlu/Human Sexuality": 0.901, "helm_mmlu/International Law": 0.942, "helm_mmlu/Logical Fallacies": 0.902, "helm_mmlu/Machine Learning": 0.777, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.98, "helm_mmlu/Miscellaneous": 0.958, "helm_mmlu/Moral Scenarios": 0.802, "helm_mmlu/Nutrition": 0.905, "helm_mmlu/Prehistory": 0.935, "helm_mmlu/Public Relations": 0.782, "helm_mmlu/Security Studies": 0.833, "helm_mmlu/Sociology": 0.945, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.52, "reward-bench/Score": 0.6493, "reward-bench/Chat": 0.9609, "reward-bench/Chat Hard": 0.761, "reward-bench/Safety": 0.8619, "reward-bench/Reasoning": 0.8661, "reward-bench/Factuality": 0.5684, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.623, "reward-bench/Focus": 0.7293, "reward-bench/Ties": 0.7819 } }, { "id": "openai/gpt-4o-2024-11-20", "name": "GPT-4o 2024-11-20", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.634, "helm_capabilities/MMLU-Pro": 0.713, "helm_capabilities/GPQA": 0.52, "helm_capabilities/IFEval": 0.817, "helm_capabilities/WildBench": 0.828, "helm_capabilities/Omni-MATH": 0.293, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.0, "livecodebenchpro/Easy Problems": 0.07042253521126761 } }, { "id": "openai/gpt-4o-mini-2024-07-18", "name": "GPT-4o mini 2024-07-18", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.565, "helm_capabilities/MMLU-Pro": 0.603, "helm_capabilities/GPQA": 0.368, "helm_capabilities/IFEval": 0.782, "helm_capabilities/WildBench": 0.791, "helm_capabilities/Omni-MATH": 0.28, "helm_lite/Mean win rate": 0.701, "helm_lite/NarrativeQA": 0.768, "helm_lite/NaturalQuestions (closed-book)": 0.386, "helm_lite/OpenbookQA": 0.92, "helm_lite/MMLU": 0.668, "helm_lite/MATH": 0.802, "helm_lite/GSM8K": 0.843, "helm_lite/LegalBench": 0.653, "helm_lite/MedQA": 0.748, "helm_lite/WMT 2014": 0.206, "helm_mmlu/MMLU All Subjects": 0.767, "helm_mmlu/Abstract Algebra": 0.42, "helm_mmlu/Anatomy": 0.77, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.649, "helm_mmlu/Global Facts": 0.45, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.772, "helm_mmlu/Professional Psychology": 0.833, "helm_mmlu/Us Foreign Policy": 0.91, "helm_mmlu/Astronomy": 0.849, "helm_mmlu/Business Ethics": 0.79, "helm_mmlu/Clinical Knowledge": 0.845, "helm_mmlu/Conceptual Physics": 0.791, "helm_mmlu/Electrical Engineering": 0.731, "helm_mmlu/Elementary Mathematics": 0.651, "helm_mmlu/Formal Logic": 0.556, "helm_mmlu/High School World History": 0.903, "helm_mmlu/Human Sexuality": 0.863, "helm_mmlu/International Law": 0.926, "helm_mmlu/Logical Fallacies": 0.871, "helm_mmlu/Machine Learning": 0.616, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.927, "helm_mmlu/Medical Genetics": 0.89, "helm_mmlu/Miscellaneous": 0.913, "helm_mmlu/Moral Scenarios": 0.485, "helm_mmlu/Nutrition": 0.827, "helm_mmlu/Prehistory": 0.833, "helm_mmlu/Public Relations": 0.791, "helm_mmlu/Security Studies": 0.788, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.536, "helm_mmlu/World Religions": 0.86, "helm_mmlu/Mean win rate": 0.774, "reward-bench/Score": 0.8007, "reward-bench/Factuality": 0.4105, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.5191, "reward-bench/Safety": 0.8081, "reward-bench/Focus": 0.7414, "reward-bench/Ties": 0.6962, "reward-bench/Chat": 0.9497, "reward-bench/Chat Hard": 0.6075, "reward-bench/Reasoning": 0.8374 } }, { "id": "openai/gpt-5", "name": "GPT-5", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 49.6 } }, { "id": "openai/gpt-5-2-2025-12-11-fc", "name": "GPT-5.2-2025-12-11 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 16.0, "bfcl/bfcl.overall.overall_accuracy": 55.87, "bfcl/bfcl.overall.total_cost_usd": 85.65, "bfcl/bfcl.overall.latency_mean_s": 2.23, "bfcl/bfcl.overall.latency_std_s": 9.75, "bfcl/bfcl.overall.latency_p95_s": 5.26, "bfcl/bfcl.non_live.ast_accuracy": 81.85, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 77.5, "bfcl/bfcl.live.live_accuracy": 70.39, "bfcl/bfcl.live.live_simple_ast_accuracy": 71.71, "bfcl/bfcl.live.live_multiple_ast_accuracy": 70.37, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 28.12, "bfcl/bfcl.multi_turn.base_accuracy": 36.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 18.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 27.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 30.5, "bfcl/bfcl.web_search.accuracy": 75.5, "bfcl/bfcl.web_search.base_accuracy": 78.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 73.0, "bfcl/bfcl.memory.accuracy": 45.81, "bfcl/bfcl.memory.kv_accuracy": 33.55, "bfcl/bfcl.memory.vector_accuracy": 43.23, "bfcl/bfcl.memory.recursive_summarization_accuracy": 60.65, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.42 } }, { "id": "openai/gpt-5-2-2025-12-11-prompt", "name": "GPT-5.2-2025-12-11 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 38.0, "bfcl/bfcl.overall.overall_accuracy": 45.27, "bfcl/bfcl.overall.total_cost_usd": 164.58, "bfcl/bfcl.overall.latency_mean_s": 4.21, "bfcl/bfcl.overall.latency_std_s": 20.93, "bfcl/bfcl.overall.latency_p95_s": 10.58, "bfcl/bfcl.non_live.ast_accuracy": 78.29, "bfcl/bfcl.non_live.simple_ast_accuracy": 71.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 83.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 74.5, "bfcl/bfcl.live.live_accuracy": 67.14, "bfcl/bfcl.live.live_simple_ast_accuracy": 77.91, "bfcl/bfcl.live.live_multiple_ast_accuracy": 64.58, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 43.75, "bfcl/bfcl.multi_turn.base_accuracy": 54.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 40.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 33.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 46.5, "bfcl/bfcl.web_search.accuracy": 40.5, "bfcl/bfcl.web_search.base_accuracy": 45.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 36.0, "bfcl/bfcl.memory.accuracy": 3.87, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 1.94, "bfcl/bfcl.memory.recursive_summarization_accuracy": 7.1, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.26, "bfcl/bfcl.format_sensitivity.max_delta": 13.0, "bfcl/bfcl.format_sensitivity.stddev": 3.25 } }, { "id": "openai/gpt-5-2025-08-07", "name": "gpt-5-2025-08-07", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8895, "global-mmlu-lite/Culturally Sensitive": 0.8913, "global-mmlu-lite/Culturally Agnostic": 0.8878, "global-mmlu-lite/Arabic": 0.8925, "global-mmlu-lite/English": 0.8725, "global-mmlu-lite/Bengali": 0.9, "global-mmlu-lite/German": 0.91, "global-mmlu-lite/French": 0.9075, "global-mmlu-lite/Hindi": 0.865, "global-mmlu-lite/Indonesian": 0.795, "global-mmlu-lite/Italian": 0.9075, "global-mmlu-lite/Japanese": 0.8875, "global-mmlu-lite/Korean": 0.915, "global-mmlu-lite/Portuguese": 0.8875, "global-mmlu-lite/Spanish": 0.905, "global-mmlu-lite/Swahili": 0.865, "global-mmlu-lite/Yoruba": 0.9125, "global-mmlu-lite/Chinese": 0.895, "global-mmlu-lite/Burmese": 0.915, "helm_capabilities/Mean score": 0.807, "helm_capabilities/MMLU-Pro": 0.863, "helm_capabilities/GPQA": 0.791, "helm_capabilities/IFEval": 0.875, "helm_capabilities/WildBench": 0.857, "helm_capabilities/Omni-MATH": 0.647, "livecodebenchpro/Hard Problems": 0.0423, "livecodebenchpro/Medium Problems": 0.4085, "livecodebenchpro/Easy Problems": 0.9014 } }, { "id": "openai/gpt-5-codex", "name": "GPT-5-Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 44.3 } }, { "id": "openai/gpt-5-mini", "name": "GPT-5-Mini", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 31.9 } }, { "id": "openai/gpt-5-mini-2025-08-07", "name": "GPT-5 mini 2025-08-07", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.819, "helm_capabilities/MMLU-Pro": 0.835, "helm_capabilities/GPQA": 0.756, "helm_capabilities/IFEval": 0.927, "helm_capabilities/WildBench": 0.855, "helm_capabilities/Omni-MATH": 0.722 } }, { "id": "openai/gpt-5-mini-2025-08-07-fc", "name": "GPT-5-mini-2025-08-07 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 17.0, "bfcl/bfcl.overall.overall_accuracy": 55.46, "bfcl/bfcl.overall.total_cost_usd": 22.18, "bfcl/bfcl.overall.latency_mean_s": 8.32, "bfcl/bfcl.overall.latency_std_s": 17.35, "bfcl/bfcl.overall.latency_p95_s": 19.8, "bfcl/bfcl.non_live.ast_accuracy": 69.85, "bfcl/bfcl.non_live.simple_ast_accuracy": 59.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 69.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 80.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 70.5, "bfcl/bfcl.live.live_accuracy": 58.62, "bfcl/bfcl.live.live_simple_ast_accuracy": 62.02, "bfcl/bfcl.live.live_multiple_ast_accuracy": 58.02, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 45.83, "bfcl/bfcl.multi_turn.accuracy": 27.5, "bfcl/bfcl.multi_turn.base_accuracy": 36.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 17.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 23.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 33.0, "bfcl/bfcl.web_search.accuracy": 82.0, "bfcl/bfcl.web_search.base_accuracy": 87.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 77.0, "bfcl/bfcl.memory.accuracy": 44.3, "bfcl/bfcl.memory.kv_accuracy": 36.77, "bfcl/bfcl.memory.vector_accuracy": 43.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 52.26, "bfcl/bfcl.relevance.relevance_detection_accuracy": 62.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 91.01 } }, { "id": "openai/gpt-5-mini-2025-08-07-prompt", "name": "GPT-5-mini-2025-08-07 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 77.0, "bfcl/bfcl.overall.overall_accuracy": 27.83, "bfcl/bfcl.overall.total_cost_usd": 82.74, "bfcl/bfcl.overall.latency_mean_s": 8.89, "bfcl/bfcl.overall.latency_std_s": 11.08, "bfcl/bfcl.overall.latency_p95_s": 19.72, "bfcl/bfcl.non_live.ast_accuracy": 68.04, "bfcl/bfcl.non_live.simple_ast_accuracy": 59.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 72.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 71.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 69.0, "bfcl/bfcl.live.live_accuracy": 62.55, "bfcl/bfcl.live.live_simple_ast_accuracy": 69.77, "bfcl/bfcl.live.live_multiple_ast_accuracy": 61.16, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 5.5, "bfcl/bfcl.multi_turn.base_accuracy": 5.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 5.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 4.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 7.0, "bfcl/bfcl.web_search.accuracy": 8.5, "bfcl/bfcl.web_search.base_accuracy": 11.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 6.0, "bfcl/bfcl.memory.accuracy": 29.25, "bfcl/bfcl.memory.kv_accuracy": 19.35, "bfcl/bfcl.memory.vector_accuracy": 29.68, "bfcl/bfcl.memory.recursive_summarization_accuracy": 38.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 55.71, "bfcl/bfcl.format_sensitivity.max_delta": 16.0, "bfcl/bfcl.format_sensitivity.stddev": 3.78 } }, { "id": "openai/gpt-5-nano", "name": "GPT-5-Nano", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 7.0 } }, { "id": "openai/gpt-5-nano-2025-08-07", "name": "GPT-5 nano 2025-08-07", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.748, "helm_capabilities/MMLU-Pro": 0.778, "helm_capabilities/GPQA": 0.679, "helm_capabilities/IFEval": 0.932, "helm_capabilities/WildBench": 0.806, "helm_capabilities/Omni-MATH": 0.547 } }, { "id": "openai/gpt-5-nano-2025-08-07-fc", "name": "GPT-5-nano-2025-08-07 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 24.0, "bfcl/bfcl.overall.overall_accuracy": 51.45, "bfcl/bfcl.overall.total_cost_usd": 8.79, "bfcl/bfcl.overall.latency_mean_s": 10.36, "bfcl/bfcl.overall.latency_std_s": 10.37, "bfcl/bfcl.overall.latency_p95_s": 23.56, "bfcl/bfcl.non_live.ast_accuracy": 68.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 57.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 64.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 79.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 71.5, "bfcl/bfcl.live.live_accuracy": 59.44, "bfcl/bfcl.live.live_simple_ast_accuracy": 58.91, "bfcl/bfcl.live.live_multiple_ast_accuracy": 59.83, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 34.5, "bfcl/bfcl.multi_turn.base_accuracy": 44.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 23.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 32.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 38.0, "bfcl/bfcl.web_search.accuracy": 72.5, "bfcl/bfcl.web_search.base_accuracy": 74.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 71.0, "bfcl/bfcl.memory.accuracy": 24.73, "bfcl/bfcl.memory.kv_accuracy": 18.06, "bfcl/bfcl.memory.vector_accuracy": 27.1, "bfcl/bfcl.memory.recursive_summarization_accuracy": 29.03, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 89.1 } }, { "id": "openai/gpt-5-nano-2025-08-07-prompt", "name": "GPT-5-nano-2025-08-07 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 79.0, "bfcl/bfcl.overall.overall_accuracy": 27.55, "bfcl/bfcl.overall.total_cost_usd": 21.47, "bfcl/bfcl.overall.latency_mean_s": 10.67, "bfcl/bfcl.overall.latency_std_s": 7.68, "bfcl/bfcl.overall.latency_p95_s": 23.28, "bfcl/bfcl.non_live.ast_accuracy": 80.81, "bfcl/bfcl.non_live.simple_ast_accuracy": 69.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 86.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.5, "bfcl/bfcl.live.live_accuracy": 70.69, "bfcl/bfcl.live.live_simple_ast_accuracy": 76.36, "bfcl/bfcl.live.live_multiple_ast_accuracy": 69.71, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 0.75, "bfcl/bfcl.multi_turn.base_accuracy": 1.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.0, "bfcl/bfcl.web_search.accuracy": 13.5, "bfcl/bfcl.web_search.base_accuracy": 10.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 17.0, "bfcl/bfcl.memory.accuracy": 24.52, "bfcl/bfcl.memory.kv_accuracy": 20.65, "bfcl/bfcl.memory.vector_accuracy": 31.61, "bfcl/bfcl.memory.recursive_summarization_accuracy": 21.29, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 45.75, "bfcl/bfcl.format_sensitivity.max_delta": 8.5, "bfcl/bfcl.format_sensitivity.stddev": 2.57 } }, { "id": "openai/gpt-5.1", "name": "GPT-5.1", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 47.6 } }, { "id": "openai/gpt-5.1-codex", "name": "GPT-5.1-Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 53.5 } }, { "id": "openai/gpt-5.1-codex-max", "name": "GPT-5.1-Codex-Max", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 60.4 } }, { "id": "openai/gpt-5.1-codex-mini", "name": "GPT-5.1-Codex-Mini", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 43.1 } }, { "id": "openai/gpt-5.2", "name": "GPT-5.2", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 64.9 } }, { "id": "openai/gpt-5.2-2025-12-11", "name": "gpt-5.2-2025-12-11", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "appworld_test_normal/appworld/test_normal": 0.0, "browsecompplus/browsecompplus": 0.26, "livecodebenchpro/Hard Problems": 0.1594, "livecodebenchpro/Medium Problems": 0.5211, "livecodebenchpro/Easy Problems": 0.9014, "swe-bench/swe-bench": 0.57, "tau-bench-2_airline/tau-bench-2/airline": 0.54, "tau-bench-2_retail/tau-bench-2/retail": 0.68, "tau-bench-2_telecom/tau-bench-2/telecom": 0.5354 } }, { "id": "openai/gpt-5.2-codex", "name": "GPT-5.2-Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 66.5 } }, { "id": "openai/gpt-5.3-codex", "name": "GPT-5.3-Codex", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 74.6 } }, { "id": "openai/GPT-J-6B", "name": "GPT-J 6B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.273, "helm_classic/MMLU": 0.249, "helm_classic/BoolQ": 0.649, "helm_classic/NarrativeQA": 0.545, "helm_classic/NaturalQuestions (open-book)": 0.559, "helm_classic/QuAC": 0.33, "helm_classic/HellaSwag": 0.663, "helm_classic/OpenbookQA": 0.514, "helm_classic/TruthfulQA": 0.199, "helm_classic/MS MARCO (TREC)": 0.345, "helm_classic/CNN/DailyMail": 0.131, "helm_classic/XSUM": 0.096, "helm_classic/IMDB": 0.939, "helm_classic/CivilComments": 0.52, "helm_classic/RAFT": 0.619 } }, { "id": "openai/GPT-NeoX-20B", "name": "GPT-NeoX 20B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.351, "helm_classic/MMLU": 0.276, "helm_classic/BoolQ": 0.683, "helm_classic/NarrativeQA": 0.599, "helm_classic/NaturalQuestions (open-book)": 0.596, "helm_classic/QuAC": 0.326, "helm_classic/HellaSwag": 0.718, "helm_classic/OpenbookQA": 0.524, "helm_classic/TruthfulQA": 0.216, "helm_classic/MS MARCO (TREC)": 0.398, "helm_classic/CNN/DailyMail": 0.123, "helm_classic/XSUM": 0.102, "helm_classic/IMDB": 0.948, "helm_classic/CivilComments": 0.516, "helm_classic/RAFT": 0.505 } }, { "id": "openai/gpt-oss-120b", "name": "GPT-OSS-120B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.77, "helm_capabilities/MMLU-Pro": 0.795, "helm_capabilities/GPQA": 0.684, "helm_capabilities/IFEval": 0.836, "helm_capabilities/WildBench": 0.845, "helm_capabilities/Omni-MATH": 0.688, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.11267605633802817, "livecodebenchpro/Easy Problems": 0.6619718309859155, "terminal-bench-2.0/terminal-bench-2.0": 18.7 } }, { "id": "openai/gpt-oss-20b", "name": "GPT-OSS-20B", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.674, "helm_capabilities/MMLU-Pro": 0.74, "helm_capabilities/GPQA": 0.594, "helm_capabilities/IFEval": 0.732, "helm_capabilities/WildBench": 0.737, "helm_capabilities/Omni-MATH": 0.565, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.056338028169014086, "livecodebenchpro/Easy Problems": 0.5070422535211268, "terminal-bench-2.0/terminal-bench-2.0": 3.4 } }, { "id": "openai/o3", "name": "o3", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.529, "ace/Gaming Score": 0.585, "ace/Shopping Score": 0.45, "apex-v1/Big Law Score": 0.76 } }, { "id": "openai/o3 Pro", "name": "o3 Pro", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "ace/Overall Score": 0.552, "ace/DIY Score": 0.54, "ace/Food Score": 0.6, "ace/Gaming Score": 0.613, "ace/Shopping Score": 0.45 } }, { "id": "openai/o3-2025-04-16", "name": "o3-2025-04-16", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.811, "helm_capabilities/MMLU-Pro": 0.859, "helm_capabilities/GPQA": 0.753, "helm_capabilities/IFEval": 0.869, "helm_capabilities/WildBench": 0.861, "helm_capabilities/Omni-MATH": 0.714, "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.22535211267605634, "livecodebenchpro/Easy Problems": 0.7183098591549296 } }, { "id": "openai/o3-2025-04-16-fc", "name": "o3-2025-04-16 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 30.0, "bfcl/bfcl.overall.overall_accuracy": 48.56, "bfcl/bfcl.overall.total_cost_usd": 133.45, "bfcl/bfcl.overall.latency_mean_s": 3.5, "bfcl/bfcl.overall.latency_std_s": 8.69, "bfcl/bfcl.overall.latency_p95_s": 8.39, "bfcl/bfcl.non_live.ast_accuracy": 40.38, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 87.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_accuracy": 66.17, "bfcl/bfcl.live.live_simple_ast_accuracy": 70.54, "bfcl/bfcl.live.live_multiple_ast_accuracy": 67.62, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 14.75, "bfcl/bfcl.multi_turn.base_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 11.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 14.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 16.5, "bfcl/bfcl.web_search.accuracy": 77.0, "bfcl/bfcl.web_search.base_accuracy": 79.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 75.0, "bfcl/bfcl.memory.accuracy": 47.31, "bfcl/bfcl.memory.kv_accuracy": 24.52, "bfcl/bfcl.memory.vector_accuracy": 44.52, "bfcl/bfcl.memory.recursive_summarization_accuracy": 72.9, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.13 } }, { "id": "openai/o3-2025-04-16-prompt", "name": "o3-2025-04-16 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 8.0, "bfcl/bfcl.overall.overall_accuracy": 63.05, "bfcl/bfcl.overall.total_cost_usd": 234.64, "bfcl/bfcl.overall.latency_mean_s": 4.83, "bfcl/bfcl.overall.latency_std_s": 7.01, "bfcl/bfcl.overall.latency_p95_s": 11.7, "bfcl/bfcl.non_live.ast_accuracy": 81.94, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 86.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 78.0, "bfcl/bfcl.live.live_accuracy": 73.21, "bfcl/bfcl.live.live_simple_ast_accuracy": 83.33, "bfcl/bfcl.live.live_multiple_ast_accuracy": 70.75, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 62.25, "bfcl/bfcl.multi_turn.base_accuracy": 68.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 63.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 54.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 63.0, "bfcl/bfcl.web_search.accuracy": 50.5, "bfcl/bfcl.web_search.base_accuracy": 51.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 50.0, "bfcl/bfcl.memory.accuracy": 51.83, "bfcl/bfcl.memory.kv_accuracy": 33.55, "bfcl/bfcl.memory.vector_accuracy": 50.32, "bfcl/bfcl.memory.recursive_summarization_accuracy": 71.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 83.98, "bfcl/bfcl.format_sensitivity.max_delta": 8.5, "bfcl/bfcl.format_sensitivity.stddev": 2.75 } }, { "id": "openai/o3-mini-2025-01-31", "name": "o3-mini-2025-01-31", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.78, "global-mmlu-lite/Culturally Sensitive": 0.765, "global-mmlu-lite/Culturally Agnostic": 0.795, "global-mmlu-lite/Arabic": 0.7725, "global-mmlu-lite/English": 0.8025, "global-mmlu-lite/Bengali": 0.77, "global-mmlu-lite/German": 0.7525, "global-mmlu-lite/French": 0.74, "global-mmlu-lite/Hindi": 0.7525, "global-mmlu-lite/Indonesian": 0.7425, "global-mmlu-lite/Italian": 0.8, "global-mmlu-lite/Japanese": 0.81, "global-mmlu-lite/Korean": 0.8075, "global-mmlu-lite/Portuguese": 0.7975, "global-mmlu-lite/Spanish": 0.775, "global-mmlu-lite/Swahili": 0.765, "global-mmlu-lite/Yoruba": 0.7725, "global-mmlu-lite/Chinese": 0.8125, "global-mmlu-lite/Burmese": 0.8075 } }, { "id": "openai/o4-mini-2025-04-16", "name": "o4-mini-2025-04-16", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8705, "global-mmlu-lite/Culturally Sensitive": 0.8503, "global-mmlu-lite/Culturally Agnostic": 0.8906, "global-mmlu-lite/Arabic": 0.865, "global-mmlu-lite/English": 0.8675, "global-mmlu-lite/Bengali": 0.8875, "global-mmlu-lite/German": 0.8775, "global-mmlu-lite/French": 0.87, "global-mmlu-lite/Hindi": 0.87, "global-mmlu-lite/Indonesian": 0.8675, "global-mmlu-lite/Italian": 0.855, "global-mmlu-lite/Japanese": 0.885, "global-mmlu-lite/Korean": 0.88, "global-mmlu-lite/Portuguese": 0.88, "global-mmlu-lite/Spanish": 0.855, "global-mmlu-lite/Swahili": 0.8525, "global-mmlu-lite/Yoruba": 0.8525, "global-mmlu-lite/Chinese": 0.89, "global-mmlu-lite/Burmese": 0.8725, "helm_capabilities/Mean score": 0.812, "helm_capabilities/MMLU-Pro": 0.82, "helm_capabilities/GPQA": 0.735, "helm_capabilities/IFEval": 0.929, "helm_capabilities/WildBench": 0.854, "helm_capabilities/Omni-MATH": 0.72, "livecodebenchpro/Hard Problems": 0.0143, "livecodebenchpro/Medium Problems": 0.2923, "livecodebenchpro/Easy Problems": 0.8571 } }, { "id": "openai/o4-mini-2025-04-16-fc", "name": "o4-mini-2025-04-16 (FC)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 21.0, "bfcl/bfcl.overall.overall_accuracy": 53.24, "bfcl/bfcl.overall.total_cost_usd": 81.91, "bfcl/bfcl.overall.latency_mean_s": 3.71, "bfcl/bfcl.overall.latency_std_s": 7.18, "bfcl/bfcl.overall.latency_p95_s": 9.33, "bfcl/bfcl.non_live.ast_accuracy": 37.73, "bfcl/bfcl.non_live.simple_ast_accuracy": 66.92, "bfcl/bfcl.non_live.multiple_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 0.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.live.live_accuracy": 66.1, "bfcl/bfcl.live.live_simple_ast_accuracy": 69.38, "bfcl/bfcl.live.live_multiple_ast_accuracy": 67.81, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 0.0, "bfcl/bfcl.multi_turn.accuracy": 41.75, "bfcl/bfcl.multi_turn.base_accuracy": 51.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 30.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 40.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 45.5, "bfcl/bfcl.web_search.accuracy": 75.5, "bfcl/bfcl.web_search.base_accuracy": 75.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 76.0, "bfcl/bfcl.memory.accuracy": 34.19, "bfcl/bfcl.memory.kv_accuracy": 19.35, "bfcl/bfcl.memory.vector_accuracy": 24.52, "bfcl/bfcl.memory.recursive_summarization_accuracy": 58.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 83.91 } }, { "id": "openai/o4-mini-2025-04-16-prompt", "name": "o4-mini-2025-04-16 (Prompt)", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 28.0, "bfcl/bfcl.overall.overall_accuracy": 50.26, "bfcl/bfcl.overall.total_cost_usd": 133.63, "bfcl/bfcl.overall.latency_mean_s": 4.47, "bfcl/bfcl.overall.latency_std_s": 5.19, "bfcl/bfcl.overall.latency_p95_s": 10.19, "bfcl/bfcl.non_live.ast_accuracy": 81.29, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.0, "bfcl/bfcl.live.live_accuracy": 70.76, "bfcl/bfcl.live.live_simple_ast_accuracy": 79.46, "bfcl/bfcl.live.live_multiple_ast_accuracy": 68.76, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 16.62, "bfcl/bfcl.multi_turn.base_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 18.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 17.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 14.5, "bfcl/bfcl.web_search.accuracy": 71.5, "bfcl/bfcl.web_search.base_accuracy": 73.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 70.0, "bfcl/bfcl.memory.accuracy": 35.27, "bfcl/bfcl.memory.kv_accuracy": 22.58, "bfcl/bfcl.memory.vector_accuracy": 25.16, "bfcl/bfcl.memory.recursive_summarization_accuracy": 58.06, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.16, "bfcl/bfcl.format_sensitivity.max_delta": 9.5, "bfcl/bfcl.format_sensitivity.stddev": 2.6 } }, { "id": "openai/text-ada-001", "name": "text-ada-001", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.107, "helm_classic/MMLU": 0.238, "helm_classic/BoolQ": 0.464, "helm_classic/NarrativeQA": 0.238, "helm_classic/NaturalQuestions (open-book)": 0.149, "helm_classic/QuAC": 0.176, "helm_classic/HellaSwag": 0.429, "helm_classic/OpenbookQA": 0.346, "helm_classic/TruthfulQA": 0.232, "helm_classic/MS MARCO (TREC)": 0.302, "helm_classic/CNN/DailyMail": 0.136, "helm_classic/XSUM": 0.034, "helm_classic/IMDB": 0.822, "helm_classic/CivilComments": 0.503, "helm_classic/RAFT": 0.406 } }, { "id": "openai/text-babbage-001", "name": "text-babbage-001", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.229, "helm_classic/MMLU": 0.229, "helm_classic/BoolQ": 0.451, "helm_classic/NarrativeQA": 0.429, "helm_classic/NaturalQuestions (open-book)": 0.33, "helm_classic/QuAC": 0.284, "helm_classic/HellaSwag": 0.561, "helm_classic/OpenbookQA": 0.452, "helm_classic/TruthfulQA": 0.233, "helm_classic/MS MARCO (TREC)": 0.449, "helm_classic/CNN/DailyMail": 0.151, "helm_classic/XSUM": 0.046, "helm_classic/IMDB": 0.913, "helm_classic/CivilComments": 0.499, "helm_classic/RAFT": 0.509 } }, { "id": "openai/text-curie-001", "name": "text-curie-001", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.36, "helm_classic/MMLU": 0.237, "helm_classic/BoolQ": 0.62, "helm_classic/NarrativeQA": 0.582, "helm_classic/NaturalQuestions (open-book)": 0.571, "helm_classic/QuAC": 0.358, "helm_classic/HellaSwag": 0.676, "helm_classic/OpenbookQA": 0.514, "helm_classic/TruthfulQA": 0.257, "helm_classic/MS MARCO (TREC)": 0.507, "helm_classic/CNN/DailyMail": 0.152, "helm_classic/XSUM": 0.076, "helm_classic/IMDB": 0.923, "helm_classic/CivilComments": 0.537, "helm_classic/RAFT": 0.489 } }, { "id": "openai/text-davinci-002", "name": "GPT-3.5 text-davinci-002", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.905, "helm_classic/MMLU": 0.568, "helm_classic/BoolQ": 0.877, "helm_classic/NarrativeQA": 0.727, "helm_classic/NaturalQuestions (open-book)": 0.713, "helm_classic/QuAC": 0.445, "helm_classic/HellaSwag": 0.815, "helm_classic/OpenbookQA": 0.594, "helm_classic/TruthfulQA": 0.61, "helm_classic/MS MARCO (TREC)": 0.664, "helm_classic/CNN/DailyMail": 0.153, "helm_classic/XSUM": 0.144, "helm_classic/IMDB": 0.948, "helm_classic/CivilComments": 0.668, "helm_classic/RAFT": 0.733, "helm_lite/Mean win rate": 0.336, "helm_lite/NarrativeQA": 0.719, "helm_lite/NaturalQuestions (closed-book)": 0.394, "helm_lite/OpenbookQA": 0.796, "helm_lite/MMLU": 0.568, "helm_lite/MATH": 0.428, "helm_lite/GSM8K": 0.479, "helm_lite/LegalBench": 0.58, "helm_lite/MedQA": 0.525, "helm_lite/WMT 2014": 0.174 } }, { "id": "openai/text-davinci-003", "name": "GPT-3.5 text-davinci-003", "developer": "OpenAI", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.872, "helm_classic/MMLU": 0.569, "helm_classic/BoolQ": 0.881, "helm_classic/NarrativeQA": 0.727, "helm_classic/NaturalQuestions (open-book)": 0.77, "helm_classic/QuAC": 0.525, "helm_classic/HellaSwag": 0.822, "helm_classic/OpenbookQA": 0.646, "helm_classic/TruthfulQA": 0.593, "helm_classic/MS MARCO (TREC)": 0.644, "helm_classic/CNN/DailyMail": 0.156, "helm_classic/XSUM": 0.124, "helm_classic/IMDB": 0.848, "helm_classic/CivilComments": 0.684, "helm_classic/RAFT": 0.759, "helm_lite/Mean win rate": 0.439, "helm_lite/NarrativeQA": 0.731, "helm_lite/NaturalQuestions (closed-book)": 0.413, "helm_lite/OpenbookQA": 0.828, "helm_lite/MMLU": 0.555, "helm_lite/MATH": 0.449, "helm_lite/GSM8K": 0.615, "helm_lite/LegalBench": 0.622, "helm_lite/MedQA": 0.531, "helm_lite/WMT 2014": 0.191 } }, { "id": "OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1", "name": "OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1", "developer": "OpenAssistant", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.2653, "reward-bench/Chat": 0.9246, "reward-bench/Chat Hard": 0.3728, "reward-bench/Safety": 0.3289, "reward-bench/Reasoning": 0.5855, "reward-bench/Prior Sets (0.5 weight)": 0.6801, "reward-bench/Factuality": 0.3979, "reward-bench/Precise IF": 0.2875, "reward-bench/Math": 0.377, "reward-bench/Focus": 0.1535, "reward-bench/Ties": 0.047 } }, { "id": "OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5", "name": "OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5", "developer": "OpenAssistant", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.2648, "reward-bench/Chat": 0.8855, "reward-bench/Chat Hard": 0.4868, "reward-bench/Safety": 0.3244, "reward-bench/Reasoning": 0.7752, "reward-bench/Prior Sets (0.5 weight)": 0.6533, "reward-bench/Factuality": 0.3179, "reward-bench/Precise IF": 0.2625, "reward-bench/Math": 0.3934, "reward-bench/Focus": 0.2707, "reward-bench/Ties": 0.0198 } }, { "id": "OpenAssistant/oasst-sft-1-pythia-12b", "name": "oasst-sft-1-pythia-12b", "developer": "OpenAssistant", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1055, "hfopenllm_v2/BBH": 0.3147, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3327, "hfopenllm_v2/MMLU-PRO": 0.1113 } }, { "id": "OpenAssistant/reward-model-deberta-v3-large-v2", "name": "OpenAssistant/reward-model-deberta-v3-large-v2", "developer": "OpenAssistant", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6126, "reward-bench/Factuality": 0.3853, "reward-bench/Precise IF": 0.2687, "reward-bench/Math": 0.5027, "reward-bench/Safety": 0.7338, "reward-bench/Focus": 0.2768, "reward-bench/Ties": 0.12, "reward-bench/Chat": 0.8939, "reward-bench/Chat Hard": 0.4518, "reward-bench/Reasoning": 0.3855, "reward-bench/Prior Sets (0.5 weight)": 0.5836 } }, { "id": "openbmb/Eurus-7b-kto", "name": "openbmb/Eurus-7b-kto", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.69, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.5373, "reward-bench/Safety": 0.6054, "reward-bench/Reasoning": 0.7467, "reward-bench/Prior Sets (0.5 weight)": 0.5261 } }, { "id": "openbmb/Eurus-RM-7b", "name": "openbmb/Eurus-RM-7b", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8159, "reward-bench/Factuality": 0.6, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.5683, "reward-bench/Safety": 0.8135, "reward-bench/Focus": 0.7475, "reward-bench/Ties": 0.5972, "reward-bench/Chat": 0.9804, "reward-bench/Chat Hard": 0.6557, "reward-bench/Reasoning": 0.8633, "reward-bench/Prior Sets (0.5 weight)": 0.7172 } }, { "id": "openbmb/MiniCPM-2B-dpo-fp32", "name": "openbmb/MiniCPM-2B-dpo-fp32", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.673, "reward-bench/Chat": 0.8911, "reward-bench/Chat Hard": 0.4934, "reward-bench/Safety": 0.573, "reward-bench/Reasoning": 0.8233, "reward-bench/Prior Sets (0.5 weight)": 0.4958 } }, { "id": "openbmb/MiniCPM-S-1B-sft-llama-format", "name": "MiniCPM-S-1B-sft-llama-format", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3329, "hfopenllm_v2/BBH": 0.3049, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3317, "hfopenllm_v2/MMLU-PRO": 0.1858 } }, { "id": "openbmb/minicpm3-4b-fc-fc", "name": "MiniCPM3-4B-FC (FC)", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 86.0, "bfcl/bfcl.overall.overall_accuracy": 25.55, "bfcl/bfcl.overall.total_cost_usd": 54.05, "bfcl/bfcl.overall.latency_mean_s": 118.62, "bfcl/bfcl.overall.latency_std_s": 143.98, "bfcl/bfcl.overall.latency_p95_s": 388.67, "bfcl/bfcl.non_live.ast_accuracy": 81.75, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 84.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.5, "bfcl/bfcl.live.live_accuracy": 65.21, "bfcl/bfcl.live.live_simple_ast_accuracy": 73.26, "bfcl/bfcl.live.live_multiple_ast_accuracy": 63.53, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 3.88, "bfcl/bfcl.multi_turn.base_accuracy": 6.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 2.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 4.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 12.04, "bfcl/bfcl.memory.kv_accuracy": 9.68, "bfcl/bfcl.memory.vector_accuracy": 15.48, "bfcl/bfcl.memory.recursive_summarization_accuracy": 10.97, "bfcl/bfcl.relevance.relevance_detection_accuracy": 68.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 72.84 } }, { "id": "openbmb/minicpm3-4b-prompt", "name": "MiniCPM3-4B (Prompt)", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 97.0, "bfcl/bfcl.overall.overall_accuracy": 22.08, "bfcl/bfcl.overall.total_cost_usd": 29.83, "bfcl/bfcl.overall.latency_mean_s": 31.18, "bfcl/bfcl.overall.latency_std_s": 35.61, "bfcl/bfcl.overall.latency_p95_s": 102.02, "bfcl/bfcl.non_live.ast_accuracy": 70.54, "bfcl/bfcl.non_live.simple_ast_accuracy": 66.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 77.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 70.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 69.0, "bfcl/bfcl.live.live_accuracy": 43.15, "bfcl/bfcl.live.live_simple_ast_accuracy": 47.67, "bfcl/bfcl.live.live_multiple_ast_accuracy": 42.17, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 3.5, "bfcl/bfcl.multi_turn.base_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 3.0, "bfcl/bfcl.web_search.accuracy": 2.0, "bfcl/bfcl.web_search.base_accuracy": 2.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 9.46, "bfcl/bfcl.memory.kv_accuracy": 8.39, "bfcl/bfcl.memory.vector_accuracy": 10.32, "bfcl/bfcl.memory.recursive_summarization_accuracy": 9.68, "bfcl/bfcl.relevance.relevance_detection_accuracy": 56.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 73.71, "bfcl/bfcl.format_sensitivity.max_delta": 68.0, "bfcl/bfcl.format_sensitivity.stddev": 16.55 } }, { "id": "openbmb/UltraRM-13b", "name": "openbmb/UltraRM-13b", "developer": "openbmb", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4683, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.5548, "reward-bench/Safety": 0.5089, "reward-bench/Reasoning": 0.6244, "reward-bench/Prior Sets (0.5 weight)": 0.7294, "reward-bench/Factuality": 0.5063, "reward-bench/Precise IF": 0.3312, "reward-bench/Math": 0.5519, "reward-bench/Focus": 0.6081, "reward-bench/Ties": 0.3036 } }, { "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k", "name": "openbuddy-falcon3-10b-v24.2-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5086, "hfopenllm_v2/BBH": 0.6004, "hfopenllm_v2/MATH Level 5": 0.213, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3834 } }, { "id": "OpenBuddy/openbuddy-llama3-70b-v21.2-32k", "name": "openbuddy-llama3-70b-v21.2-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.701, "hfopenllm_v2/BBH": 0.6507, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.458, "hfopenllm_v2/MMLU-PRO": 0.4832 } }, { "id": "OpenBuddy/openbuddy-llama3-8b-v21.1-8k", "name": "openbuddy-llama3-8b-v21.1-8k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.557, "hfopenllm_v2/BBH": 0.4788, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3988, "hfopenllm_v2/MMLU-PRO": 0.2955 } }, { "id": "OpenBuddy/openbuddy-llama3-8b-v21.2-32k", "name": "openbuddy-llama3-8b-v21.2-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6192, "hfopenllm_v2/BBH": 0.4856, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3779, "hfopenllm_v2/MMLU-PRO": 0.3299 } }, { "id": "OpenBuddy/openbuddy-llama3.1-70b-v22.1-131k", "name": "openbuddy-llama3.1-70b-v22.1-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7333, "hfopenllm_v2/BBH": 0.6698, "hfopenllm_v2/MATH Level 5": 0.395, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.463, "hfopenllm_v2/MMLU-PRO": 0.5304 } }, { "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.2-131k", "name": "openbuddy-llama3.1-8b-v22.2-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6657, "hfopenllm_v2/BBH": 0.5007, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.331 } }, { "id": "OpenBuddy/openbuddy-llama3.1-8b-v22.3-131k", "name": "openbuddy-llama3.1-8b-v22.3-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5997, "hfopenllm_v2/BBH": 0.5066, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3277 } }, { "id": "OpenBuddy/openbuddy-llama3.2-1b-v23.1-131k", "name": "openbuddy-llama3.2-1b-v23.1-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.359, "hfopenllm_v2/BBH": 0.3267, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.184 } }, { "id": "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k", "name": "openbuddy-llama3.2-3b-v23.2-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4319, "hfopenllm_v2/BBH": 0.4073, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3263, "hfopenllm_v2/MMLU-PRO": 0.2479 } }, { "id": "OpenBuddy/openbuddy-llama3.3-70b-v24.1-131k", "name": "openbuddy-llama3.3-70b-v24.1-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8121, "hfopenllm_v2/BBH": 0.6858, "hfopenllm_v2/MATH Level 5": 0.4411, "hfopenllm_v2/GPQA": 0.4346, "hfopenllm_v2/MUSR": 0.4869, "hfopenllm_v2/MMLU-PRO": 0.5327 } }, { "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k", "name": "openbuddy-mixtral-7bx8-v18.1-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5493, "hfopenllm_v2/BBH": 0.4656, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.3804 } }, { "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k", "name": "openbuddy-nemotron-70b-v23.1-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7555, "hfopenllm_v2/BBH": 0.6749, "hfopenllm_v2/MATH Level 5": 0.321, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4538, "hfopenllm_v2/MMLU-PRO": 0.5175 } }, { "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k", "name": "openbuddy-nemotron-70b-v23.2-131k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7227, "hfopenllm_v2/BBH": 0.6705, "hfopenllm_v2/MATH Level 5": 0.3157, "hfopenllm_v2/GPQA": 0.3599, "hfopenllm_v2/MUSR": 0.4696, "hfopenllm_v2/MMLU-PRO": 0.5121 } }, { "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k", "name": "openbuddy-qwen2.5llamaify-14b-v23.1-200k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6309, "hfopenllm_v2/BBH": 0.6013, "hfopenllm_v2/MATH Level 5": 0.2538, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.424, "hfopenllm_v2/MMLU-PRO": 0.4673 } }, { "id": "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k", "name": "openbuddy-qwen2.5llamaify-14b-v23.3-200k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6131, "hfopenllm_v2/BBH": 0.6081, "hfopenllm_v2/MATH Level 5": 0.2311, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4346, "hfopenllm_v2/MMLU-PRO": 0.4795 } }, { "id": "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k", "name": "openbuddy-qwen2.5llamaify-7b-v23.1-200k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5673, "hfopenllm_v2/BBH": 0.5509, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4363, "hfopenllm_v2/MMLU-PRO": 0.3948 } }, { "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k", "name": "openbuddy-qwq-32b-v24.1-200k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5937, "hfopenllm_v2/BBH": 0.6798, "hfopenllm_v2/MATH Level 5": 0.3739, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4849, "hfopenllm_v2/MMLU-PRO": 0.549 } }, { "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k", "name": "openbuddy-qwq-32b-v24.2-200k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.597, "hfopenllm_v2/BBH": 0.6772, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.3767, "hfopenllm_v2/MUSR": 0.4718, "hfopenllm_v2/MMLU-PRO": 0.5446 } }, { "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k", "name": "openbuddy-yi1.5-34b-v21.3-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.542, "hfopenllm_v2/BBH": 0.6163, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4439, "hfopenllm_v2/MMLU-PRO": 0.4599 } }, { "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k", "name": "openbuddy-zero-14b-v22.3-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3753, "hfopenllm_v2/BBH": 0.486, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.3187 } }, { "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k", "name": "openbuddy-zero-3b-v21.2-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3802, "hfopenllm_v2/BBH": 0.3935, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.2034 } }, { "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k", "name": "openbuddy-zero-56b-v21.2-32k", "developer": "OpenBuddy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5057, "hfopenllm_v2/BBH": 0.6128, "hfopenllm_v2/MATH Level 5": 0.1624, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4305, "hfopenllm_v2/MMLU-PRO": 0.4399 } }, { "id": "openchat/openchat-3.5-0106", "name": "openchat-3.5-0106", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5967, "hfopenllm_v2/BBH": 0.4617, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.3291 } }, { "id": "openchat/openchat-3.5-1210", "name": "openchat-3.5-1210", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6037, "hfopenllm_v2/BBH": 0.4535, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4414, "hfopenllm_v2/MMLU-PRO": 0.3142 } }, { "id": "openchat/openchat-3.6-8b-20240522", "name": "openchat-3.6-8b-20240522", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5343, "hfopenllm_v2/BBH": 0.5338, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3999, "hfopenllm_v2/MMLU-PRO": 0.3229 } }, { "id": "openchat/openchat_3.5", "name": "openchat_3.5", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5931, "hfopenllm_v2/BBH": 0.4426, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4229, "hfopenllm_v2/MMLU-PRO": 0.3153 } }, { "id": "openchat/openchat_v3.2", "name": "openchat_v3.2", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2981, "hfopenllm_v2/BBH": 0.4331, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4336, "hfopenllm_v2/MMLU-PRO": 0.2422 } }, { "id": "openchat/openchat_v3.2_super", "name": "openchat_v3.2_super", "developer": "openchat", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2862, "hfopenllm_v2/BBH": 0.4221, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.2425 } }, { "id": "opencompass/CompassJudger-1-1.5B-Instruct", "name": "opencompass/CompassJudger-1-1.5B-Instruct", "developer": "opencompass", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7344, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.4923, "reward-bench/Safety": 0.7818, "reward-bench/Reasoning": 0.6999 } }, { "id": "opencompass/CompassJudger-1-14B-Instruct", "name": "opencompass/CompassJudger-1-14B-Instruct", "developer": "opencompass", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8409, "reward-bench/Chat": 0.9749, "reward-bench/Chat Hard": 0.6228, "reward-bench/Safety": 0.8392, "reward-bench/Reasoning": 0.9268 } }, { "id": "opencompass/CompassJudger-1-32B-Instruct", "name": "opencompass/CompassJudger-1-32B-Instruct", "developer": "opencompass", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8522, "reward-bench/Chat": 0.9804, "reward-bench/Chat Hard": 0.6513, "reward-bench/Safety": 0.8527, "reward-bench/Reasoning": 0.9244 } }, { "id": "opencompass/CompassJudger-1-7B-Instruct", "name": "opencompass/CompassJudger-1-7B-Instruct", "developer": "opencompass", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8317, "reward-bench/Chat": 0.9777, "reward-bench/Chat Hard": 0.6096, "reward-bench/Safety": 0.8446, "reward-bench/Reasoning": 0.8948 } }, { "id": "OpenGenerativeAI/Bifrost", "name": "Bifrost", "developer": "OpenGenerativeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6348, "hfopenllm_v2/BBH": 0.6849, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4598, "hfopenllm_v2/MMLU-PRO": 0.516 } }, { "id": "OpenGenerativeAI/Bifrost-14B", "name": "Bifrost-14B", "developer": "OpenGenerativeAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6615, "hfopenllm_v2/BBH": 0.6845, "hfopenllm_v2/MATH Level 5": 0.2356, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4624, "hfopenllm_v2/MMLU-PRO": 0.5074 } }, { "id": "OpenLeecher/llama3-8b-lima", "name": "llama3-8b-lima", "developer": "OpenLeecher", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4371, "hfopenllm_v2/BBH": 0.4296, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.2626 } }, { "id": "OpenLLM-France/Lucie-7B", "name": "Lucie-7B", "developer": "OpenLLM-France", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2496, "hfopenllm_v2/BBH": 0.3492, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3923, "hfopenllm_v2/MMLU-PRO": 0.1498 } }, { "id": "OpenLLM-France/Lucie-7B-Instruct", "name": "Lucie-7B-Instruct", "developer": "OpenLLM-France", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2796, "hfopenllm_v2/BBH": 0.3254, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3662, "hfopenllm_v2/MMLU-PRO": 0.1556 } }, { "id": "OpenLLM-France/Lucie-7B-Instruct-human-data", "name": "Lucie-7B-Instruct-human-data", "developer": "OpenLLM-France", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2946, "hfopenllm_v2/BBH": 0.3284, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3729, "hfopenllm_v2/MMLU-PRO": 0.143 } }, { "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1", "name": "Lucie-7B-Instruct-v1.1", "developer": "OpenLLM-France", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3039, "hfopenllm_v2/BBH": 0.3816, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.1864 } }, { "id": "OpenScholar/Llama-3.1_OpenScholar-8B", "name": "Llama-3.1_OpenScholar-8B", "developer": "OpenScholar", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6064, "hfopenllm_v2/BBH": 0.5208, "hfopenllm_v2/MATH Level 5": 0.1654, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4275, "hfopenllm_v2/MMLU-PRO": 0.3708 } }, { "id": "orai-nlp/Llama-eus-8B", "name": "Llama-eus-8B", "developer": "orai-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2161, "hfopenllm_v2/BBH": 0.4418, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3919, "hfopenllm_v2/MMLU-PRO": 0.3058 } }, { "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored", "name": "Llama-3.1-8B-Lexi-Uncensored", "developer": "Orenguteng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7777, "hfopenllm_v2/BBH": 0.5057, "hfopenllm_v2/MATH Level 5": 0.1571, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.379 } }, { "id": "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2", "name": "Llama-3.1-8B-Lexi-Uncensored-V2", "developer": "Orenguteng", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7792, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.1971, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.3781 } }, { "id": "Orion-zhen/phi-4-abliterated", "name": "phi-4-abliterated", "developer": "Orion-zhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0576, "hfopenllm_v2/BBH": 0.6698, "hfopenllm_v2/MATH Level 5": 0.3021, "hfopenllm_v2/GPQA": 0.4044, "hfopenllm_v2/MUSR": 0.5006, "hfopenllm_v2/MMLU-PRO": 0.5292 } }, { "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", "name": "Qwen2.5-7B-Instruct-Uncensored", "developer": "Orion-zhen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7204, "hfopenllm_v2/BBH": 0.5474, "hfopenllm_v2/MATH Level 5": 0.4773, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4361, "hfopenllm_v2/MMLU-PRO": 0.4427 } }, { "id": "oxyapi/oxy-1-small", "name": "oxy-1-small", "developer": "oxyapi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6245, "hfopenllm_v2/BBH": 0.5885, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4487, "hfopenllm_v2/MMLU-PRO": 0.5001 } }, { "id": "ozone-ai/0x-lite", "name": "0x-lite", "developer": "ozone-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.774, "hfopenllm_v2/BBH": 0.6341, "hfopenllm_v2/MATH Level 5": 0.5045, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4221, "hfopenllm_v2/MMLU-PRO": 0.5184 } }, { "id": "ozone-research/Chirp-01", "name": "Chirp-01", "developer": "ozone-research", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6348, "hfopenllm_v2/BBH": 0.465, "hfopenllm_v2/MATH Level 5": 0.3467, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.4487, "hfopenllm_v2/MMLU-PRO": 0.3508 } }, { "id": "P0x0/Astra-v1-12B", "name": "Astra-v1-12B", "developer": "P0x0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2806, "hfopenllm_v2/BBH": 0.5215, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4052, "hfopenllm_v2/MMLU-PRO": 0.3461 } }, { "id": "paloalma/ECE-TW3-JRGL-V1", "name": "ECE-TW3-JRGL-V1", "developer": "paloalma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5535, "hfopenllm_v2/BBH": 0.6284, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4621, "hfopenllm_v2/MMLU-PRO": 0.4221 } }, { "id": "paloalma/ECE-TW3-JRGL-V2", "name": "ECE-TW3-JRGL-V2", "developer": "paloalma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2255, "hfopenllm_v2/BBH": 0.6031, "hfopenllm_v2/MATH Level 5": 0.185, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4793, "hfopenllm_v2/MMLU-PRO": 0.4588 } }, { "id": "paloalma/ECE-TW3-JRGL-V5", "name": "ECE-TW3-JRGL-V5", "developer": "paloalma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4553, "hfopenllm_v2/BBH": 0.6025, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4621, "hfopenllm_v2/MMLU-PRO": 0.4648 } }, { "id": "paloalma/Le_Triomphant-ECE-TW3", "name": "Le_Triomphant-ECE-TW3", "developer": "paloalma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5402, "hfopenllm_v2/BBH": 0.6112, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4725, "hfopenllm_v2/MMLU-PRO": 0.4763 } }, { "id": "paloalma/TW3-JRGL-v2", "name": "TW3-JRGL-v2", "developer": "paloalma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5316, "hfopenllm_v2/BBH": 0.6138, "hfopenllm_v2/MATH Level 5": 0.179, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4858, "hfopenllm_v2/MMLU-PRO": 0.4858 } }, { "id": "pankajmathur/Al_Dente_v1_8b", "name": "Al_Dente_v1_8b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3694, "hfopenllm_v2/BBH": 0.4835, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3987, "hfopenllm_v2/MMLU-PRO": 0.286 } }, { "id": "pankajmathur/model_007_13b_v2", "name": "model_007_13b_v2", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3056, "hfopenllm_v2/BBH": 0.4702, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4611, "hfopenllm_v2/MMLU-PRO": 0.2461 } }, { "id": "pankajmathur/orca_mini_3b", "name": "orca_mini_3b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0742, "hfopenllm_v2/BBH": 0.3196, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3349, "hfopenllm_v2/MMLU-PRO": 0.1145 } }, { "id": "pankajmathur/orca_mini_7b", "name": "orca_mini_7b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0412, "hfopenllm_v2/BBH": 0.3332, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.1246 } }, { "id": "pankajmathur/orca_mini_phi-4", "name": "orca_mini_phi-4", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7781, "hfopenllm_v2/BBH": 0.6856, "hfopenllm_v2/MATH Level 5": 0.2953, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4703, "hfopenllm_v2/MMLU-PRO": 0.5255 } }, { "id": "pankajmathur/orca_mini_v2_7b", "name": "orca_mini_v2_7b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1358, "hfopenllm_v2/BBH": 0.3536, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.1542 } }, { "id": "pankajmathur/orca_mini_v3_13b", "name": "orca_mini_v3_13b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2897, "hfopenllm_v2/BBH": 0.4711, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4598, "hfopenllm_v2/MMLU-PRO": 0.2305 } }, { "id": "pankajmathur/orca_mini_v3_70b", "name": "orca_mini_v3_70b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4015, "hfopenllm_v2/BBH": 0.5949, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.5079, "hfopenllm_v2/MMLU-PRO": 0.3757 } }, { "id": "pankajmathur/orca_mini_v3_7b", "name": "orca_mini_v3_7b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2821, "hfopenllm_v2/BBH": 0.4095, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.4982, "hfopenllm_v2/MMLU-PRO": 0.2084 } }, { "id": "pankajmathur/orca_mini_v5_8b", "name": "orca_mini_v5_8b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4806, "hfopenllm_v2/BBH": 0.5064, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4, "hfopenllm_v2/MMLU-PRO": 0.3076 } }, { "id": "pankajmathur/orca_mini_v5_8b_dpo", "name": "orca_mini_v5_8b_dpo", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4896, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3894, "hfopenllm_v2/MMLU-PRO": 0.3116 } }, { "id": "pankajmathur/orca_mini_v5_8b_orpo", "name": "orca_mini_v5_8b_orpo", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0824, "hfopenllm_v2/BBH": 0.4964, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4131, "hfopenllm_v2/MMLU-PRO": 0.2947 } }, { "id": "pankajmathur/orca_mini_v6_8b", "name": "orca_mini_v6_8b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0111, "hfopenllm_v2/BBH": 0.3029, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "pankajmathur/orca_mini_v6_8b_dpo", "name": "orca_mini_v6_8b_dpo", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3883, "hfopenllm_v2/BBH": 0.5203, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.409, "hfopenllm_v2/MMLU-PRO": 0.3596 } }, { "id": "pankajmathur/orca_mini_v7_72b", "name": "orca_mini_v7_72b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.593, "hfopenllm_v2/BBH": 0.6842, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.507, "hfopenllm_v2/MMLU-PRO": 0.5622 } }, { "id": "pankajmathur/orca_mini_v7_7b", "name": "orca_mini_v7_7b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4388, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.436, "hfopenllm_v2/MMLU-PRO": 0.4167 } }, { "id": "pankajmathur/orca_mini_v8_1_70b", "name": "orca_mini_v8_1_70b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8571, "hfopenllm_v2/BBH": 0.6781, "hfopenllm_v2/MATH Level 5": 0.3527, "hfopenllm_v2/GPQA": 0.4329, "hfopenllm_v2/MUSR": 0.4437, "hfopenllm_v2/MMLU-PRO": 0.4983 } }, { "id": "pankajmathur/orca_mini_v9_0_3B-Instruct", "name": "orca_mini_v9_0_3B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5754, "hfopenllm_v2/BBH": 0.4413, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3659, "hfopenllm_v2/MMLU-PRO": 0.2603 } }, { "id": "pankajmathur/orca_mini_v9_1_1B-Instruct", "name": "orca_mini_v9_1_1B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3629, "hfopenllm_v2/BBH": 0.3205, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3381, "hfopenllm_v2/MMLU-PRO": 0.1374 } }, { "id": "pankajmathur/orca_mini_v9_2_14B", "name": "orca_mini_v9_2_14B", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7781, "hfopenllm_v2/BBH": 0.6856, "hfopenllm_v2/MATH Level 5": 0.2953, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4703, "hfopenllm_v2/MMLU-PRO": 0.5255 } }, { "id": "pankajmathur/orca_mini_v9_2_70b", "name": "orca_mini_v9_2_70b", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8383, "hfopenllm_v2/BBH": 0.6745, "hfopenllm_v2/MATH Level 5": 0.2938, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.471, "hfopenllm_v2/MMLU-PRO": 0.4821 } }, { "id": "pankajmathur/orca_mini_v9_4_70B", "name": "orca_mini_v9_4_70B", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8015, "hfopenllm_v2/BBH": 0.6419, "hfopenllm_v2/MATH Level 5": 0.3263, "hfopenllm_v2/GPQA": 0.3658, "hfopenllm_v2/MUSR": 0.4647, "hfopenllm_v2/MMLU-PRO": 0.4536 } }, { "id": "pankajmathur/orca_mini_v9_5_1B-Instruct", "name": "orca_mini_v9_5_1B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4638, "hfopenllm_v2/BBH": 0.3337, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3182, "hfopenllm_v2/MMLU-PRO": 0.137 } }, { "id": "pankajmathur/orca_mini_v9_5_1B-Instruct_preview", "name": "orca_mini_v9_5_1B-Instruct_preview", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3936, "hfopenllm_v2/BBH": 0.3277, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3395, "hfopenllm_v2/MMLU-PRO": 0.1327 } }, { "id": "pankajmathur/orca_mini_v9_5_3B-Instruct", "name": "orca_mini_v9_5_3B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7207, "hfopenllm_v2/BBH": 0.4496, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.427, "hfopenllm_v2/MMLU-PRO": 0.2882 } }, { "id": "pankajmathur/orca_mini_v9_6_1B-Instruct", "name": "orca_mini_v9_6_1B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6086, "hfopenllm_v2/BBH": 0.3561, "hfopenllm_v2/MATH Level 5": 0.077, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.1809 } }, { "id": "pankajmathur/orca_mini_v9_6_3B-Instruct", "name": "orca_mini_v9_6_3B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7316, "hfopenllm_v2/BBH": 0.4568, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.2851 } }, { "id": "pankajmathur/orca_mini_v9_7_1B-Instruct", "name": "orca_mini_v9_7_1B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.561, "hfopenllm_v2/BBH": 0.3182, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.1345 } }, { "id": "pankajmathur/orca_mini_v9_7_3B-Instruct", "name": "orca_mini_v9_7_3B-Instruct", "developer": "pankajmathur", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5618, "hfopenllm_v2/BBH": 0.3297, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.1375 } }, { "id": "Parissa3/test-model", "name": "test-model", "developer": "Parissa3", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3883, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4685, "hfopenllm_v2/MMLU-PRO": 0.3057 } }, { "id": "paulml/ECE-ILAB-Q1", "name": "ECE-ILAB-Q1", "developer": "paulml", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7865, "hfopenllm_v2/BBH": 0.6718, "hfopenllm_v2/MATH Level 5": 0.3557, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4614, "hfopenllm_v2/MMLU-PRO": 0.5505 } }, { "id": "phronetic-ai/rzn-t-prompt", "name": "RZN-T (Prompt)", "developer": "phronetic-ai", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 96.0, "bfcl/bfcl.overall.overall_accuracy": 22.25, "bfcl/bfcl.overall.total_cost_usd": 12.31, "bfcl/bfcl.overall.latency_mean_s": 12.32, "bfcl/bfcl.overall.latency_std_s": 27.53, "bfcl/bfcl.overall.latency_p95_s": 39.84, "bfcl/bfcl.non_live.ast_accuracy": 67.94, "bfcl/bfcl.non_live.simple_ast_accuracy": 63.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 75.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 69.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 63.5, "bfcl/bfcl.live.live_accuracy": 49.74, "bfcl/bfcl.live.live_simple_ast_accuracy": 61.24, "bfcl/bfcl.live.live_multiple_ast_accuracy": 47.2, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 41.67, "bfcl/bfcl.multi_turn.accuracy": 2.88, "bfcl/bfcl.multi_turn.base_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 2.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 2.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 6.88, "bfcl/bfcl.memory.kv_accuracy": 9.03, "bfcl/bfcl.memory.vector_accuracy": 6.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 5.16, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.41, "bfcl/bfcl.format_sensitivity.max_delta": 63.5, "bfcl/bfcl.format_sensitivity.stddev": 25.53 } }, { "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", "name": "PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B", "developer": "Pinkstack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5085, "hfopenllm_v2/BBH": 0.4711, "hfopenllm_v2/MATH Level 5": 0.1692, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.3511 } }, { "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ", "name": "SuperThoughts-CoT-14B-16k-o1-QwQ", "developer": "Pinkstack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0515, "hfopenllm_v2/BBH": 0.672, "hfopenllm_v2/MATH Level 5": 0.4199, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.4914, "hfopenllm_v2/MMLU-PRO": 0.5268 } }, { "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1", "name": "Superthoughts-lite-1.8B-experimental-o1", "developer": "Pinkstack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0375, "hfopenllm_v2/BBH": 0.3435, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3354, "hfopenllm_v2/MMLU-PRO": 0.1851 } }, { "id": "Pinkstack/Superthoughts-lite-v1", "name": "Superthoughts-lite-v1", "developer": "Pinkstack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1659, "hfopenllm_v2/BBH": 0.3466, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3672, "hfopenllm_v2/MMLU-PRO": 0.1755 } }, { "id": "pints-ai/1.5-Pints-16K-v0.1", "name": "1.5-Pints-16K-v0.1", "developer": "pints-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1636, "hfopenllm_v2/BBH": 0.3133, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2357, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.1119 } }, { "id": "pints-ai/1.5-Pints-2K-v0.1", "name": "1.5-Pints-2K-v0.1", "developer": "pints-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1762, "hfopenllm_v2/BBH": 0.298, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.1104 } }, { "id": "piotr25691/thea-3b-25r", "name": "thea-3b-25r", "developer": "piotr25691", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7344, "hfopenllm_v2/BBH": 0.4484, "hfopenllm_v2/MATH Level 5": 0.1782, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.3182 } }, { "id": "piotr25691/thea-c-3b-25r", "name": "thea-c-3b-25r", "developer": "piotr25691", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7402, "hfopenllm_v2/BBH": 0.4532, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3315, "hfopenllm_v2/MMLU-PRO": 0.3178 } }, { "id": "piotr25691/thea-rp-3b-25r", "name": "thea-rp-3b-25r", "developer": "piotr25691", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6578, "hfopenllm_v2/BBH": 0.439, "hfopenllm_v2/MATH Level 5": 0.1322, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3819, "hfopenllm_v2/MMLU-PRO": 0.306 } }, { "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B", "name": "L3.2-Instruct-Thinking-v0.1-1B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4628, "hfopenllm_v2/BBH": 0.3302, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1483 } }, { "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", "name": "LLaMa-3.1-Instruct-Interleaved-Zeroed-13B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7871, "hfopenllm_v2/BBH": 0.5073, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.387, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest-8B", "name": "LLaMa-3.1-RomboTiesTest-8B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7825, "hfopenllm_v2/BBH": 0.5073, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.387, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "PJMixers-Dev/LLaMa-3.1-RomboTiesTest2-8B", "name": "LLaMa-3.1-RomboTiesTest2-8B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7825, "hfopenllm_v2/BBH": 0.5073, "hfopenllm_v2/MATH Level 5": 0.2002, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.387, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", "name": "LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6931, "hfopenllm_v2/BBH": 0.4556, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.3127 } }, { "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6292, "hfopenllm_v2/BBH": 0.4581, "hfopenllm_v2/MATH Level 5": 0.1299, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3659, "hfopenllm_v2/MMLU-PRO": 0.3115 } }, { "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", "name": "LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6504, "hfopenllm_v2/BBH": 0.4511, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.3108 } }, { "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", "name": "LLaMa-3.2-Instruct-JankMixBread-v0.1-3B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5041, "hfopenllm_v2/BBH": 0.4483, "hfopenllm_v2/MATH Level 5": 0.1307, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3516, "hfopenllm_v2/MMLU-PRO": 0.3083 } }, { "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B", "name": "Qwen2.5-RomboTiesTest-7B", "developer": "PJMixers-Dev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7558, "hfopenllm_v2/BBH": 0.5399, "hfopenllm_v2/MATH Level 5": 0.4962, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.4285 } }, { "id": "PJMixers/LLaMa-3-CursedStock-v2.0-8B", "name": "LLaMa-3-CursedStock-v2.0-8B", "developer": "PJMixers", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6331, "hfopenllm_v2/BBH": 0.5271, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3856, "hfopenllm_v2/MMLU-PRO": 0.3556 } }, { "id": "PKU-Alignment/beaver-7b-v1.0-cost", "name": "PKU-Alignment/beaver-7b-v1.0-cost", "developer": "PKU-Alignment", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5798, "reward-bench/Factuality": 0.3263, "reward-bench/Precise IF": 0.2313, "reward-bench/Math": 0.3989, "reward-bench/Safety": 0.7351, "reward-bench/Focus": 0.2939, "reward-bench/Ties": -0.01, "reward-bench/Chat": 0.6173, "reward-bench/Chat Hard": 0.4232, "reward-bench/Reasoning": 0.5482, "reward-bench/Prior Sets (0.5 weight)": 0.57 } }, { "id": "PKU-Alignment/beaver-7b-v1.0-reward", "name": "PKU-Alignment/beaver-7b-v1.0-reward", "developer": "PKU-Alignment", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4727, "reward-bench/Factuality": 0.2105, "reward-bench/Precise IF": 0.2938, "reward-bench/Math": 0.2623, "reward-bench/Safety": 0.3757, "reward-bench/Focus": 0.0646, "reward-bench/Ties": -0.01, "reward-bench/Chat": 0.8184, "reward-bench/Chat Hard": 0.2873, "reward-bench/Reasoning": 0.346, "reward-bench/Prior Sets (0.5 weight)": 0.5993 } }, { "id": "PKU-Alignment/beaver-7b-v2.0-cost", "name": "PKU-Alignment/beaver-7b-v2.0-cost", "developer": "PKU-Alignment", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5957, "reward-bench/Factuality": 0.3789, "reward-bench/Precise IF": 0.275, "reward-bench/Math": 0.3333, "reward-bench/Safety": 0.7608, "reward-bench/Focus": 0.2828, "reward-bench/Ties": -0.01, "reward-bench/Chat": 0.5726, "reward-bench/Chat Hard": 0.4561, "reward-bench/Reasoning": 0.6211, "reward-bench/Prior Sets (0.5 weight)": 0.5397 } }, { "id": "PKU-Alignment/beaver-7b-v2.0-reward", "name": "PKU-Alignment/beaver-7b-v2.0-reward", "developer": "PKU-Alignment", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6366, "reward-bench/Factuality": 0.2168, "reward-bench/Precise IF": 0.2562, "reward-bench/Math": 0.3825, "reward-bench/Safety": 0.6041, "reward-bench/Focus": 0.2606, "reward-bench/Ties": 0.0944, "reward-bench/Chat": 0.8994, "reward-bench/Chat Hard": 0.364, "reward-bench/Reasoning": 0.6887, "reward-bench/Prior Sets (0.5 weight)": 0.6171 } }, { "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b", "name": "Dans-Instruct-CoreCurriculum-12b", "developer": "PocketDoc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2191, "hfopenllm_v2/BBH": 0.3789, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4096, "hfopenllm_v2/MMLU-PRO": 0.1219 } }, { "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b", "name": "Dans-PersonalityEngine-v1.0.0-8b", "developer": "PocketDoc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4982, "hfopenllm_v2/BBH": 0.4733, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3542, "hfopenllm_v2/MMLU-PRO": 0.3065 } }, { "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b", "name": "Dans-PersonalityEngine-V1.1.0-12b", "developer": "PocketDoc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7075, "hfopenllm_v2/BBH": 0.5361, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4587, "hfopenllm_v2/MMLU-PRO": 0.3262 } }, { "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b", "name": "Dans-PersonalityEngine-V1.2.0-24b", "developer": "PocketDoc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7886, "hfopenllm_v2/BBH": 0.6421, "hfopenllm_v2/MATH Level 5": 0.2455, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.43, "hfopenllm_v2/MMLU-PRO": 0.5026 } }, { "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b", "name": "Dans-SakuraKaze-V1.0.0-12b", "developer": "PocketDoc", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.652, "hfopenllm_v2/BBH": 0.5405, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4745, "hfopenllm_v2/MMLU-PRO": 0.356 } }, { "id": "PoLL/gpt-3.5-turbo-0125_claude-3-sonnet-2024022...", "name": "PoLL/gpt-3.5-turbo-0125_claude-3-sonnet-2024022...", "developer": "PoLL", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7578, "reward-bench/Chat": 0.9525, "reward-bench/Chat Hard": 0.5406, "reward-bench/Safety": 0.8034, "reward-bench/Reasoning": 0.7346 } }, { "id": "postbot/gpt2-medium-emailgen", "name": "gpt2-medium-emailgen", "developer": "postbot", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1492, "hfopenllm_v2/BBH": 0.313, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "PowerInfer/SmallThinker-3B-Preview", "name": "SmallThinker-3B-Preview", "developer": "PowerInfer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.62, "hfopenllm_v2/BBH": 0.4495, "hfopenllm_v2/MATH Level 5": 0.2779, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3525, "hfopenllm_v2/MMLU-PRO": 0.3018 } }, { "id": "PranavHarshan/LaMistral-V4", "name": "LaMistral-V4", "developer": "PranavHarshan", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6239, "hfopenllm_v2/BBH": 0.5184, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.3643, "hfopenllm_v2/MMLU-PRO": 0.3599 } }, { "id": "PranavHarshan/MedNarra-X1", "name": "MedNarra-X1", "developer": "PranavHarshan", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4338, "hfopenllm_v2/BBH": 0.4637, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.354, "hfopenllm_v2/MMLU-PRO": 0.3431 } }, { "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended", "name": "OpenChat-3.5-0106_10.7B_48Layers-Appended", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5961, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved", "name": "OpenChat-3.5-0106_10.7B_48Layers-Interleaved", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5961, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.3299 } }, { "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE", "name": "OpenChat-3.5-0106_32K-PoSE", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3969, "hfopenllm_v2/BBH": 0.3471, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4205, "hfopenllm_v2/MMLU-PRO": 0.2031 } }, { "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended", "name": "OpenChat-3.5-0106_8.11B_36Layers-Appended", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5976, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved", "name": "OpenChat-3.5-0106_8.11B_36Layers-Interleaved", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5961, "hfopenllm_v2/BBH": 0.4621, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.3299 } }, { "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended", "name": "OpenChat-3.5-0106_8.99B_40Layers-Appended", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5961, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved", "name": "OpenChat-3.5-0106_8.99B_40Layers-Interleaved", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5976, "hfopenllm_v2/BBH": 0.4621, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.3299 } }, { "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended", "name": "OpenChat-3.5-0106_9.86B_44Layers-Appended", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5961, "hfopenllm_v2/BBH": 0.462, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "Pretergeek/openchat-3.5-0106_Rebased_Mistral-7B-v0.2", "name": "openchat-3.5-0106_Rebased_Mistral-7B-v0.2", "developer": "Pretergeek", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3706, "hfopenllm_v2/BBH": 0.3627, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.484, "hfopenllm_v2/MMLU-PRO": 0.283 } }, { "id": "PrimeIntellect/INTELLECT-1", "name": "INTELLECT-1", "developer": "PrimeIntellect", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1757, "hfopenllm_v2/BBH": 0.274, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3753, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "PrimeIntellect/INTELLECT-1-Instruct", "name": "INTELLECT-1-Instruct", "developer": "PrimeIntellect", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.287, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3577, "hfopenllm_v2/MMLU-PRO": 0.1064 } }, { "id": "prince-canuma/Ministral-8B-Instruct-2410-HF", "name": "Ministral-8B-Instruct-2410-HF", "developer": "prince-canuma", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5912, "hfopenllm_v2/BBH": 0.4586, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.3298 } }, { "id": "princeton-nlp/gemma-2-9b-it-DPO", "name": "gemma-2-9b-it-DPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2769, "hfopenllm_v2/BBH": 0.5941, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.3723 } }, { "id": "princeton-nlp/gemma-2-9b-it-SimPO", "name": "gemma-2-9b-it-SimPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3207, "hfopenllm_v2/BBH": 0.5839, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4123, "hfopenllm_v2/MMLU-PRO": 0.3975 } }, { "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Base", "name": "Llama-3-8B-ProLong-512k-Base", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5322, "hfopenllm_v2/BBH": 0.5033, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4223, "hfopenllm_v2/MMLU-PRO": 0.3329 } }, { "id": "princeton-nlp/Llama-3-8B-ProLong-512k-Instruct", "name": "Llama-3-8B-ProLong-512k-Instruct", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5508, "hfopenllm_v2/BBH": 0.5028, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4266, "hfopenllm_v2/MMLU-PRO": 0.3231 } }, { "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Base", "name": "Llama-3-8B-ProLong-64k-Base", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5201, "hfopenllm_v2/BBH": 0.4927, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4341, "hfopenllm_v2/MMLU-PRO": 0.3348 } }, { "id": "princeton-nlp/Llama-3-8B-ProLong-64k-Instruct", "name": "Llama-3-8B-ProLong-64k-Instruct", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5563, "hfopenllm_v2/BBH": 0.5083, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4397, "hfopenllm_v2/MMLU-PRO": 0.3275 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT", "name": "Llama-3-Base-8B-SFT", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2796, "hfopenllm_v2/BBH": 0.4643, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-CPO", "name": "Llama-3-Base-8B-SFT-CPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3703, "hfopenllm_v2/BBH": 0.4595, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3609, "hfopenllm_v2/MMLU-PRO": 0.2976 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-DPO", "name": "Llama-3-Base-8B-SFT-DPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4111, "hfopenllm_v2/BBH": 0.4666, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3867, "hfopenllm_v2/MMLU-PRO": 0.3078 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-IPO", "name": "Llama-3-Base-8B-SFT-IPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4487, "hfopenllm_v2/BBH": 0.469, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3919, "hfopenllm_v2/MMLU-PRO": 0.3115 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-KTO", "name": "Llama-3-Base-8B-SFT-KTO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4523, "hfopenllm_v2/BBH": 0.4693, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3842, "hfopenllm_v2/MMLU-PRO": 0.3054 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-ORPO", "name": "Llama-3-Base-8B-SFT-ORPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4517, "hfopenllm_v2/BBH": 0.4734, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.3707, "hfopenllm_v2/MMLU-PRO": 0.3083 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-RDPO", "name": "Llama-3-Base-8B-SFT-RDPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.448, "hfopenllm_v2/BBH": 0.4662, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4027, "hfopenllm_v2/MMLU-PRO": 0.3014 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-RRHF", "name": "Llama-3-Base-8B-SFT-RRHF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3357, "hfopenllm_v2/BBH": 0.452, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3722, "hfopenllm_v2/MMLU-PRO": 0.2889 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-SimPO", "name": "Llama-3-Base-8B-SFT-SimPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4685, "hfopenllm_v2/BBH": 0.4741, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4127, "hfopenllm_v2/MMLU-PRO": 0.3105 } }, { "id": "princeton-nlp/Llama-3-Base-8B-SFT-SLiC-HF", "name": "Llama-3-Base-8B-SFT-SLiC-HF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.489, "hfopenllm_v2/BBH": 0.4704, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4091, "hfopenllm_v2/MMLU-PRO": 0.3063 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-CPO", "name": "Llama-3-Instruct-8B-CPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7293, "hfopenllm_v2/BBH": 0.4999, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.3652 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-CPO-v0.2", "name": "Llama-3-Instruct-8B-CPO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7506, "hfopenllm_v2/BBH": 0.5027, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.3706 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-DPO", "name": "Llama-3-Instruct-8B-DPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6757, "hfopenllm_v2/BBH": 0.4991, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.3665 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-DPO-v0.2", "name": "Llama-3-Instruct-8B-DPO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.5056, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3844, "hfopenllm_v2/MMLU-PRO": 0.3769 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-KTO", "name": "Llama-3-Instruct-8B-KTO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6864, "hfopenllm_v2/BBH": 0.4982, "hfopenllm_v2/MATH Level 5": 0.0725, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.3599 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-KTO-v0.2", "name": "Llama-3-Instruct-8B-KTO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.729, "hfopenllm_v2/BBH": 0.508, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3777, "hfopenllm_v2/MMLU-PRO": 0.3668 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO", "name": "Llama-3-Instruct-8B-ORPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7128, "hfopenllm_v2/BBH": 0.5001, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.3646 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-ORPO-v0.2", "name": "Llama-3-Instruct-8B-ORPO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7633, "hfopenllm_v2/BBH": 0.5078, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.378, "hfopenllm_v2/MMLU-PRO": 0.3731 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO", "name": "Llama-3-Instruct-8B-RDPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.666, "hfopenllm_v2/BBH": 0.5034, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.3607 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-RDPO-v0.2", "name": "Llama-3-Instruct-8B-RDPO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7077, "hfopenllm_v2/BBH": 0.5049, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3804, "hfopenllm_v2/MMLU-PRO": 0.3774 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF", "name": "Llama-3-Instruct-8B-RRHF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7275, "hfopenllm_v2/BBH": 0.4911, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3476, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-RRHF-v0.2", "name": "Llama-3-Instruct-8B-RRHF-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7125, "hfopenllm_v2/BBH": 0.4984, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.3482 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO", "name": "Llama-3-Instruct-8B-SimPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6504, "hfopenllm_v2/BBH": 0.4845, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3948, "hfopenllm_v2/MMLU-PRO": 0.3489 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-SimPO-v0.2", "name": "Llama-3-Instruct-8B-SimPO-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6809, "hfopenllm_v2/BBH": 0.5038, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3988, "hfopenllm_v2/MMLU-PRO": 0.3622 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF", "name": "Llama-3-Instruct-8B-SLiC-HF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.74, "hfopenllm_v2/BBH": 0.5029, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3723, "hfopenllm_v2/MMLU-PRO": 0.3585 } }, { "id": "princeton-nlp/Llama-3-Instruct-8B-SLiC-HF-v0.2", "name": "Llama-3-Instruct-8B-SLiC-HF-v0.2", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.711, "hfopenllm_v2/BBH": 0.4984, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.3482 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-CPO", "name": "Mistral-7B-Base-SFT-CPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4655, "hfopenllm_v2/BBH": 0.4382, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.2651 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-DPO", "name": "Mistral-7B-Base-SFT-DPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4403, "hfopenllm_v2/BBH": 0.435, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4122, "hfopenllm_v2/MMLU-PRO": 0.2645 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-IPO", "name": "Mistral-7B-Base-SFT-IPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.483, "hfopenllm_v2/BBH": 0.4458, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3776, "hfopenllm_v2/MMLU-PRO": 0.2792 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-KTO", "name": "Mistral-7B-Base-SFT-KTO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4785, "hfopenllm_v2/BBH": 0.4476, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4368, "hfopenllm_v2/MMLU-PRO": 0.2872 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-RDPO", "name": "Mistral-7B-Base-SFT-RDPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4606, "hfopenllm_v2/BBH": 0.444, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3579, "hfopenllm_v2/MMLU-PRO": 0.2777 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-RRHF", "name": "Mistral-7B-Base-SFT-RRHF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4407, "hfopenllm_v2/BBH": 0.4281, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.2398 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-SimPO", "name": "Mistral-7B-Base-SFT-SimPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4701, "hfopenllm_v2/BBH": 0.4398, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3971, "hfopenllm_v2/MMLU-PRO": 0.2702 } }, { "id": "princeton-nlp/Mistral-7B-Base-SFT-SLiC-HF", "name": "Mistral-7B-Base-SFT-SLiC-HF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5127, "hfopenllm_v2/BBH": 0.4422, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4261, "hfopenllm_v2/MMLU-PRO": 0.2781 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-CPO", "name": "Mistral-7B-Instruct-CPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4203, "hfopenllm_v2/BBH": 0.4069, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.2701 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-DPO", "name": "Mistral-7B-Instruct-DPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5176, "hfopenllm_v2/BBH": 0.406, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3833, "hfopenllm_v2/MMLU-PRO": 0.2749 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-IPO", "name": "Mistral-7B-Instruct-IPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4929, "hfopenllm_v2/BBH": 0.4322, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4324, "hfopenllm_v2/MMLU-PRO": 0.2708 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-KTO", "name": "Mistral-7B-Instruct-KTO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4908, "hfopenllm_v2/BBH": 0.414, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3953, "hfopenllm_v2/MMLU-PRO": 0.2812 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-ORPO", "name": "Mistral-7B-Instruct-ORPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.472, "hfopenllm_v2/BBH": 0.4104, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3912, "hfopenllm_v2/MMLU-PRO": 0.2662 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-RDPO", "name": "Mistral-7B-Instruct-RDPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4887, "hfopenllm_v2/BBH": 0.405, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3873, "hfopenllm_v2/MMLU-PRO": 0.2777 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-RRHF", "name": "Mistral-7B-Instruct-RRHF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.496, "hfopenllm_v2/BBH": 0.419, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3979, "hfopenllm_v2/MMLU-PRO": 0.2651 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-SimPO", "name": "Mistral-7B-Instruct-SimPO", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4687, "hfopenllm_v2/BBH": 0.4507, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.2797 } }, { "id": "princeton-nlp/Mistral-7B-Instruct-SLiC-HF", "name": "Mistral-7B-Instruct-SLiC-HF", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5115, "hfopenllm_v2/BBH": 0.404, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3913, "hfopenllm_v2/MMLU-PRO": 0.2715 } }, { "id": "princeton-nlp/Sheared-LLaMA-1.3B", "name": "Sheared-LLaMA-1.3B", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2198, "hfopenllm_v2/BBH": 0.3197, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.1171 } }, { "id": "princeton-nlp/Sheared-LLaMA-2.7B", "name": "Sheared-LLaMA-2.7B", "developer": "princeton-nlp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2417, "hfopenllm_v2/BBH": 0.3259, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.3567, "hfopenllm_v2/MMLU-PRO": 0.1187 } }, { "id": "prithivMLmods/Bellatrix-1.5B-xElite", "name": "Bellatrix-1.5B-xElite", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1964, "hfopenllm_v2/BBH": 0.3501, "hfopenllm_v2/MATH Level 5": 0.287, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3619, "hfopenllm_v2/MMLU-PRO": 0.1657 } }, { "id": "prithivMLmods/Bellatrix-Tiny-1.5B-R1", "name": "Bellatrix-Tiny-1.5B-R1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3352, "hfopenllm_v2/BBH": 0.4022, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3683, "hfopenllm_v2/MMLU-PRO": 0.2751 } }, { "id": "prithivMLmods/Bellatrix-Tiny-1B-v2", "name": "Bellatrix-Tiny-1B-v2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.151, "hfopenllm_v2/BBH": 0.3268, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.343, "hfopenllm_v2/MMLU-PRO": 0.1493 } }, { "id": "prithivMLmods/Blaze-14B-xElite", "name": "Blaze-14B-xElite", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0363, "hfopenllm_v2/BBH": 0.6628, "hfopenllm_v2/MATH Level 5": 0.3693, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4625, "hfopenllm_v2/MMLU-PRO": 0.5111 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite", "name": "Calcium-Opus-14B-Elite", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6052, "hfopenllm_v2/BBH": 0.6317, "hfopenllm_v2/MATH Level 5": 0.4789, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.486, "hfopenllm_v2/MMLU-PRO": 0.5302 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite-1M", "name": "Calcium-Opus-14B-Elite-1M", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5613, "hfopenllm_v2/BBH": 0.6329, "hfopenllm_v2/MATH Level 5": 0.4456, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4676, "hfopenllm_v2/MMLU-PRO": 0.5152 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite-Stock", "name": "Calcium-Opus-14B-Elite-Stock", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6143, "hfopenllm_v2/BBH": 0.6329, "hfopenllm_v2/MATH Level 5": 0.4668, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4808, "hfopenllm_v2/MMLU-PRO": 0.5284 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite2", "name": "Calcium-Opus-14B-Elite2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6176, "hfopenllm_v2/BBH": 0.6318, "hfopenllm_v2/MATH Level 5": 0.469, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.494, "hfopenllm_v2/MMLU-PRO": 0.5301 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite2-R1", "name": "Calcium-Opus-14B-Elite2-R1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6326, "hfopenllm_v2/BBH": 0.6362, "hfopenllm_v2/MATH Level 5": 0.3338, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.49, "hfopenllm_v2/MMLU-PRO": 0.5248 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite3", "name": "Calcium-Opus-14B-Elite3", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5428, "hfopenllm_v2/BBH": 0.635, "hfopenllm_v2/MATH Level 5": 0.4705, "hfopenllm_v2/GPQA": 0.3708, "hfopenllm_v2/MUSR": 0.4795, "hfopenllm_v2/MMLU-PRO": 0.5335 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Elite4", "name": "Calcium-Opus-14B-Elite4", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6112, "hfopenllm_v2/BBH": 0.6195, "hfopenllm_v2/MATH Level 5": 0.3625, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4687, "hfopenllm_v2/MMLU-PRO": 0.5149 } }, { "id": "prithivMLmods/Calcium-Opus-14B-Merge", "name": "Calcium-Opus-14B-Merge", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4949, "hfopenllm_v2/BBH": 0.6319, "hfopenllm_v2/MATH Level 5": 0.4637, "hfopenllm_v2/GPQA": 0.3708, "hfopenllm_v2/MUSR": 0.4861, "hfopenllm_v2/MMLU-PRO": 0.5356 } }, { "id": "prithivMLmods/Calcium-Opus-20B-v1", "name": "Calcium-Opus-20B-v1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3093, "hfopenllm_v2/BBH": 0.599, "hfopenllm_v2/MATH Level 5": 0.3618, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4943, "hfopenllm_v2/MMLU-PRO": 0.4734 } }, { "id": "prithivMLmods/COCO-7B-Instruct-1M", "name": "COCO-7B-Instruct-1M", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4743, "hfopenllm_v2/BBH": 0.541, "hfopenllm_v2/MATH Level 5": 0.3497, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4382, "hfopenllm_v2/MMLU-PRO": 0.4186 } }, { "id": "prithivMLmods/Codepy-Deepthink-3B", "name": "Codepy-Deepthink-3B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4327, "hfopenllm_v2/BBH": 0.4259, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.331, "hfopenllm_v2/MMLU-PRO": 0.309 } }, { "id": "prithivMLmods/Coma-II-14B", "name": "Coma-II-14B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4168, "hfopenllm_v2/BBH": 0.6321, "hfopenllm_v2/MATH Level 5": 0.5514, "hfopenllm_v2/GPQA": 0.4002, "hfopenllm_v2/MUSR": 0.5351, "hfopenllm_v2/MMLU-PRO": 0.504 } }, { "id": "prithivMLmods/Condor-Opus-14B-Exp", "name": "Condor-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4043, "hfopenllm_v2/BBH": 0.6154, "hfopenllm_v2/MATH Level 5": 0.5227, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.5194, "hfopenllm_v2/MMLU-PRO": 0.5014 } }, { "id": "prithivMLmods/Cygnus-II-14B", "name": "Cygnus-II-14B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6184, "hfopenllm_v2/BBH": 0.6661, "hfopenllm_v2/MATH Level 5": 0.4396, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4688, "hfopenllm_v2/MMLU-PRO": 0.5391 } }, { "id": "prithivMLmods/Deepthink-Llama-3-8B-Preview", "name": "Deepthink-Llama-3-8B-Preview", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2955, "hfopenllm_v2/BBH": 0.4665, "hfopenllm_v2/MATH Level 5": 0.355, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.3707, "hfopenllm_v2/MMLU-PRO": 0.2739 } }, { "id": "prithivMLmods/Deepthink-Reasoning-14B", "name": "Deepthink-Reasoning-14B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5424, "hfopenllm_v2/BBH": 0.6334, "hfopenllm_v2/MATH Level 5": 0.423, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4732, "hfopenllm_v2/MMLU-PRO": 0.5296 } }, { "id": "prithivMLmods/Deepthink-Reasoning-7B", "name": "Deepthink-Reasoning-7B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.484, "hfopenllm_v2/BBH": 0.5505, "hfopenllm_v2/MATH Level 5": 0.3346, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4432, "hfopenllm_v2/MMLU-PRO": 0.4349 } }, { "id": "prithivMLmods/Dinobot-Opus-14B-Exp", "name": "Dinobot-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.824, "hfopenllm_v2/BBH": 0.637, "hfopenllm_v2/MATH Level 5": 0.5317, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.4979 } }, { "id": "prithivMLmods/Elita-0.1-Distilled-R1-abliterated", "name": "Elita-0.1-Distilled-R1-abliterated", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3542, "hfopenllm_v2/BBH": 0.3828, "hfopenllm_v2/MATH Level 5": 0.3066, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.2758 } }, { "id": "prithivMLmods/Elita-1", "name": "Elita-1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4906, "hfopenllm_v2/BBH": 0.652, "hfopenllm_v2/MATH Level 5": 0.3429, "hfopenllm_v2/GPQA": 0.3758, "hfopenllm_v2/MUSR": 0.4834, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "prithivMLmods/Epimetheus-14B-Axo", "name": "Epimetheus-14B-Axo", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5546, "hfopenllm_v2/BBH": 0.6613, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.5304 } }, { "id": "prithivMLmods/Equuleus-Opus-14B-Exp", "name": "Equuleus-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7001, "hfopenllm_v2/BBH": 0.6434, "hfopenllm_v2/MATH Level 5": 0.4585, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4952, "hfopenllm_v2/MMLU-PRO": 0.5374 } }, { "id": "prithivMLmods/Eridanus-Opus-14B-r999", "name": "Eridanus-Opus-14B-r999", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6386, "hfopenllm_v2/BBH": 0.6584, "hfopenllm_v2/MATH Level 5": 0.386, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4769, "hfopenllm_v2/MMLU-PRO": 0.5362 } }, { "id": "prithivMLmods/Evac-Opus-14B-Exp", "name": "Evac-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5916, "hfopenllm_v2/BBH": 0.6475, "hfopenllm_v2/MATH Level 5": 0.4215, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4728, "hfopenllm_v2/MMLU-PRO": 0.5317 } }, { "id": "prithivMLmods/FastThink-0.5B-Tiny", "name": "FastThink-0.5B-Tiny", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.258, "hfopenllm_v2/BBH": 0.3206, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3566, "hfopenllm_v2/MMLU-PRO": 0.1649 } }, { "id": "prithivMLmods/Gaea-Opus-14B-Exp", "name": "Gaea-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5956, "hfopenllm_v2/BBH": 0.656, "hfopenllm_v2/MATH Level 5": 0.4275, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4859, "hfopenllm_v2/MMLU-PRO": 0.5401 } }, { "id": "prithivMLmods/Galactic-Qwen-14B-Exp1", "name": "Galactic-Qwen-14B-Exp1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5832, "hfopenllm_v2/BBH": 0.6582, "hfopenllm_v2/MATH Level 5": 0.4018, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "prithivMLmods/Galactic-Qwen-14B-Exp2", "name": "Galactic-Qwen-14B-Exp2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.662, "hfopenllm_v2/BBH": 0.7203, "hfopenllm_v2/MATH Level 5": 0.3474, "hfopenllm_v2/GPQA": 0.3993, "hfopenllm_v2/MUSR": 0.5354, "hfopenllm_v2/MMLU-PRO": 0.5691 } }, { "id": "prithivMLmods/Gauss-Opus-14B-R999", "name": "Gauss-Opus-14B-R999", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3907, "hfopenllm_v2/BBH": 0.6228, "hfopenllm_v2/MATH Level 5": 0.5755, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.5338, "hfopenllm_v2/MMLU-PRO": 0.5007 } }, { "id": "prithivMLmods/GWQ-9B-Preview", "name": "GWQ-9B-Preview", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5066, "hfopenllm_v2/BBH": 0.5806, "hfopenllm_v2/MATH Level 5": 0.2266, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4951, "hfopenllm_v2/MMLU-PRO": 0.3984 } }, { "id": "prithivMLmods/GWQ-9B-Preview2", "name": "GWQ-9B-Preview2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5209, "hfopenllm_v2/BBH": 0.5797, "hfopenllm_v2/MATH Level 5": 0.2372, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.486, "hfopenllm_v2/MMLU-PRO": 0.3997 } }, { "id": "prithivMLmods/GWQ2b", "name": "GWQ2b", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4115, "hfopenllm_v2/BBH": 0.4143, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.2473 } }, { "id": "prithivMLmods/Jolt-v0.1", "name": "Jolt-v0.1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5092, "hfopenllm_v2/BBH": 0.6521, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.5386 } }, { "id": "prithivMLmods/Lacerta-Opus-14B-Elite8", "name": "Lacerta-Opus-14B-Elite8", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6141, "hfopenllm_v2/BBH": 0.6401, "hfopenllm_v2/MATH Level 5": 0.3648, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4635, "hfopenllm_v2/MMLU-PRO": 0.5322 } }, { "id": "prithivMLmods/Llama-3.1-5B-Instruct", "name": "Llama-3.1-5B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1407, "hfopenllm_v2/BBH": 0.3051, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.354, "hfopenllm_v2/MMLU-PRO": 0.1184 } }, { "id": "prithivMLmods/Llama-3.1-8B-Open-SFT", "name": "Llama-3.1-8B-Open-SFT", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4123, "hfopenllm_v2/BBH": 0.4968, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3904, "hfopenllm_v2/MMLU-PRO": 0.3522 } }, { "id": "prithivMLmods/Llama-3.2-3B-Math-Oct", "name": "Llama-3.2-3B-Math-Oct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4585, "hfopenllm_v2/BBH": 0.4372, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.347, "hfopenllm_v2/MMLU-PRO": 0.2911 } }, { "id": "prithivMLmods/Llama-3.2-6B-AlgoCode", "name": "Llama-3.2-6B-AlgoCode", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2136, "hfopenllm_v2/BBH": 0.3748, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4013, "hfopenllm_v2/MMLU-PRO": 0.1798 } }, { "id": "prithivMLmods/Llama-8B-Distill-CoT", "name": "Llama-8B-Distill-CoT", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3342, "hfopenllm_v2/BBH": 0.4298, "hfopenllm_v2/MATH Level 5": 0.4003, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.372, "hfopenllm_v2/MMLU-PRO": 0.2732 } }, { "id": "prithivMLmods/Llama-Deepsync-1B", "name": "Llama-Deepsync-1B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.357, "hfopenllm_v2/BBH": 0.3386, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3565, "hfopenllm_v2/MMLU-PRO": 0.1738 } }, { "id": "prithivMLmods/Llama-Deepsync-3B", "name": "Llama-Deepsync-3B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4302, "hfopenllm_v2/BBH": 0.4292, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3324, "hfopenllm_v2/MMLU-PRO": 0.3031 } }, { "id": "prithivMLmods/Llama-Express.1-Math", "name": "Llama-Express.1-Math", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5084, "hfopenllm_v2/BBH": 0.3364, "hfopenllm_v2/MATH Level 5": 0.0559, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3143, "hfopenllm_v2/MMLU-PRO": 0.161 } }, { "id": "prithivMLmods/LwQ-10B-Instruct", "name": "LwQ-10B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3935, "hfopenllm_v2/BBH": 0.5122, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4544, "hfopenllm_v2/MMLU-PRO": 0.3318 } }, { "id": "prithivMLmods/LwQ-Reasoner-10B", "name": "LwQ-Reasoner-10B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2941, "hfopenllm_v2/BBH": 0.5866, "hfopenllm_v2/MATH Level 5": 0.358, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4079, "hfopenllm_v2/MMLU-PRO": 0.4147 } }, { "id": "prithivMLmods/Magellanic-Opus-14B-Exp", "name": "Magellanic-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6866, "hfopenllm_v2/BBH": 0.6383, "hfopenllm_v2/MATH Level 5": 0.3799, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4926, "hfopenllm_v2/MMLU-PRO": 0.5273 } }, { "id": "prithivMLmods/Magellanic-Qwen-25B-R999", "name": "Magellanic-Qwen-25B-R999", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1873, "hfopenllm_v2/BBH": 0.2608, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.13 } }, { "id": "prithivMLmods/Megatron-Corpus-14B-Exp", "name": "Megatron-Corpus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4983, "hfopenllm_v2/BBH": 0.6355, "hfopenllm_v2/MATH Level 5": 0.3429, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4767, "hfopenllm_v2/MMLU-PRO": 0.526 } }, { "id": "prithivMLmods/Megatron-Corpus-14B-Exp.v2", "name": "Megatron-Corpus-14B-Exp.v2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.487, "hfopenllm_v2/BBH": 0.6321, "hfopenllm_v2/MATH Level 5": 0.2591, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.449, "hfopenllm_v2/MMLU-PRO": 0.481 } }, { "id": "prithivMLmods/Megatron-Opus-14B-2.0", "name": "Megatron-Opus-14B-2.0", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6694, "hfopenllm_v2/BBH": 0.6871, "hfopenllm_v2/MATH Level 5": 0.2779, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.517 } }, { "id": "prithivMLmods/Megatron-Opus-14B-2.1", "name": "Megatron-Opus-14B-2.1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0246, "hfopenllm_v2/BBH": 0.6727, "hfopenllm_v2/MATH Level 5": 0.2998, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4928, "hfopenllm_v2/MMLU-PRO": 0.5174 } }, { "id": "prithivMLmods/Megatron-Opus-14B-Exp", "name": "Megatron-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4979, "hfopenllm_v2/BBH": 0.6516, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4887, "hfopenllm_v2/MMLU-PRO": 0.5401 } }, { "id": "prithivMLmods/Megatron-Opus-14B-Stock", "name": "Megatron-Opus-14B-Stock", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5174, "hfopenllm_v2/BBH": 0.6412, "hfopenllm_v2/MATH Level 5": 0.3346, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.5293 } }, { "id": "prithivMLmods/Megatron-Opus-7B-Exp", "name": "Megatron-Opus-7B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6017, "hfopenllm_v2/BBH": 0.5367, "hfopenllm_v2/MATH Level 5": 0.1971, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.39 } }, { "id": "prithivMLmods/Messier-Opus-14B-Elite7", "name": "Messier-Opus-14B-Elite7", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7113, "hfopenllm_v2/BBH": 0.6499, "hfopenllm_v2/MATH Level 5": 0.4071, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4886, "hfopenllm_v2/MMLU-PRO": 0.5404 } }, { "id": "prithivMLmods/Omni-Reasoner-Merged", "name": "Omni-Reasoner-Merged", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4599, "hfopenllm_v2/BBH": 0.5508, "hfopenllm_v2/MATH Level 5": 0.3331, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4616, "hfopenllm_v2/MMLU-PRO": 0.4364 } }, { "id": "prithivMLmods/Omni-Reasoner3-Merged", "name": "Omni-Reasoner3-Merged", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4935, "hfopenllm_v2/BBH": 0.4388, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3522, "hfopenllm_v2/MMLU-PRO": 0.295 } }, { "id": "prithivMLmods/Pegasus-Opus-14B-Exp", "name": "Pegasus-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6982, "hfopenllm_v2/BBH": 0.6548, "hfopenllm_v2/MATH Level 5": 0.4086, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.486, "hfopenllm_v2/MMLU-PRO": 0.5412 } }, { "id": "prithivMLmods/Phi-4-Empathetic", "name": "Phi-4-Empathetic", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0497, "hfopenllm_v2/BBH": 0.6727, "hfopenllm_v2/MATH Level 5": 0.2621, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.5066 } }, { "id": "prithivMLmods/Phi-4-Math-IO", "name": "Phi-4-Math-IO", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.059, "hfopenllm_v2/BBH": 0.6668, "hfopenllm_v2/MATH Level 5": 0.4577, "hfopenllm_v2/GPQA": 0.3985, "hfopenllm_v2/MUSR": 0.4873, "hfopenllm_v2/MMLU-PRO": 0.5205 } }, { "id": "prithivMLmods/Phi-4-o1", "name": "Phi-4-o1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.029, "hfopenllm_v2/BBH": 0.6689, "hfopenllm_v2/MATH Level 5": 0.3995, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4978, "hfopenllm_v2/MMLU-PRO": 0.5174 } }, { "id": "prithivMLmods/Phi-4-QwQ", "name": "Phi-4-QwQ", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0559, "hfopenllm_v2/BBH": 0.6696, "hfopenllm_v2/MATH Level 5": 0.4577, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4651, "hfopenllm_v2/MMLU-PRO": 0.5275 } }, { "id": "prithivMLmods/Phi-4-Super", "name": "Phi-4-Super", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0481, "hfopenllm_v2/BBH": 0.672, "hfopenllm_v2/MATH Level 5": 0.3489, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.5044, "hfopenllm_v2/MMLU-PRO": 0.5266 } }, { "id": "prithivMLmods/Phi-4-Super-1", "name": "Phi-4-Super-1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0418, "hfopenllm_v2/BBH": 0.6729, "hfopenllm_v2/MATH Level 5": 0.352, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.5017, "hfopenllm_v2/MMLU-PRO": 0.5235 } }, { "id": "prithivMLmods/Phi-4-Super-o1", "name": "Phi-4-Super-o1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0418, "hfopenllm_v2/BBH": 0.6729, "hfopenllm_v2/MATH Level 5": 0.352, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.5017, "hfopenllm_v2/MMLU-PRO": 0.5235 } }, { "id": "prithivMLmods/Phi4-Super", "name": "Phi4-Super", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0481, "hfopenllm_v2/BBH": 0.672, "hfopenllm_v2/MATH Level 5": 0.3489, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.5044, "hfopenllm_v2/MMLU-PRO": 0.5266 } }, { "id": "prithivMLmods/Porpoise-Opus-14B-Exp", "name": "Porpoise-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7098, "hfopenllm_v2/BBH": 0.6519, "hfopenllm_v2/MATH Level 5": 0.4041, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4926, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "prithivMLmods/Primal-Opus-14B-Optimus-v1", "name": "Primal-Opus-14B-Optimus-v1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5013, "hfopenllm_v2/BBH": 0.6419, "hfopenllm_v2/MATH Level 5": 0.3384, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.5259 } }, { "id": "prithivMLmods/Primal-Opus-14B-Optimus-v2", "name": "Primal-Opus-14B-Optimus-v2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6404, "hfopenllm_v2/BBH": 0.6544, "hfopenllm_v2/MATH Level 5": 0.4207, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.49, "hfopenllm_v2/MMLU-PRO": 0.5422 } }, { "id": "prithivMLmods/Qwen-7B-Distill-Reasoner", "name": "Qwen-7B-Distill-Reasoner", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3396, "hfopenllm_v2/BBH": 0.4409, "hfopenllm_v2/MATH Level 5": 0.395, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.2818 } }, { "id": "prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct", "name": "Qwen2.5-1.5B-DeepSeek-R1-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1397, "hfopenllm_v2/BBH": 0.2824, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3724, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M", "name": "Qwen2.5-14B-DeepSeek-R1-1M", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4193, "hfopenllm_v2/BBH": 0.5935, "hfopenllm_v2/MATH Level 5": 0.5128, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4606, "hfopenllm_v2/MMLU-PRO": 0.4899 } }, { "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M", "name": "Qwen2.5-7B-DeepSeek-R1-1M", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1861, "hfopenllm_v2/BBH": 0.3126, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3417, "hfopenllm_v2/MMLU-PRO": 0.1201 } }, { "id": "prithivMLmods/QwQ-LCoT-14B-Conversational", "name": "QwQ-LCoT-14B-Conversational", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4047, "hfopenllm_v2/BBH": 0.624, "hfopenllm_v2/MATH Level 5": 0.4653, "hfopenllm_v2/GPQA": 0.3498, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.5278 } }, { "id": "prithivMLmods/QwQ-LCoT-3B-Instruct", "name": "QwQ-LCoT-3B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4354, "hfopenllm_v2/BBH": 0.4763, "hfopenllm_v2/MATH Level 5": 0.2825, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4358, "hfopenllm_v2/MMLU-PRO": 0.3582 } }, { "id": "prithivMLmods/QwQ-LCoT-7B-Instruct", "name": "QwQ-LCoT-7B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4987, "hfopenllm_v2/BBH": 0.5466, "hfopenllm_v2/MATH Level 5": 0.3716, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4802, "hfopenllm_v2/MMLU-PRO": 0.4334 } }, { "id": "prithivMLmods/QwQ-LCoT1-Merged", "name": "QwQ-LCoT1-Merged", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4751, "hfopenllm_v2/BBH": 0.5481, "hfopenllm_v2/MATH Level 5": 0.3731, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4696, "hfopenllm_v2/MMLU-PRO": 0.4358 } }, { "id": "prithivMLmods/QwQ-LCoT2-7B-Instruct", "name": "QwQ-LCoT2-7B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5561, "hfopenllm_v2/BBH": 0.5425, "hfopenllm_v2/MATH Level 5": 0.327, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4564, "hfopenllm_v2/MMLU-PRO": 0.4342 } }, { "id": "prithivMLmods/QwQ-MathOct-7B", "name": "QwQ-MathOct-7B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4684, "hfopenllm_v2/BBH": 0.5486, "hfopenllm_v2/MATH Level 5": 0.2953, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4601, "hfopenllm_v2/MMLU-PRO": 0.433 } }, { "id": "prithivMLmods/QwQ-R1-Distill-1.5B-CoT", "name": "QwQ-R1-Distill-1.5B-CoT", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2194, "hfopenllm_v2/BBH": 0.3666, "hfopenllm_v2/MATH Level 5": 0.3346, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1913 } }, { "id": "prithivMLmods/QwQ-R1-Distill-7B-CoT", "name": "QwQ-R1-Distill-7B-CoT", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.35, "hfopenllm_v2/BBH": 0.4388, "hfopenllm_v2/MATH Level 5": 0.4683, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3779, "hfopenllm_v2/MMLU-PRO": 0.2804 } }, { "id": "prithivMLmods/SmolLM2-CoT-360M", "name": "SmolLM2-CoT-360M", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2216, "hfopenllm_v2/BBH": 0.3135, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.1085 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Elite5", "name": "Sombrero-Opus-14B-Elite5", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7881, "hfopenllm_v2/BBH": 0.6502, "hfopenllm_v2/MATH Level 5": 0.5355, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4287, "hfopenllm_v2/MMLU-PRO": 0.52 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Elite6", "name": "Sombrero-Opus-14B-Elite6", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7226, "hfopenllm_v2/BBH": 0.6488, "hfopenllm_v2/MATH Level 5": 0.4079, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4886, "hfopenllm_v2/MMLU-PRO": 0.539 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Sm1", "name": "Sombrero-Opus-14B-Sm1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3813, "hfopenllm_v2/BBH": 0.6355, "hfopenllm_v2/MATH Level 5": 0.5665, "hfopenllm_v2/GPQA": 0.4035, "hfopenllm_v2/MUSR": 0.5299, "hfopenllm_v2/MMLU-PRO": 0.5125 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Sm2", "name": "Sombrero-Opus-14B-Sm2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4272, "hfopenllm_v2/BBH": 0.6609, "hfopenllm_v2/MATH Level 5": 0.4864, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.5088, "hfopenllm_v2/MMLU-PRO": 0.5345 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Sm4", "name": "Sombrero-Opus-14B-Sm4", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4347, "hfopenllm_v2/BBH": 0.6613, "hfopenllm_v2/MATH Level 5": 0.4879, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.5192, "hfopenllm_v2/MMLU-PRO": 0.53 } }, { "id": "prithivMLmods/Sombrero-Opus-14B-Sm5", "name": "Sombrero-Opus-14B-Sm5", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6852, "hfopenllm_v2/BBH": 0.6564, "hfopenllm_v2/MATH Level 5": 0.4094, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4806, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "prithivMLmods/Sqweeks-7B-Instruct", "name": "Sqweeks-7B-Instruct", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2158, "hfopenllm_v2/BBH": 0.4667, "hfopenllm_v2/MATH Level 5": 0.5144, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4476, "hfopenllm_v2/MMLU-PRO": 0.3133 } }, { "id": "prithivMLmods/Tadpole-Opus-14B-Exp", "name": "Tadpole-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.575, "hfopenllm_v2/BBH": 0.6369, "hfopenllm_v2/MATH Level 5": 0.3134, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4728, "hfopenllm_v2/MMLU-PRO": 0.5322 } }, { "id": "prithivMLmods/Taurus-Opus-7B", "name": "Taurus-Opus-7B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4223, "hfopenllm_v2/BBH": 0.5367, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4399, "hfopenllm_v2/MMLU-PRO": 0.3951 } }, { "id": "prithivMLmods/Triangulum-10B", "name": "Triangulum-10B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3229, "hfopenllm_v2/BBH": 0.5968, "hfopenllm_v2/MATH Level 5": 0.355, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.4178 } }, { "id": "prithivMLmods/Triangulum-5B", "name": "Triangulum-5B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1283, "hfopenllm_v2/BBH": 0.3124, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3445, "hfopenllm_v2/MMLU-PRO": 0.1223 } }, { "id": "prithivMLmods/Triangulum-v2-10B", "name": "Triangulum-v2-10B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6705, "hfopenllm_v2/BBH": 0.6065, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4281, "hfopenllm_v2/MMLU-PRO": 0.4466 } }, { "id": "prithivMLmods/Tucana-Opus-14B-r999", "name": "Tucana-Opus-14B-r999", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6067, "hfopenllm_v2/BBH": 0.6557, "hfopenllm_v2/MATH Level 5": 0.4063, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.5384 } }, { "id": "prithivMLmods/Tulu-MathLingo-8B", "name": "Tulu-MathLingo-8B", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5589, "hfopenllm_v2/BBH": 0.4659, "hfopenllm_v2/MATH Level 5": 0.145, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3864, "hfopenllm_v2/MMLU-PRO": 0.3044 } }, { "id": "prithivMLmods/Viper-Coder-7B-Elite14", "name": "Viper-Coder-7B-Elite14", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1488, "hfopenllm_v2/BBH": 0.2829, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.1089 } }, { "id": "prithivMLmods/Viper-Coder-Hybrid-v1.2", "name": "Viper-Coder-Hybrid-v1.2", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6736, "hfopenllm_v2/BBH": 0.6391, "hfopenllm_v2/MATH Level 5": 0.3331, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4822, "hfopenllm_v2/MMLU-PRO": 0.5243 } }, { "id": "prithivMLmods/Viper-Coder-Hybrid-v1.3", "name": "Viper-Coder-Hybrid-v1.3", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7555, "hfopenllm_v2/BBH": 0.6471, "hfopenllm_v2/MATH Level 5": 0.4517, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4403, "hfopenllm_v2/MMLU-PRO": 0.5097 } }, { "id": "prithivMLmods/Viper-Coder-HybridMini-v1.3", "name": "Viper-Coder-HybridMini-v1.3", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6104, "hfopenllm_v2/BBH": 0.5365, "hfopenllm_v2/MATH Level 5": 0.463, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4505, "hfopenllm_v2/MMLU-PRO": 0.4352 } }, { "id": "prithivMLmods/Viper-Coder-v0.1", "name": "Viper-Coder-v0.1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5521, "hfopenllm_v2/BBH": 0.6143, "hfopenllm_v2/MATH Level 5": 0.327, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4394, "hfopenllm_v2/MMLU-PRO": 0.3928 } }, { "id": "prithivMLmods/Viper-Coder-v1.1", "name": "Viper-Coder-v1.1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4432, "hfopenllm_v2/BBH": 0.6492, "hfopenllm_v2/MATH Level 5": 0.5461, "hfopenllm_v2/GPQA": 0.401, "hfopenllm_v2/MUSR": 0.5219, "hfopenllm_v2/MMLU-PRO": 0.5232 } }, { "id": "prithivMLmods/Viper-Coder-v1.6-r999", "name": "Viper-Coder-v1.6-r999", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4433, "hfopenllm_v2/BBH": 0.6492, "hfopenllm_v2/MATH Level 5": 0.5657, "hfopenllm_v2/GPQA": 0.401, "hfopenllm_v2/MUSR": 0.5219, "hfopenllm_v2/MMLU-PRO": 0.5232 } }, { "id": "prithivMLmods/Viper-Coder-v1.7-Vsm6", "name": "Viper-Coder-v1.7-Vsm6", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5004, "hfopenllm_v2/BBH": 0.6502, "hfopenllm_v2/MATH Level 5": 0.4645, "hfopenllm_v2/GPQA": 0.3968, "hfopenllm_v2/MUSR": 0.4768, "hfopenllm_v2/MMLU-PRO": 0.5288 } }, { "id": "prithivMLmods/Viper-OneCoder-UIGEN", "name": "Viper-OneCoder-UIGEN", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4692, "hfopenllm_v2/BBH": 0.6047, "hfopenllm_v2/MATH Level 5": 0.3867, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4514, "hfopenllm_v2/MMLU-PRO": 0.3904 } }, { "id": "prithivMLmods/Volans-Opus-14B-Exp", "name": "Volans-Opus-14B-Exp", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5868, "hfopenllm_v2/BBH": 0.6521, "hfopenllm_v2/MATH Level 5": 0.4252, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4872, "hfopenllm_v2/MMLU-PRO": 0.5385 } }, { "id": "prithivMLmods/WebMind-7B-v0.1", "name": "WebMind-7B-v0.1", "developer": "prithivMLmods", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5278, "hfopenllm_v2/BBH": 0.5434, "hfopenllm_v2/MATH Level 5": 0.3648, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4537, "hfopenllm_v2/MMLU-PRO": 0.4279 } }, { "id": "prometheus-eval/prometheus-7b-v2.0", "name": "prometheus-eval/prometheus-7b-v2.0", "developer": "prometheus-eval", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7204, "reward-bench/Chat": 0.8547, "reward-bench/Chat Hard": 0.4912, "reward-bench/Safety": 0.7709, "reward-bench/Reasoning": 0.7648 } }, { "id": "prometheus-eval/prometheus-8x7b-v2.0", "name": "prometheus-eval/prometheus-8x7b-v2.0", "developer": "prometheus-eval", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7451, "reward-bench/Chat": 0.9302, "reward-bench/Chat Hard": 0.4715, "reward-bench/Safety": 0.8047, "reward-bench/Reasoning": 0.774 } }, { "id": "pszemraj/Llama-3-6.3b-v0.1", "name": "Llama-3-6.3b-v0.1", "developer": "pszemraj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1044, "hfopenllm_v2/BBH": 0.4197, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3908, "hfopenllm_v2/MMLU-PRO": 0.284 } }, { "id": "pszemraj/Mistral-v0.3-6B", "name": "Mistral-v0.3-6B", "developer": "pszemraj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2454, "hfopenllm_v2/BBH": 0.3774, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3908, "hfopenllm_v2/MMLU-PRO": 0.2143 } }, { "id": "PuxAI/LUA_model", "name": "LUA_model", "developer": "PuxAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2282, "hfopenllm_v2/BBH": 0.2877, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3484, "hfopenllm_v2/MMLU-PRO": 0.1123 } }, { "id": "PygmalionAI/pygmalion-6b", "name": "pygmalion-6b", "developer": "PygmalionAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2091, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3684, "hfopenllm_v2/MMLU-PRO": 0.1184 } }, { "id": "Q-bert/MetaMath-1B", "name": "MetaMath-1B", "developer": "Q-bert", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.53, "hfopenllm_v2/BBH": 0.3451, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3289, "hfopenllm_v2/MMLU-PRO": 0.1495 } }, { "id": "qingy2019/LLaMa_3.2_3B_Catalysts", "name": "LLaMa_3.2_3B_Catalysts", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4992, "hfopenllm_v2/BBH": 0.4468, "hfopenllm_v2/MATH Level 5": 0.1292, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3788, "hfopenllm_v2/MMLU-PRO": 0.3008 } }, { "id": "qingy2019/OpenMath2-Llama3.1-8B", "name": "OpenMath2-Llama3.1-8B", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2331, "hfopenllm_v2/BBH": 0.4096, "hfopenllm_v2/MATH Level 5": 0.2674, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3436, "hfopenllm_v2/MMLU-PRO": 0.1553 } }, { "id": "qingy2019/Oracle-14B", "name": "Oracle-14B", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2358, "hfopenllm_v2/BBH": 0.4612, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3717, "hfopenllm_v2/MMLU-PRO": 0.2382 } }, { "id": "qingy2019/Qwen2.5-Math-14B-Instruct", "name": "Qwen2.5-Math-14B-Instruct", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6066, "hfopenllm_v2/BBH": 0.635, "hfopenllm_v2/MATH Level 5": 0.3716, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.4757, "hfopenllm_v2/MMLU-PRO": 0.5331 } }, { "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Alpha", "name": "Qwen2.5-Math-14B-Instruct-Alpha", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5981, "hfopenllm_v2/BBH": 0.6375, "hfopenllm_v2/MATH Level 5": 0.3142, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4649, "hfopenllm_v2/MMLU-PRO": 0.5331 } }, { "id": "qingy2019/Qwen2.5-Math-14B-Instruct-Pro", "name": "Qwen2.5-Math-14B-Instruct-Pro", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1922, "hfopenllm_v2/BBH": 0.5319, "hfopenllm_v2/MATH Level 5": 0.284, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.3558 } }, { "id": "qingy2019/Qwen2.5-Ultimate-14B-Instruct", "name": "Qwen2.5-Ultimate-14B-Instruct", "developer": "qingy2019", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3938, "hfopenllm_v2/BBH": 0.5842, "hfopenllm_v2/MATH Level 5": 0.2893, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.4929 } }, { "id": "qingy2024/Benchmaxx-Llama-3.2-1B-Instruct", "name": "Benchmaxx-Llama-3.2-1B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2014, "hfopenllm_v2/BBH": 0.8269, "hfopenllm_v2/MATH Level 5": 0.4804, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3446, "hfopenllm_v2/MMLU-PRO": 0.1113 } }, { "id": "qingy2024/Eyas-17B-Instruct", "name": "Eyas-17B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6575, "hfopenllm_v2/BBH": 0.6085, "hfopenllm_v2/MATH Level 5": 0.247, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4522, "hfopenllm_v2/MMLU-PRO": 0.4343 } }, { "id": "qingy2024/Falcon3-2x10B-MoE-Instruct", "name": "Falcon3-2x10B-MoE-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.785, "hfopenllm_v2/BBH": 0.6185, "hfopenllm_v2/MATH Level 5": 0.2795, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4284, "hfopenllm_v2/MMLU-PRO": 0.4423 } }, { "id": "qingy2024/Fusion-14B-Instruct", "name": "Fusion-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.726, "hfopenllm_v2/BBH": 0.6396, "hfopenllm_v2/MATH Level 5": 0.3369, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.44, "hfopenllm_v2/MMLU-PRO": 0.5044 } }, { "id": "qingy2024/Fusion2-14B-Instruct", "name": "Fusion2-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6064, "hfopenllm_v2/BBH": 0.6119, "hfopenllm_v2/MATH Level 5": 0.3127, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4634, "hfopenllm_v2/MMLU-PRO": 0.5051 } }, { "id": "qingy2024/Fusion4-14B-Instruct", "name": "Fusion4-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7649, "hfopenllm_v2/BBH": 0.6543, "hfopenllm_v2/MATH Level 5": 0.3882, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4326, "hfopenllm_v2/MMLU-PRO": 0.5194 } }, { "id": "qingy2024/OwO-14B-Instruct", "name": "OwO-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1383, "hfopenllm_v2/BBH": 0.6165, "hfopenllm_v2/MATH Level 5": 0.4162, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.4407, "hfopenllm_v2/MMLU-PRO": 0.5181 } }, { "id": "qingy2024/Qwarkstar-4B", "name": "Qwarkstar-4B", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1994, "hfopenllm_v2/BBH": 0.4015, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4428, "hfopenllm_v2/MMLU-PRO": 0.2425 } }, { "id": "qingy2024/Qwarkstar-4B-Instruct-Preview", "name": "Qwarkstar-4B-Instruct-Preview", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5324, "hfopenllm_v2/BBH": 0.4358, "hfopenllm_v2/MATH Level 5": 0.1284, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3896, "hfopenllm_v2/MMLU-PRO": 0.2502 } }, { "id": "qingy2024/Qwen2.5-4B", "name": "Qwen2.5-4B", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2158, "hfopenllm_v2/BBH": 0.4269, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.461, "hfopenllm_v2/MMLU-PRO": 0.2525 } }, { "id": "qingy2024/Qwen2.5-Coder-Draft-1.5B-Instruct", "name": "Qwen2.5-Coder-Draft-1.5B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4125, "hfopenllm_v2/BBH": 0.3837, "hfopenllm_v2/MATH Level 5": 0.1579, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.2244 } }, { "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Alpha", "name": "Qwen2.5-Math-14B-Instruct-Alpha", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7704, "hfopenllm_v2/BBH": 0.6465, "hfopenllm_v2/MATH Level 5": 0.429, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4021, "hfopenllm_v2/MMLU-PRO": 0.4966 } }, { "id": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview", "name": "Qwen2.5-Math-14B-Instruct-Preview", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7826, "hfopenllm_v2/BBH": 0.6294, "hfopenllm_v2/MATH Level 5": 0.4758, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4115, "hfopenllm_v2/MMLU-PRO": 0.4993 } }, { "id": "qingy2024/Qwen2.6-14B-Instruct", "name": "Qwen2.6-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5811, "hfopenllm_v2/BBH": 0.6394, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4569, "hfopenllm_v2/MMLU-PRO": 0.5285 } }, { "id": "qingy2024/Qwen2.6-Math-14B-Instruct", "name": "Qwen2.6-Math-14B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3862, "hfopenllm_v2/BBH": 0.6324, "hfopenllm_v2/MATH Level 5": 0.429, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4759, "hfopenllm_v2/MMLU-PRO": 0.5241 } }, { "id": "qingy2024/QwEnlarge-16B-Instruct", "name": "QwEnlarge-16B-Instruct", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7802, "hfopenllm_v2/BBH": 0.5949, "hfopenllm_v2/MATH Level 5": 0.46, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.4476 } }, { "id": "qingy2024/QwQ-14B-Math-v0.2", "name": "QwQ-14B-Math-v0.2", "developer": "qingy2024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3391, "hfopenllm_v2/BBH": 0.5731, "hfopenllm_v2/MATH Level 5": 0.4811, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4021, "hfopenllm_v2/MMLU-PRO": 0.48 } }, { "id": "qq8933/OpenLongCoT-Base-Gemma2-2B", "name": "OpenLongCoT-Base-Gemma2-2B", "developer": "qq8933", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1965, "hfopenllm_v2/BBH": 0.3106, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1316 } }, { "id": "Quazim0t0/1up-14b", "name": "1up-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6888, "hfopenllm_v2/BBH": 0.6921, "hfopenllm_v2/MATH Level 5": 0.4162, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4583, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "Quazim0t0/Adamant-14B-sce", "name": "Adamant-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6858, "hfopenllm_v2/BBH": 0.6859, "hfopenllm_v2/MATH Level 5": 0.3988, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.5372 } }, { "id": "Quazim0t0/Alice-14B", "name": "Alice-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6836, "hfopenllm_v2/BBH": 0.6938, "hfopenllm_v2/MATH Level 5": 0.4569, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.5419 } }, { "id": "Quazim0t0/Alien-CoT-14B-sce", "name": "Alien-CoT-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0749, "hfopenllm_v2/BBH": 0.6395, "hfopenllm_v2/MATH Level 5": 0.5204, "hfopenllm_v2/GPQA": 0.3918, "hfopenllm_v2/MUSR": 0.4785, "hfopenllm_v2/MMLU-PRO": 0.517 } }, { "id": "Quazim0t0/Aura-8B-Linear", "name": "Aura-8B-Linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7948, "hfopenllm_v2/BBH": 0.5074, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3687, "hfopenllm_v2/MMLU-PRO": 0.3801 } }, { "id": "Quazim0t0/bloom-14b-stock", "name": "bloom-14b-stock", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6575, "hfopenllm_v2/BBH": 0.6878, "hfopenllm_v2/MATH Level 5": 0.4811, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.5373 } }, { "id": "Quazim0t0/caramel-14B", "name": "caramel-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6745, "hfopenllm_v2/BBH": 0.6919, "hfopenllm_v2/MATH Level 5": 0.4713, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4454, "hfopenllm_v2/MMLU-PRO": 0.5436 } }, { "id": "Quazim0t0/Casa-14b-sce", "name": "Casa-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6654, "hfopenllm_v2/BBH": 0.6901, "hfopenllm_v2/MATH Level 5": 0.4698, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.5426 } }, { "id": "Quazim0t0/Charlie-8B-Linear", "name": "Charlie-8B-Linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7381, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.2651, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3485, "hfopenllm_v2/MMLU-PRO": 0.3573 } }, { "id": "Quazim0t0/Chromatic-8b-sce", "name": "Chromatic-8b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5085, "hfopenllm_v2/BBH": 0.5063, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4051, "hfopenllm_v2/MMLU-PRO": 0.3755 } }, { "id": "Quazim0t0/CoT_Phi", "name": "CoT_Phi", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6159, "hfopenllm_v2/BBH": 0.6751, "hfopenllm_v2/MATH Level 5": 0.3308, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.4901 } }, { "id": "Quazim0t0/Dyson-14b", "name": "Dyson-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5857, "hfopenllm_v2/BBH": 0.6863, "hfopenllm_v2/MATH Level 5": 0.5393, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4259, "hfopenllm_v2/MMLU-PRO": 0.5399 } }, { "id": "Quazim0t0/Edu-14B-Linear", "name": "Edu-14B-Linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6158, "hfopenllm_v2/BBH": 0.6758, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4378, "hfopenllm_v2/MMLU-PRO": 0.5086 } }, { "id": "Quazim0t0/Fugazi14b", "name": "Fugazi14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6998, "hfopenllm_v2/BBH": 0.6941, "hfopenllm_v2/MATH Level 5": 0.4653, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4546, "hfopenllm_v2/MMLU-PRO": 0.5417 } }, { "id": "Quazim0t0/Geedorah-14B", "name": "Geedorah-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6873, "hfopenllm_v2/BBH": 0.6964, "hfopenllm_v2/MATH Level 5": 0.4449, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4547, "hfopenllm_v2/MMLU-PRO": 0.5421 } }, { "id": "Quazim0t0/GivingTree-8b-sce", "name": "GivingTree-8b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5006, "hfopenllm_v2/BBH": 0.504, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4051, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "Quazim0t0/graphite-14b-sce", "name": "graphite-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3217, "hfopenllm_v2/BBH": 0.6631, "hfopenllm_v2/MATH Level 5": 0.3006, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.528 } }, { "id": "Quazim0t0/GuiltySpark-14B-ties", "name": "GuiltySpark-14B-ties", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6854, "hfopenllm_v2/BBH": 0.6914, "hfopenllm_v2/MATH Level 5": 0.3837, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4557, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "Quazim0t0/GZA-14B-sce", "name": "GZA-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6274, "hfopenllm_v2/BBH": 0.6687, "hfopenllm_v2/MATH Level 5": 0.4721, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4285, "hfopenllm_v2/MMLU-PRO": 0.5232 } }, { "id": "Quazim0t0/Halo-14B-sce", "name": "Halo-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6754, "hfopenllm_v2/BBH": 0.6876, "hfopenllm_v2/MATH Level 5": 0.429, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.5376 } }, { "id": "Quazim0t0/Heretic1.5b", "name": "Heretic1.5b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2062, "hfopenllm_v2/BBH": 0.3529, "hfopenllm_v2/MATH Level 5": 0.244, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3511, "hfopenllm_v2/MMLU-PRO": 0.1728 } }, { "id": "Quazim0t0/Hyde-14b-sce", "name": "Hyde-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6715, "hfopenllm_v2/BBH": 0.6885, "hfopenllm_v2/MATH Level 5": 0.2734, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.53 } }, { "id": "Quazim0t0/Imagine-v0.5-16bit", "name": "Imagine-v0.5-16bit", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2759, "hfopenllm_v2/BBH": 0.6769, "hfopenllm_v2/MATH Level 5": 0.1397, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4349, "hfopenllm_v2/MMLU-PRO": 0.5354 } }, { "id": "Quazim0t0/Imbue-14b", "name": "Imbue-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.52, "hfopenllm_v2/BBH": 0.6845, "hfopenllm_v2/MATH Level 5": 0.5317, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4167, "hfopenllm_v2/MMLU-PRO": 0.5402 } }, { "id": "Quazim0t0/Insom", "name": "Insom", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6818, "hfopenllm_v2/BBH": 0.6881, "hfopenllm_v2/MATH Level 5": 0.3852, "hfopenllm_v2/GPQA": 0.3498, "hfopenllm_v2/MUSR": 0.4311, "hfopenllm_v2/MMLU-PRO": 0.5352 } }, { "id": "Quazim0t0/InspectorDeck-14B-sce", "name": "InspectorDeck-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3241, "hfopenllm_v2/BBH": 0.6668, "hfopenllm_v2/MATH Level 5": 0.3165, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3982, "hfopenllm_v2/MMLU-PRO": 0.5261 } }, { "id": "Quazim0t0/Jekyl-8b-sce", "name": "Jekyl-8b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4697, "hfopenllm_v2/BBH": 0.4994, "hfopenllm_v2/MATH Level 5": 0.1616, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4197, "hfopenllm_v2/MMLU-PRO": 0.3686 } }, { "id": "Quazim0t0/Jigsaw-14B-Linear", "name": "Jigsaw-14B-Linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.648, "hfopenllm_v2/BBH": 0.6865, "hfopenllm_v2/MATH Level 5": 0.2651, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4483, "hfopenllm_v2/MMLU-PRO": 0.5234 } }, { "id": "Quazim0t0/Katana-8b-sce", "name": "Katana-8b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5107, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.1511, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4038, "hfopenllm_v2/MMLU-PRO": 0.3771 } }, { "id": "Quazim0t0/Knot-CoT-14B-sce", "name": "Knot-CoT-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4832, "hfopenllm_v2/BBH": 0.6616, "hfopenllm_v2/MATH Level 5": 0.3995, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.5154 } }, { "id": "Quazim0t0/Lineage-14B", "name": "Lineage-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.707, "hfopenllm_v2/BBH": 0.6934, "hfopenllm_v2/MATH Level 5": 0.4245, "hfopenllm_v2/GPQA": 0.3599, "hfopenllm_v2/MUSR": 0.4597, "hfopenllm_v2/MMLU-PRO": 0.5411 } }, { "id": "Quazim0t0/Lo-Phi-14b", "name": "Lo-Phi-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4941, "hfopenllm_v2/BBH": 0.6852, "hfopenllm_v2/MATH Level 5": 0.5196, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.5369 } }, { "id": "Quazim0t0/Loke-14B-sce", "name": "Loke-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6848, "hfopenllm_v2/BBH": 0.6924, "hfopenllm_v2/MATH Level 5": 0.3905, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4637, "hfopenllm_v2/MMLU-PRO": 0.5401 } }, { "id": "Quazim0t0/Math_Phi4_Reason", "name": "Math_Phi4_Reason", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.322, "hfopenllm_v2/BBH": 0.624, "hfopenllm_v2/MATH Level 5": 0.3278, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.503 } }, { "id": "Quazim0t0/MFDOOM-14B", "name": "MFDOOM-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6736, "hfopenllm_v2/BBH": 0.6916, "hfopenllm_v2/MATH Level 5": 0.5264, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4377, "hfopenllm_v2/MMLU-PRO": 0.5426 } }, { "id": "Quazim0t0/MFGRIMM-14B", "name": "MFGRIMM-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6894, "hfopenllm_v2/BBH": 0.6909, "hfopenllm_v2/MATH Level 5": 0.506, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4361, "hfopenllm_v2/MMLU-PRO": 0.5416 } }, { "id": "Quazim0t0/Mithril-14B-sce", "name": "Mithril-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6958, "hfopenllm_v2/BBH": 0.6926, "hfopenllm_v2/MATH Level 5": 0.3822, "hfopenllm_v2/GPQA": 0.3691, "hfopenllm_v2/MUSR": 0.4611, "hfopenllm_v2/MMLU-PRO": 0.5403 } }, { "id": "Quazim0t0/mocha-14B", "name": "mocha-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5893, "hfopenllm_v2/BBH": 0.6895, "hfopenllm_v2/MATH Level 5": 0.5264, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4272, "hfopenllm_v2/MMLU-PRO": 0.5384 } }, { "id": "Quazim0t0/Mononoke-14B-sce", "name": "Mononoke-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3502, "hfopenllm_v2/BBH": 0.6744, "hfopenllm_v2/MATH Level 5": 0.4698, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.5298 } }, { "id": "Quazim0t0/mosaic-14b-sce", "name": "mosaic-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6876, "hfopenllm_v2/BBH": 0.6907, "hfopenllm_v2/MATH Level 5": 0.4026, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "Quazim0t0/Motion-8B-Linear", "name": "Motion-8B-Linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7686, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3606, "hfopenllm_v2/MMLU-PRO": 0.3785 } }, { "id": "Quazim0t0/Mouse-9B", "name": "Mouse-9B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1325, "hfopenllm_v2/BBH": 0.2979, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.347, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "Quazim0t0/Nova-14b-sce", "name": "Nova-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7022, "hfopenllm_v2/BBH": 0.6935, "hfopenllm_v2/MATH Level 5": 0.4162, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4571, "hfopenllm_v2/MMLU-PRO": 0.5413 } }, { "id": "Quazim0t0/NovaScotia-14b-stock", "name": "NovaScotia-14b-stock", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6787, "hfopenllm_v2/BBH": 0.6935, "hfopenllm_v2/MATH Level 5": 0.463, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4493, "hfopenllm_v2/MMLU-PRO": 0.5409 } }, { "id": "Quazim0t0/Oasis-14B-ties", "name": "Oasis-14B-ties", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6937, "hfopenllm_v2/BBH": 0.6915, "hfopenllm_v2/MATH Level 5": 0.3754, "hfopenllm_v2/GPQA": 0.3649, "hfopenllm_v2/MUSR": 0.4571, "hfopenllm_v2/MMLU-PRO": 0.5405 } }, { "id": "Quazim0t0/ODB-14B-sce", "name": "ODB-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2922, "hfopenllm_v2/BBH": 0.6559, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3929, "hfopenllm_v2/MMLU-PRO": 0.5207 } }, { "id": "Quazim0t0/Origami-14B-sce", "name": "Origami-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3259, "hfopenllm_v2/BBH": 0.662, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4035, "hfopenllm_v2/MMLU-PRO": 0.5244 } }, { "id": "Quazim0t0/Phi4.Turn.R1Distill.16bit", "name": "Phi4.Turn.R1Distill.16bit", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3126, "hfopenllm_v2/BBH": 0.6563, "hfopenllm_v2/MATH Level 5": 0.2311, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3902, "hfopenllm_v2/MMLU-PRO": 0.5257 } }, { "id": "Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors", "name": "Phi4.Turn.R1Distill_v1.5.1-Tensors", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2995, "hfopenllm_v2/BBH": 0.6456, "hfopenllm_v2/MATH Level 5": 0.219, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3929, "hfopenllm_v2/MMLU-PRO": 0.5117 } }, { "id": "Quazim0t0/Phi4Basis-14B-sce", "name": "Phi4Basis-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6502, "hfopenllm_v2/BBH": 0.6909, "hfopenllm_v2/MATH Level 5": 0.4789, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.539 } }, { "id": "Quazim0t0/Ponder-14B-linear", "name": "Ponder-14B-linear", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6906, "hfopenllm_v2/BBH": 0.6943, "hfopenllm_v2/MATH Level 5": 0.4282, "hfopenllm_v2/GPQA": 0.3582, "hfopenllm_v2/MUSR": 0.4558, "hfopenllm_v2/MMLU-PRO": 0.5408 } }, { "id": "Quazim0t0/Rosemary-14b", "name": "Rosemary-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6915, "hfopenllm_v2/BBH": 0.6955, "hfopenllm_v2/MATH Level 5": 0.4388, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4492, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "Quazim0t0/Rune-14b", "name": "Rune-14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7016, "hfopenllm_v2/BBH": 0.6937, "hfopenllm_v2/MATH Level 5": 0.4585, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4533, "hfopenllm_v2/MMLU-PRO": 0.5411 } }, { "id": "Quazim0t0/RZA-14B-sce", "name": "RZA-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4774, "hfopenllm_v2/BBH": 0.6686, "hfopenllm_v2/MATH Level 5": 0.5189, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4113, "hfopenllm_v2/MMLU-PRO": 0.5383 } }, { "id": "Quazim0t0/Sake-20b", "name": "Sake-20b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6693, "hfopenllm_v2/BBH": 0.677, "hfopenllm_v2/MATH Level 5": 0.4653, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4494, "hfopenllm_v2/MMLU-PRO": 0.5391 } }, { "id": "Quazim0t0/Spok-14b-sce", "name": "Spok-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6682, "hfopenllm_v2/BBH": 0.6899, "hfopenllm_v2/MATH Level 5": 0.2719, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.5298 } }, { "id": "Quazim0t0/Sumatra-20b", "name": "Sumatra-20b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6738, "hfopenllm_v2/BBH": 0.6855, "hfopenllm_v2/MATH Level 5": 0.3671, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.456, "hfopenllm_v2/MMLU-PRO": 0.5415 } }, { "id": "Quazim0t0/SuperNova14b", "name": "SuperNova14b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7076, "hfopenllm_v2/BBH": 0.6937, "hfopenllm_v2/MATH Level 5": 0.4396, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4545, "hfopenllm_v2/MMLU-PRO": 0.5435 } }, { "id": "Quazim0t0/SZA-14B-sce", "name": "SZA-14B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5659, "hfopenllm_v2/BBH": 0.6889, "hfopenllm_v2/MATH Level 5": 0.5242, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.5353 } }, { "id": "Quazim0t0/TB0-8B-sce", "name": "TB0-8B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5107, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.1511, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4038, "hfopenllm_v2/MMLU-PRO": 0.3771 } }, { "id": "Quazim0t0/TBL-8B-sce", "name": "TBL-8B-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4581, "hfopenllm_v2/BBH": 0.5008, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4236, "hfopenllm_v2/MMLU-PRO": 0.3689 } }, { "id": "Quazim0t0/tesseract-14b-stock", "name": "tesseract-14b-stock", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5848, "hfopenllm_v2/BBH": 0.688, "hfopenllm_v2/MATH Level 5": 0.5144, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.5389 } }, { "id": "Quazim0t0/ThinkPhi1.1-Tensors", "name": "ThinkPhi1.1-Tensors", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3908, "hfopenllm_v2/BBH": 0.6449, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.4908 } }, { "id": "Quazim0t0/time-14b-stock", "name": "time-14b-stock", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6699, "hfopenllm_v2/BBH": 0.6897, "hfopenllm_v2/MATH Level 5": 0.5083, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.5419 } }, { "id": "Quazim0t0/Venti-20b", "name": "Venti-20b", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6641, "hfopenllm_v2/BBH": 0.6901, "hfopenllm_v2/MATH Level 5": 0.3391, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.448, "hfopenllm_v2/MMLU-PRO": 0.5386 } }, { "id": "Quazim0t0/Venti-Blend-sce", "name": "Venti-Blend-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6879, "hfopenllm_v2/BBH": 0.6843, "hfopenllm_v2/MATH Level 5": 0.4056, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4389, "hfopenllm_v2/MMLU-PRO": 0.5414 } }, { "id": "Quazim0t0/Vine-14b-sce", "name": "Vine-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6733, "hfopenllm_v2/BBH": 0.6891, "hfopenllm_v2/MATH Level 5": 0.5008, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.5408 } }, { "id": "Quazim0t0/Wendy-14B", "name": "Wendy-14B", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6772, "hfopenllm_v2/BBH": 0.6958, "hfopenllm_v2/MATH Level 5": 0.4834, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4428, "hfopenllm_v2/MMLU-PRO": 0.5435 } }, { "id": "Quazim0t0/Wu-14b-sce", "name": "Wu-14b-sce", "developer": "Quazim0t0", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6718, "hfopenllm_v2/BBH": 0.6885, "hfopenllm_v2/MATH Level 5": 0.2613, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.5293 } }, { "id": "Qwen/Qwen1.5-0.5B", "name": "Qwen1.5-0.5B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1706, "hfopenllm_v2/BBH": 0.3154, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3616, "hfopenllm_v2/MMLU-PRO": 0.1307 } }, { "id": "Qwen/Qwen1.5-0.5B-Chat", "name": "Qwen/Qwen1.5-0.5B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1807, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3837, "hfopenllm_v2/MMLU-PRO": 0.1213, "reward-bench/Score": 0.5298, "reward-bench/Chat": 0.3547, "reward-bench/Chat Hard": 0.6294, "reward-bench/Safety": 0.5703, "reward-bench/Reasoning": 0.5984, "reward-bench/Prior Sets (0.5 weight)": 0.4629 } }, { "id": "Qwen/Qwen1.5-1.8B", "name": "Qwen1.5-1.8B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2154, "hfopenllm_v2/BBH": 0.3476, "hfopenllm_v2/MATH Level 5": 0.0317, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3605, "hfopenllm_v2/MMLU-PRO": 0.1882 } }, { "id": "Qwen/Qwen1.5-1.8B-Chat", "name": "Qwen/Qwen1.5-1.8B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2019, "hfopenllm_v2/BBH": 0.3256, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.1804, "reward-bench/Score": 0.589, "reward-bench/Chat": 0.5615, "reward-bench/Chat Hard": 0.6031, "reward-bench/Safety": 0.4838, "reward-bench/Reasoning": 0.7793, "reward-bench/Prior Sets (0.5 weight)": 0.4453 } }, { "id": "Qwen/Qwen1.5-110B", "name": "Qwen1.5-110B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3422, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.247, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4408, "hfopenllm_v2/MMLU-PRO": 0.5361 } }, { "id": "qwen/qwen1.5-110b-chat", "name": "Qwen1.5 Chat 110B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.55, "helm_lite/NarrativeQA": 0.721, "helm_lite/NaturalQuestions (closed-book)": 0.35, "helm_lite/OpenbookQA": 0.922, "helm_lite/MMLU": 0.704, "helm_lite/MATH": 0.568, "helm_lite/GSM8K": 0.815, "helm_lite/LegalBench": 0.624, "helm_lite/MedQA": 0.64, "helm_lite/WMT 2014": 0.192, "helm_mmlu/MMLU All Subjects": 0.768, "helm_mmlu/Abstract Algebra": 0.57, "helm_mmlu/Anatomy": 0.696, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.82, "helm_mmlu/Econometrics": 0.64, "helm_mmlu/Global Facts": 0.51, "helm_mmlu/Jurisprudence": 0.833, "helm_mmlu/Philosophy": 0.823, "helm_mmlu/Professional Psychology": 0.82, "helm_mmlu/Us Foreign Policy": 0.87, "helm_mmlu/Astronomy": 0.901, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.766, "helm_mmlu/Conceptual Physics": 0.838, "helm_mmlu/Electrical Engineering": 0.752, "helm_mmlu/Elementary Mathematics": 0.669, "helm_mmlu/Formal Logic": 0.643, "helm_mmlu/High School World History": 0.903, "helm_mmlu/Human Sexuality": 0.855, "helm_mmlu/International Law": 0.876, "helm_mmlu/Logical Fallacies": 0.828, "helm_mmlu/Machine Learning": 0.634, "helm_mmlu/Management": 0.835, "helm_mmlu/Marketing": 0.919, "helm_mmlu/Medical Genetics": 0.85, "helm_mmlu/Miscellaneous": 0.934, "helm_mmlu/Moral Scenarios": 0.783, "helm_mmlu/Nutrition": 0.804, "helm_mmlu/Prehistory": 0.867, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.735, "helm_mmlu/Sociology": 0.866, "helm_mmlu/Virology": 0.542, "helm_mmlu/World Religions": 0.871, "helm_mmlu/Mean win rate": 0.875, "hfopenllm_v2/IFEval": 0.5939, "hfopenllm_v2/BBH": 0.6184, "hfopenllm_v2/MATH Level 5": 0.2341, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4522, "hfopenllm_v2/MMLU-PRO": 0.4825 } }, { "id": "qwen/qwen1.5-14b", "name": "Qwen1.5 14B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.425, "helm_lite/NarrativeQA": 0.711, "helm_lite/NaturalQuestions (closed-book)": 0.3, "helm_lite/OpenbookQA": 0.862, "helm_lite/MMLU": 0.626, "helm_lite/MATH": 0.686, "helm_lite/GSM8K": 0.693, "helm_lite/LegalBench": 0.593, "helm_lite/MedQA": 0.515, "helm_lite/WMT 2014": 0.178, "helm_mmlu/MMLU All Subjects": 0.686, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.637, "helm_mmlu/College Physics": 0.48, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.561, "helm_mmlu/Global Facts": 0.49, "helm_mmlu/Jurisprudence": 0.769, "helm_mmlu/Philosophy": 0.717, "helm_mmlu/Professional Psychology": 0.699, "helm_mmlu/Us Foreign Policy": 0.87, "helm_mmlu/Astronomy": 0.724, "helm_mmlu/Business Ethics": 0.75, "helm_mmlu/Clinical Knowledge": 0.736, "helm_mmlu/Conceptual Physics": 0.694, "helm_mmlu/Electrical Engineering": 0.683, "helm_mmlu/Elementary Mathematics": 0.603, "helm_mmlu/Formal Logic": 0.492, "helm_mmlu/High School World History": 0.84, "helm_mmlu/Human Sexuality": 0.756, "helm_mmlu/International Law": 0.826, "helm_mmlu/Logical Fallacies": 0.736, "helm_mmlu/Machine Learning": 0.509, "helm_mmlu/Management": 0.816, "helm_mmlu/Marketing": 0.893, "helm_mmlu/Medical Genetics": 0.76, "helm_mmlu/Miscellaneous": 0.835, "helm_mmlu/Moral Scenarios": 0.368, "helm_mmlu/Nutrition": 0.742, "helm_mmlu/Prehistory": 0.71, "helm_mmlu/Public Relations": 0.655, "helm_mmlu/Security Studies": 0.8, "helm_mmlu/Sociology": 0.841, "helm_mmlu/Virology": 0.458, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.796, "hfopenllm_v2/IFEval": 0.2905, "hfopenllm_v2/BBH": 0.508, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "Qwen/Qwen1.5-14B-Chat", "name": "Qwen/Qwen1.5-14B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4768, "hfopenllm_v2/BBH": 0.5229, "hfopenllm_v2/MATH Level 5": 0.1526, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.44, "hfopenllm_v2/MMLU-PRO": 0.3618, "reward-bench/Score": 0.6864, "reward-bench/Chat": 0.5726, "reward-bench/Chat Hard": 0.7018, "reward-bench/Safety": 0.7122, "reward-bench/Reasoning": 0.8961, "reward-bench/Prior Sets (0.5 weight)": 0.4123 } }, { "id": "qwen/qwen1.5-32b", "name": "Qwen1.5 32B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.546, "helm_lite/NarrativeQA": 0.589, "helm_lite/NaturalQuestions (closed-book)": 0.353, "helm_lite/OpenbookQA": 0.932, "helm_lite/MMLU": 0.628, "helm_lite/MATH": 0.733, "helm_lite/GSM8K": 0.773, "helm_lite/LegalBench": 0.636, "helm_lite/MedQA": 0.656, "helm_lite/WMT 2014": 0.193, "helm_mmlu/MMLU All Subjects": 0.744, "helm_mmlu/Abstract Algebra": 0.4, "helm_mmlu/Anatomy": 0.644, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.77, "helm_mmlu/Econometrics": 0.561, "helm_mmlu/Global Facts": 0.47, "helm_mmlu/Jurisprudence": 0.843, "helm_mmlu/Philosophy": 0.826, "helm_mmlu/Professional Psychology": 0.75, "helm_mmlu/Us Foreign Policy": 0.91, "helm_mmlu/Astronomy": 0.855, "helm_mmlu/Business Ethics": 0.77, "helm_mmlu/Clinical Knowledge": 0.781, "helm_mmlu/Conceptual Physics": 0.766, "helm_mmlu/Electrical Engineering": 0.731, "helm_mmlu/Elementary Mathematics": 0.685, "helm_mmlu/Formal Logic": 0.524, "helm_mmlu/High School World History": 0.869, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.884, "helm_mmlu/Logical Fallacies": 0.822, "helm_mmlu/Machine Learning": 0.616, "helm_mmlu/Management": 0.874, "helm_mmlu/Marketing": 0.936, "helm_mmlu/Medical Genetics": 0.85, "helm_mmlu/Miscellaneous": 0.884, "helm_mmlu/Moral Scenarios": 0.545, "helm_mmlu/Nutrition": 0.81, "helm_mmlu/Prehistory": 0.83, "helm_mmlu/Public Relations": 0.664, "helm_mmlu/Security Studies": 0.829, "helm_mmlu/Sociology": 0.881, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.624, "hfopenllm_v2/IFEval": 0.3297, "hfopenllm_v2/BBH": 0.5715, "hfopenllm_v2/MATH Level 5": 0.3029, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.45 } }, { "id": "Qwen/Qwen1.5-32B-Chat", "name": "Qwen1.5-32B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5532, "hfopenllm_v2/BBH": 0.6067, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.416, "hfopenllm_v2/MMLU-PRO": 0.4457 } }, { "id": "Qwen/Qwen1.5-4B", "name": "Qwen1.5-4B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2445, "hfopenllm_v2/BBH": 0.4054, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3604, "hfopenllm_v2/MMLU-PRO": 0.246 } }, { "id": "Qwen/Qwen1.5-4B-Chat", "name": "Qwen/Qwen1.5-4B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3157, "hfopenllm_v2/BBH": 0.4006, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3978, "hfopenllm_v2/MMLU-PRO": 0.2396, "reward-bench/Score": 0.5477, "reward-bench/Chat": 0.3883, "reward-bench/Chat Hard": 0.6272, "reward-bench/Safety": 0.5568, "reward-bench/Reasoning": 0.6689, "reward-bench/Prior Sets (0.5 weight)": 0.447 } }, { "id": "qwen/qwen1.5-72b", "name": "Qwen1.5 72B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.608, "helm_lite/NarrativeQA": 0.601, "helm_lite/NaturalQuestions (closed-book)": 0.417, "helm_lite/OpenbookQA": 0.93, "helm_lite/MMLU": 0.647, "helm_lite/MATH": 0.683, "helm_lite/GSM8K": 0.799, "helm_lite/LegalBench": 0.694, "helm_lite/MedQA": 0.67, "helm_lite/WMT 2014": 0.201, "helm_mmlu/MMLU All Subjects": 0.774, "helm_mmlu/Abstract Algebra": 0.44, "helm_mmlu/Anatomy": 0.733, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.81, "helm_mmlu/Econometrics": 0.544, "helm_mmlu/Global Facts": 0.56, "helm_mmlu/Jurisprudence": 0.824, "helm_mmlu/Philosophy": 0.83, "helm_mmlu/Professional Psychology": 0.809, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.868, "helm_mmlu/Business Ethics": 0.79, "helm_mmlu/Clinical Knowledge": 0.834, "helm_mmlu/Conceptual Physics": 0.821, "helm_mmlu/Electrical Engineering": 0.779, "helm_mmlu/Elementary Mathematics": 0.696, "helm_mmlu/Formal Logic": 0.556, "helm_mmlu/High School World History": 0.899, "helm_mmlu/Human Sexuality": 0.878, "helm_mmlu/International Law": 0.909, "helm_mmlu/Logical Fallacies": 0.853, "helm_mmlu/Machine Learning": 0.67, "helm_mmlu/Management": 0.854, "helm_mmlu/Marketing": 0.949, "helm_mmlu/Medical Genetics": 0.87, "helm_mmlu/Miscellaneous": 0.921, "helm_mmlu/Moral Scenarios": 0.669, "helm_mmlu/Nutrition": 0.859, "helm_mmlu/Prehistory": 0.88, "helm_mmlu/Public Relations": 0.755, "helm_mmlu/Security Studies": 0.824, "helm_mmlu/Sociology": 0.9, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.65 } }, { "id": "Qwen/Qwen1.5-72B-Chat", "name": "Qwen/Qwen1.5-72B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6723, "reward-bench/Chat": 0.6229, "reward-bench/Chat Hard": 0.6601, "reward-bench/Safety": 0.6757, "reward-bench/Reasoning": 0.8554, "reward-bench/Prior Sets (0.5 weight)": 0.4226 } }, { "id": "qwen/qwen1.5-7b", "name": "Qwen1.5 7B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.275, "helm_lite/NarrativeQA": 0.448, "helm_lite/NaturalQuestions (closed-book)": 0.27, "helm_lite/OpenbookQA": 0.806, "helm_lite/MMLU": 0.569, "helm_lite/MATH": 0.561, "helm_lite/GSM8K": 0.6, "helm_lite/LegalBench": 0.523, "helm_lite/MedQA": 0.479, "helm_lite/WMT 2014": 0.153, "helm_mmlu/MMLU All Subjects": 0.626, "helm_mmlu/Abstract Algebra": 0.39, "helm_mmlu/Anatomy": 0.526, "helm_mmlu/College Physics": 0.471, "helm_mmlu/Computer Security": 0.76, "helm_mmlu/Econometrics": 0.447, "helm_mmlu/Global Facts": 0.4, "helm_mmlu/Jurisprudence": 0.778, "helm_mmlu/Philosophy": 0.691, "helm_mmlu/Professional Psychology": 0.603, "helm_mmlu/Us Foreign Policy": 0.84, "helm_mmlu/Astronomy": 0.671, "helm_mmlu/Business Ethics": 0.69, "helm_mmlu/Clinical Knowledge": 0.691, "helm_mmlu/Conceptual Physics": 0.579, "helm_mmlu/Electrical Engineering": 0.572, "helm_mmlu/Elementary Mathematics": 0.5, "helm_mmlu/Formal Logic": 0.397, "helm_mmlu/High School World History": 0.789, "helm_mmlu/Human Sexuality": 0.695, "helm_mmlu/International Law": 0.76, "helm_mmlu/Logical Fallacies": 0.706, "helm_mmlu/Machine Learning": 0.411, "helm_mmlu/Management": 0.816, "helm_mmlu/Marketing": 0.863, "helm_mmlu/Medical Genetics": 0.69, "helm_mmlu/Miscellaneous": 0.765, "helm_mmlu/Moral Scenarios": 0.372, "helm_mmlu/Nutrition": 0.696, "helm_mmlu/Prehistory": 0.688, "helm_mmlu/Public Relations": 0.627, "helm_mmlu/Security Studies": 0.727, "helm_mmlu/Sociology": 0.836, "helm_mmlu/Virology": 0.488, "helm_mmlu/World Religions": 0.778, "helm_mmlu/Mean win rate": 0.843, "hfopenllm_v2/IFEval": 0.2684, "hfopenllm_v2/BBH": 0.456, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4103, "hfopenllm_v2/MMLU-PRO": 0.2916 } }, { "id": "Qwen/Qwen1.5-7B-Chat", "name": "Qwen/Qwen1.5-7B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4371, "hfopenllm_v2/BBH": 0.451, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3779, "hfopenllm_v2/MMLU-PRO": 0.2951, "reward-bench/Score": 0.675, "reward-bench/Chat": 0.5363, "reward-bench/Chat Hard": 0.6908, "reward-bench/Safety": 0.6919, "reward-bench/Reasoning": 0.9041, "reward-bench/Prior Sets (0.5 weight)": 0.4288 } }, { "id": "Qwen/Qwen1.5-MoE-A2.7B", "name": "Qwen1.5-MoE-A2.7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.266, "hfopenllm_v2/BBH": 0.4114, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4013, "hfopenllm_v2/MMLU-PRO": 0.2778 } }, { "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat", "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3795, "hfopenllm_v2/BBH": 0.4272, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3899, "hfopenllm_v2/MMLU-PRO": 0.2923, "reward-bench/Score": 0.6644, "reward-bench/Chat": 0.7291, "reward-bench/Chat Hard": 0.6316, "reward-bench/Safety": 0.6284, "reward-bench/Reasoning": 0.774, "reward-bench/Prior Sets (0.5 weight)": 0.4536 } }, { "id": "Qwen/Qwen2-0.5B", "name": "Qwen2-0.5B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1873, "hfopenllm_v2/BBH": 0.3239, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3752, "hfopenllm_v2/MMLU-PRO": 0.172 } }, { "id": "Qwen/Qwen2-0.5B-Instruct", "name": "Qwen2-0.5B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2247, "hfopenllm_v2/BBH": 0.3173, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3353, "hfopenllm_v2/MMLU-PRO": 0.1531 } }, { "id": "Qwen/Qwen2-1.5B", "name": "Qwen2-1.5B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2113, "hfopenllm_v2/BBH": 0.3575, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.2552 } }, { "id": "Qwen/Qwen2-1.5B-Instruct", "name": "Qwen2-1.5B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3371, "hfopenllm_v2/BBH": 0.3852, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4293, "hfopenllm_v2/MMLU-PRO": 0.2501 } }, { "id": "Qwen/Qwen2-57B-A14B", "name": "Qwen2-57B-A14B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3113, "hfopenllm_v2/BBH": 0.5618, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.4916 } }, { "id": "Qwen/Qwen2-57B-A14B-Instruct", "name": "Qwen2-57B-A14B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6338, "hfopenllm_v2/BBH": 0.5888, "hfopenllm_v2/MATH Level 5": 0.2817, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4361, "hfopenllm_v2/MMLU-PRO": 0.4575 } }, { "id": "Qwen/Qwen2-72B", "name": "Qwen2-72B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3824, "hfopenllm_v2/BBH": 0.6617, "hfopenllm_v2/MATH Level 5": 0.3112, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4704, "hfopenllm_v2/MMLU-PRO": 0.5731 } }, { "id": "qwen/qwen2-72b-instruct", "name": "Qwen2 Instruct 72B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.77, "helm_lite/NarrativeQA": 0.727, "helm_lite/NaturalQuestions (closed-book)": 0.39, "helm_lite/OpenbookQA": 0.954, "helm_lite/MMLU": 0.769, "helm_lite/MATH": 0.79, "helm_lite/GSM8K": 0.92, "helm_lite/LegalBench": 0.712, "helm_lite/MedQA": 0.746, "helm_lite/WMT 2014": 0.207, "helm_mmlu/MMLU All Subjects": 0.824, "helm_mmlu/Abstract Algebra": 0.67, "helm_mmlu/Anatomy": 0.793, "helm_mmlu/College Physics": 0.598, "helm_mmlu/Computer Security": 0.85, "helm_mmlu/Econometrics": 0.737, "helm_mmlu/Global Facts": 0.58, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.859, "helm_mmlu/Professional Psychology": 0.886, "helm_mmlu/Us Foreign Policy": 0.94, "helm_mmlu/Astronomy": 0.934, "helm_mmlu/Business Ethics": 0.82, "helm_mmlu/Clinical Knowledge": 0.868, "helm_mmlu/Conceptual Physics": 0.872, "helm_mmlu/Electrical Engineering": 0.793, "helm_mmlu/Elementary Mathematics": 0.825, "helm_mmlu/Formal Logic": 0.667, "helm_mmlu/High School World History": 0.932, "helm_mmlu/Human Sexuality": 0.893, "helm_mmlu/International Law": 0.893, "helm_mmlu/Logical Fallacies": 0.914, "helm_mmlu/Machine Learning": 0.768, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.9, "helm_mmlu/Miscellaneous": 0.943, "helm_mmlu/Moral Scenarios": 0.815, "helm_mmlu/Nutrition": 0.902, "helm_mmlu/Prehistory": 0.914, "helm_mmlu/Public Relations": 0.745, "helm_mmlu/Security Studies": 0.837, "helm_mmlu/Sociology": 0.935, "helm_mmlu/Virology": 0.56, "helm_mmlu/World Religions": 0.848, "helm_mmlu/Mean win rate": 0.826, "hfopenllm_v2/IFEval": 0.7989, "hfopenllm_v2/BBH": 0.6977, "hfopenllm_v2/MATH Level 5": 0.4177, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.456, "hfopenllm_v2/MMLU-PRO": 0.5403 } }, { "id": "Qwen/Qwen2-7B", "name": "Qwen2-7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3149, "hfopenllm_v2/BBH": 0.5315, "hfopenllm_v2/MATH Level 5": 0.2039, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4439, "hfopenllm_v2/MMLU-PRO": 0.4183 } }, { "id": "Qwen/Qwen2-7B-Instruct", "name": "Qwen2-7B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5679, "hfopenllm_v2/BBH": 0.5545, "hfopenllm_v2/MATH Level 5": 0.2764, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3928, "hfopenllm_v2/MMLU-PRO": 0.3847 } }, { "id": "Qwen/Qwen2-Math-72B-Instruct", "name": "Qwen2-Math-72B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5694, "hfopenllm_v2/BBH": 0.6343, "hfopenllm_v2/MATH Level 5": 0.5536, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4517, "hfopenllm_v2/MMLU-PRO": 0.4273 } }, { "id": "Qwen/Qwen2-Math-7B", "name": "Qwen2-Math-7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2687, "hfopenllm_v2/BBH": 0.387, "hfopenllm_v2/MATH Level 5": 0.2477, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.1197 } }, { "id": "Qwen/Qwen2-VL-72B-Instruct", "name": "Qwen2-VL-72B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5982, "hfopenllm_v2/BBH": 0.6946, "hfopenllm_v2/MATH Level 5": 0.3444, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4492, "hfopenllm_v2/MMLU-PRO": 0.5717 } }, { "id": "Qwen/Qwen2-VL-7B-Instruct", "name": "Qwen2-VL-7B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4599, "hfopenllm_v2/BBH": 0.5465, "hfopenllm_v2/MATH Level 5": 0.1986, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.4095 } }, { "id": "Qwen/Qwen2.5-0.5B", "name": "Qwen2.5-0.5B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1627, "hfopenllm_v2/BBH": 0.3275, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3433, "hfopenllm_v2/MMLU-PRO": 0.1906 } }, { "id": "Qwen/Qwen2.5-0.5B-Instruct", "name": "Qwen2.5-0.5B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3071, "hfopenllm_v2/BBH": 0.3341, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3329, "hfopenllm_v2/MMLU-PRO": 0.1697 } }, { "id": "Qwen/Qwen2.5-1.5B", "name": "Qwen2.5-1.5B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2674, "hfopenllm_v2/BBH": 0.4078, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3576, "hfopenllm_v2/MMLU-PRO": 0.2855 } }, { "id": "Qwen/Qwen2.5-1.5B-Instruct", "name": "Qwen2.5-1.5B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4476, "hfopenllm_v2/BBH": 0.4289, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3663, "hfopenllm_v2/MMLU-PRO": 0.2799 } }, { "id": "Qwen/Qwen2.5-14B", "name": "Qwen2.5-14B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3694, "hfopenllm_v2/BBH": 0.6161, "hfopenllm_v2/MATH Level 5": 0.29, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4502, "hfopenllm_v2/MMLU-PRO": 0.5249 } }, { "id": "Qwen/Qwen2.5-14B-Instruct", "name": "Qwen2.5-14B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8158, "hfopenllm_v2/BBH": 0.639, "hfopenllm_v2/MATH Level 5": 0.5476, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4101, "hfopenllm_v2/MMLU-PRO": 0.4904 } }, { "id": "Qwen/Qwen2.5-14B-Instruct-1M", "name": "Qwen2.5-14B-Instruct-1M", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8414, "hfopenllm_v2/BBH": 0.6198, "hfopenllm_v2/MATH Level 5": 0.5302, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.418, "hfopenllm_v2/MMLU-PRO": 0.485 } }, { "id": "Qwen/Qwen2.5-32B", "name": "Qwen2.5-32B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4077, "hfopenllm_v2/BBH": 0.6771, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.4119, "hfopenllm_v2/MUSR": 0.4978, "hfopenllm_v2/MMLU-PRO": 0.5805 } }, { "id": "Qwen/Qwen2.5-32B-Instruct", "name": "Qwen2.5-32B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8346, "hfopenllm_v2/BBH": 0.6913, "hfopenllm_v2/MATH Level 5": 0.6254, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4261, "hfopenllm_v2/MMLU-PRO": 0.5667 } }, { "id": "Qwen/Qwen2.5-3B", "name": "Qwen2.5-3B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.269, "hfopenllm_v2/BBH": 0.4612, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4303, "hfopenllm_v2/MMLU-PRO": 0.3203 } }, { "id": "Qwen/Qwen2.5-3B-Instruct", "name": "Qwen2.5-3B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6475, "hfopenllm_v2/BBH": 0.4693, "hfopenllm_v2/MATH Level 5": 0.3678, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3968, "hfopenllm_v2/MMLU-PRO": 0.3255, "theory_of_mind/accuracy on theory_of_mind for scorer model_graded_fact": 0.78 } }, { "id": "Qwen/Qwen2.5-72B", "name": "Qwen2.5-72B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4137, "hfopenllm_v2/BBH": 0.6797, "hfopenllm_v2/MATH Level 5": 0.3912, "hfopenllm_v2/GPQA": 0.4052, "hfopenllm_v2/MUSR": 0.4771, "hfopenllm_v2/MMLU-PRO": 0.5968 } }, { "id": "Qwen/Qwen2.5-72B-Instruct", "name": "Qwen2.5-72B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8638, "hfopenllm_v2/BBH": 0.7273, "hfopenllm_v2/MATH Level 5": 0.5982, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4206, "hfopenllm_v2/MMLU-PRO": 0.5626 } }, { "id": "qwen/qwen2.5-72b-instruct-turbo", "name": "Qwen2.5 Instruct Turbo 72B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.599, "helm_capabilities/MMLU-Pro": 0.631, "helm_capabilities/GPQA": 0.426, "helm_capabilities/IFEval": 0.806, "helm_capabilities/WildBench": 0.802, "helm_capabilities/Omni-MATH": 0.33, "helm_lite/Mean win rate": 0.745, "helm_lite/NarrativeQA": 0.745, "helm_lite/NaturalQuestions (closed-book)": 0.359, "helm_lite/OpenbookQA": 0.962, "helm_lite/MMLU": 0.77, "helm_lite/MATH": 0.884, "helm_lite/GSM8K": 0.9, "helm_lite/LegalBench": 0.74, "helm_lite/MedQA": 0.753, "helm_lite/WMT 2014": 0.207, "helm_mmlu/MMLU All Subjects": 0.834, "helm_mmlu/Abstract Algebra": 0.68, "helm_mmlu/Anatomy": 0.822, "helm_mmlu/College Physics": 0.588, "helm_mmlu/Computer Security": 0.86, "helm_mmlu/Econometrics": 0.728, "helm_mmlu/Global Facts": 0.61, "helm_mmlu/Jurisprudence": 0.87, "helm_mmlu/Philosophy": 0.839, "helm_mmlu/Professional Psychology": 0.864, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.934, "helm_mmlu/Business Ethics": 0.85, "helm_mmlu/Clinical Knowledge": 0.872, "helm_mmlu/Conceptual Physics": 0.885, "helm_mmlu/Electrical Engineering": 0.8, "helm_mmlu/Elementary Mathematics": 0.87, "helm_mmlu/Formal Logic": 0.73, "helm_mmlu/High School World History": 0.92, "helm_mmlu/Human Sexuality": 0.878, "helm_mmlu/International Law": 0.893, "helm_mmlu/Logical Fallacies": 0.89, "helm_mmlu/Machine Learning": 0.777, "helm_mmlu/Management": 0.913, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.92, "helm_mmlu/Miscellaneous": 0.932, "helm_mmlu/Moral Scenarios": 0.787, "helm_mmlu/Nutrition": 0.886, "helm_mmlu/Prehistory": 0.91, "helm_mmlu/Public Relations": 0.782, "helm_mmlu/Security Studies": 0.849, "helm_mmlu/Sociology": 0.925, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.901, "helm_mmlu/Mean win rate": 0.548 } }, { "id": "Qwen/Qwen2.5-7B", "name": "Qwen2.5-7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3374, "hfopenllm_v2/BBH": 0.5416, "hfopenllm_v2/MATH Level 5": 0.2508, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4424, "hfopenllm_v2/MMLU-PRO": 0.4365, "la_leaderboard/la_leaderboard": 27.61 } }, { "id": "Qwen/Qwen2.5-7B-Instruct", "name": "Qwen2.5-7B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7585, "hfopenllm_v2/BBH": 0.5394, "hfopenllm_v2/MATH Level 5": 0.5, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.4287 } }, { "id": "Qwen/Qwen2.5-7B-Instruct-1M", "name": "Qwen2.5-7B-Instruct-1M", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7448, "hfopenllm_v2/BBH": 0.5404, "hfopenllm_v2/MATH Level 5": 0.4335, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4087, "hfopenllm_v2/MMLU-PRO": 0.3505 } }, { "id": "qwen/qwen2.5-7b-instruct-turbo", "name": "Qwen2.5 Instruct Turbo 7B", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.529, "helm_capabilities/MMLU-Pro": 0.539, "helm_capabilities/GPQA": 0.341, "helm_capabilities/IFEval": 0.741, "helm_capabilities/WildBench": 0.731, "helm_capabilities/Omni-MATH": 0.294, "helm_lite/Mean win rate": 0.488, "helm_lite/NarrativeQA": 0.742, "helm_lite/NaturalQuestions (closed-book)": 0.205, "helm_lite/OpenbookQA": 0.862, "helm_lite/MMLU": 0.658, "helm_lite/MATH": 0.835, "helm_lite/GSM8K": 0.83, "helm_lite/LegalBench": 0.632, "helm_lite/MedQA": 0.6, "helm_lite/WMT 2014": 0.155, "helm_mmlu/MMLU All Subjects": 0.729, "helm_mmlu/Abstract Algebra": 0.49, "helm_mmlu/Anatomy": 0.689, "helm_mmlu/College Physics": 0.51, "helm_mmlu/Computer Security": 0.79, "helm_mmlu/Econometrics": 0.64, "helm_mmlu/Global Facts": 0.42, "helm_mmlu/Jurisprudence": 0.796, "helm_mmlu/Philosophy": 0.746, "helm_mmlu/Professional Psychology": 0.757, "helm_mmlu/Us Foreign Policy": 0.86, "helm_mmlu/Astronomy": 0.836, "helm_mmlu/Business Ethics": 0.82, "helm_mmlu/Clinical Knowledge": 0.785, "helm_mmlu/Conceptual Physics": 0.736, "helm_mmlu/Electrical Engineering": 0.717, "helm_mmlu/Elementary Mathematics": 0.643, "helm_mmlu/Formal Logic": 0.587, "helm_mmlu/High School World History": 0.878, "helm_mmlu/Human Sexuality": 0.794, "helm_mmlu/International Law": 0.86, "helm_mmlu/Logical Fallacies": 0.773, "helm_mmlu/Machine Learning": 0.554, "helm_mmlu/Management": 0.845, "helm_mmlu/Marketing": 0.919, "helm_mmlu/Medical Genetics": 0.85, "helm_mmlu/Miscellaneous": 0.852, "helm_mmlu/Moral Scenarios": 0.511, "helm_mmlu/Nutrition": 0.778, "helm_mmlu/Prehistory": 0.836, "helm_mmlu/Public Relations": 0.709, "helm_mmlu/Security Studies": 0.682, "helm_mmlu/Sociology": 0.861, "helm_mmlu/Virology": 0.578, "helm_mmlu/World Religions": 0.83, "helm_mmlu/Mean win rate": 0.887 } }, { "id": "Qwen/Qwen2.5-Coder-14B", "name": "Qwen2.5-Coder-14B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3473, "hfopenllm_v2/BBH": 0.5865, "hfopenllm_v2/MATH Level 5": 0.2251, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.4521 } }, { "id": "Qwen/Qwen2.5-Coder-14B-Instruct", "name": "Qwen2.5-Coder-14B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6908, "hfopenllm_v2/BBH": 0.614, "hfopenllm_v2/MATH Level 5": 0.3248, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.3915, "hfopenllm_v2/MMLU-PRO": 0.3939 } }, { "id": "Qwen/Qwen2.5-Coder-32B", "name": "Qwen2.5-Coder-32B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4363, "hfopenllm_v2/BBH": 0.6404, "hfopenllm_v2/MATH Level 5": 0.3089, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.5303 } }, { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "name": "Qwen2.5-Coder-32B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7265, "hfopenllm_v2/BBH": 0.6625, "hfopenllm_v2/MATH Level 5": 0.4955, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "Qwen/Qwen2.5-Coder-7B", "name": "Qwen2.5-Coder-7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3446, "hfopenllm_v2/BBH": 0.4856, "hfopenllm_v2/MATH Level 5": 0.1918, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3449, "hfopenllm_v2/MMLU-PRO": 0.3679 } }, { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen2.5-Coder-7B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6147, "hfopenllm_v2/BBH": 0.4999, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4099, "hfopenllm_v2/MMLU-PRO": 0.3354 } }, { "id": "Qwen/Qwen2.5-Math-1.5B-Instruct", "name": "Qwen2.5-Math-1.5B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1856, "hfopenllm_v2/BBH": 0.3752, "hfopenllm_v2/MATH Level 5": 0.2628, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3685, "hfopenllm_v2/MMLU-PRO": 0.1801 } }, { "id": "Qwen/Qwen2.5-Math-72B-Instruct", "name": "Qwen2.5-Math-72B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4003, "hfopenllm_v2/BBH": 0.6452, "hfopenllm_v2/MATH Level 5": 0.6239, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4473, "hfopenllm_v2/MMLU-PRO": 0.4812 } }, { "id": "Qwen/Qwen2.5-Math-7B", "name": "Qwen2.5-Math-7B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.246, "hfopenllm_v2/BBH": 0.4455, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3781, "hfopenllm_v2/MMLU-PRO": 0.2718 } }, { "id": "Qwen/Qwen2.5-Math-7B-Instruct", "name": "Qwen2.5-Math-7B-Instruct", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2636, "hfopenllm_v2/BBH": 0.4388, "hfopenllm_v2/MATH Level 5": 0.5808, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.282 } }, { "id": "qwen/qwen3-0-6b-fc", "name": "Qwen3-0.6B (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 92.0, "bfcl/bfcl.overall.overall_accuracy": 23.93, "bfcl/bfcl.overall.total_cost_usd": 0.46, "bfcl/bfcl.overall.latency_mean_s": 0.68, "bfcl/bfcl.overall.latency_std_s": 8.45, "bfcl/bfcl.overall.latency_p95_s": 0.96, "bfcl/bfcl.non_live.ast_accuracy": 71.79, "bfcl/bfcl.non_live.simple_ast_accuracy": 64.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 86.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 67.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 69.5, "bfcl/bfcl.live.live_accuracy": 56.62, "bfcl/bfcl.live.live_simple_ast_accuracy": 61.24, "bfcl/bfcl.live.live_multiple_ast_accuracy": 56.13, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 3.62, "bfcl/bfcl.multi_turn.base_accuracy": 5.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 2.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 3.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 4.0, "bfcl/bfcl.web_search.accuracy": 1.0, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 8.6, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 1.94, "bfcl/bfcl.memory.recursive_summarization_accuracy": 21.29, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 80.84 } }, { "id": "qwen/qwen3-0-6b-prompt", "name": "Qwen3-0.6B (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 94.0, "bfcl/bfcl.overall.overall_accuracy": 22.38, "bfcl/bfcl.overall.total_cost_usd": 3.65, "bfcl/bfcl.overall.latency_mean_s": 3.1, "bfcl/bfcl.overall.latency_std_s": 4.32, "bfcl/bfcl.overall.latency_p95_s": 10.31, "bfcl/bfcl.non_live.ast_accuracy": 70.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 64.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 78.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 75.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 63.0, "bfcl/bfcl.live.live_accuracy": 49.37, "bfcl/bfcl.live.live_simple_ast_accuracy": 57.75, "bfcl/bfcl.live.live_multiple_ast_accuracy": 47.77, "bfcl/bfcl.live.live_parallel_ast_accuracy": 37.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 37.5, "bfcl/bfcl.multi_turn.accuracy": 1.38, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 1.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 8.39, "bfcl/bfcl.memory.kv_accuracy": 1.29, "bfcl/bfcl.memory.vector_accuracy": 2.58, "bfcl/bfcl.memory.recursive_summarization_accuracy": 21.29, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.5, "bfcl/bfcl.format_sensitivity.max_delta": 60.5, "bfcl/bfcl.format_sensitivity.stddev": 24.35 } }, { "id": "qwen/qwen3-1-7b-fc", "name": "Qwen3-1.7B (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 71.0, "bfcl/bfcl.overall.overall_accuracy": 28.41, "bfcl/bfcl.overall.total_cost_usd": 4.33, "bfcl/bfcl.overall.latency_mean_s": 5.12, "bfcl/bfcl.overall.latency_std_s": 7.37, "bfcl/bfcl.overall.latency_p95_s": 13.35, "bfcl/bfcl.non_live.ast_accuracy": 82.92, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 80.0, "bfcl/bfcl.live.live_accuracy": 74.61, "bfcl/bfcl.live.live_simple_ast_accuracy": 76.74, "bfcl/bfcl.live.live_multiple_ast_accuracy": 74.26, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 11.0, "bfcl/bfcl.multi_turn.base_accuracy": 15.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 6.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 12.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 11.0, "bfcl/bfcl.web_search.accuracy": 2.5, "bfcl/bfcl.web_search.base_accuracy": 3.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 6.02, "bfcl/bfcl.memory.kv_accuracy": 4.52, "bfcl/bfcl.memory.vector_accuracy": 7.74, "bfcl/bfcl.memory.recursive_summarization_accuracy": 5.81, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 76.54 } }, { "id": "qwen/qwen3-14b-fc", "name": "Qwen3-14B (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 43.0, "bfcl/bfcl.overall.overall_accuracy": 41.03, "bfcl/bfcl.overall.total_cost_usd": 3.38, "bfcl/bfcl.overall.latency_mean_s": 4.5, "bfcl/bfcl.overall.latency_std_s": 18.84, "bfcl/bfcl.overall.latency_p95_s": 13.34, "bfcl/bfcl.non_live.ast_accuracy": 84.94, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 80.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.0, "bfcl/bfcl.live.live_accuracy": 80.01, "bfcl/bfcl.live.live_simple_ast_accuracy": 85.66, "bfcl/bfcl.live.live_multiple_ast_accuracy": 79.01, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 34.75, "bfcl/bfcl.multi_turn.base_accuracy": 39.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 34.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 33.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 32.5, "bfcl/bfcl.web_search.accuracy": 10.0, "bfcl/bfcl.web_search.base_accuracy": 8.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 12.0, "bfcl/bfcl.memory.accuracy": 19.57, "bfcl/bfcl.memory.kv_accuracy": 7.1, "bfcl/bfcl.memory.vector_accuracy": 16.77, "bfcl/bfcl.memory.recursive_summarization_accuracy": 34.84, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 81.94 } }, { "id": "qwen/qwen3-14b-prompt", "name": "Qwen3-14B (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 47.0, "bfcl/bfcl.overall.overall_accuracy": 37.77, "bfcl/bfcl.overall.total_cost_usd": 1.35, "bfcl/bfcl.overall.latency_mean_s": 1.2, "bfcl/bfcl.overall.latency_std_s": 8.5, "bfcl/bfcl.overall.latency_p95_s": 2.3, "bfcl/bfcl.non_live.ast_accuracy": 89.46, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 95.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 92.0, "bfcl/bfcl.live.live_accuracy": 79.35, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.06, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 26.13, "bfcl/bfcl.multi_turn.base_accuracy": 16.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 37.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 31.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 19.5, "bfcl/bfcl.web_search.accuracy": 10.5, "bfcl/bfcl.web_search.base_accuracy": 6.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 15.0, "bfcl/bfcl.memory.accuracy": 11.18, "bfcl/bfcl.memory.kv_accuracy": 4.52, "bfcl/bfcl.memory.vector_accuracy": 6.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 22.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.18, "bfcl/bfcl.format_sensitivity.max_delta": 14.0, "bfcl/bfcl.format_sensitivity.stddev": 3.97 } }, { "id": "qwen/qwen3-235b-a22b-fp8-tput", "name": "Qwen3 235B A22B FP8 Throughput", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.726, "helm_capabilities/MMLU-Pro": 0.817, "helm_capabilities/GPQA": 0.623, "helm_capabilities/IFEval": 0.816, "helm_capabilities/WildBench": 0.828, "helm_capabilities/Omni-MATH": 0.548 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507-fc", "name": "Qwen3-235B-A22B-Instruct-2507 (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 31.0, "bfcl/bfcl.overall.overall_accuracy": 47.99, "bfcl/bfcl.overall.total_cost_usd": 2.5, "bfcl/bfcl.overall.latency_mean_s": 2.57, "bfcl/bfcl.overall.latency_std_s": 2.44, "bfcl/bfcl.overall.latency_p95_s": 6.27, "bfcl/bfcl.non_live.ast_accuracy": 37.4, "bfcl/bfcl.non_live.simple_ast_accuracy": 40.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 36.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 53.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 19.5, "bfcl/bfcl.live.live_accuracy": 68.91, "bfcl/bfcl.live.live_simple_ast_accuracy": 58.53, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.6, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 45.38, "bfcl/bfcl.multi_turn.base_accuracy": 57.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 35.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 33.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 55.5, "bfcl/bfcl.web_search.accuracy": 54.0, "bfcl/bfcl.web_search.base_accuracy": 57.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 51.0, "bfcl/bfcl.memory.accuracy": 23.87, "bfcl/bfcl.memory.kv_accuracy": 7.1, "bfcl/bfcl.memory.vector_accuracy": 18.71, "bfcl/bfcl.memory.recursive_summarization_accuracy": 45.81, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 81.73 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507-fp8", "name": "Qwen3 235B A22B Instruct 2507 FP8", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.798, "helm_capabilities/MMLU-Pro": 0.844, "helm_capabilities/GPQA": 0.726, "helm_capabilities/IFEval": 0.835, "helm_capabilities/WildBench": 0.866, "helm_capabilities/Omni-MATH": 0.718 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507-prompt", "name": "Qwen3-235B-A22B-Instruct-2507 (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 23.0, "bfcl/bfcl.overall.overall_accuracy": 52.15, "bfcl/bfcl.overall.total_cost_usd": 3.12, "bfcl/bfcl.overall.latency_mean_s": 2.56, "bfcl/bfcl.overall.latency_std_s": 2.75, "bfcl/bfcl.overall.latency_p95_s": 7.61, "bfcl/bfcl.non_live.ast_accuracy": 90.33, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 95.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.0, "bfcl/bfcl.live.live_accuracy": 78.68, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.95, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.78, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 44.62, "bfcl/bfcl.multi_turn.base_accuracy": 54.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 42.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 31.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 50.5, "bfcl/bfcl.web_search.accuracy": 50.5, "bfcl/bfcl.web_search.base_accuracy": 56.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 45.0, "bfcl/bfcl.memory.accuracy": 19.35, "bfcl/bfcl.memory.kv_accuracy": 12.9, "bfcl/bfcl.memory.vector_accuracy": 11.61, "bfcl/bfcl.memory.recursive_summarization_accuracy": 33.55, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 78.89, "bfcl/bfcl.format_sensitivity.max_delta": 8.0, "bfcl/bfcl.format_sensitivity.stddev": 1.95 } }, { "id": "qwen/qwen3-30b-a3b-instruct-2507-fc", "name": "Qwen3-30B-A3B-Instruct-2507 (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 41.0, "bfcl/bfcl.overall.overall_accuracy": 41.39, "bfcl/bfcl.overall.total_cost_usd": 5.62, "bfcl/bfcl.overall.latency_mean_s": 5.95, "bfcl/bfcl.overall.latency_std_s": 25.48, "bfcl/bfcl.overall.latency_p95_s": 12.7, "bfcl/bfcl.non_live.ast_accuracy": 85.77, "bfcl/bfcl.non_live.simple_ast_accuracy": 68.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 77.94, "bfcl/bfcl.live.live_simple_ast_accuracy": 83.33, "bfcl/bfcl.live.live_multiple_ast_accuracy": 76.83, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 30.0, "bfcl/bfcl.multi_turn.base_accuracy": 43.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 10.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 25.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 41.0, "bfcl/bfcl.web_search.accuracy": 22.5, "bfcl/bfcl.web_search.base_accuracy": 21.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 24.0, "bfcl/bfcl.memory.accuracy": 17.63, "bfcl/bfcl.memory.kv_accuracy": 9.03, "bfcl/bfcl.memory.vector_accuracy": 9.03, "bfcl/bfcl.memory.recursive_summarization_accuracy": 34.84, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.9 } }, { "id": "qwen/qwen3-30b-a3b-instruct-2507-prompt", "name": "Qwen3-30B-A3B-Instruct-2507 (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 53.0, "bfcl/bfcl.overall.overall_accuracy": 36.7, "bfcl/bfcl.overall.total_cost_usd": 1.56, "bfcl/bfcl.overall.latency_mean_s": 1.24, "bfcl/bfcl.overall.latency_std_s": 7.9, "bfcl/bfcl.overall.latency_p95_s": 2.84, "bfcl/bfcl.non_live.ast_accuracy": 88.92, "bfcl/bfcl.non_live.simple_ast_accuracy": 80.67, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.0, "bfcl/bfcl.live.live_accuracy": 78.39, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.56, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.49, "bfcl/bfcl.live.live_parallel_ast_accuracy": 87.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 23.5, "bfcl/bfcl.multi_turn.base_accuracy": 33.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 16.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 16.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 29.0, "bfcl/bfcl.web_search.accuracy": 17.5, "bfcl/bfcl.web_search.base_accuracy": 15.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 20.0, "bfcl/bfcl.memory.accuracy": 9.68, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 6.45, "bfcl/bfcl.memory.recursive_summarization_accuracy": 16.77, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.85, "bfcl/bfcl.format_sensitivity.max_delta": 16.0, "bfcl/bfcl.format_sensitivity.stddev": 4.13 } }, { "id": "qwen/qwen3-32b-fc", "name": "Qwen3-32B (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 29.0, "bfcl/bfcl.overall.overall_accuracy": 48.71, "bfcl/bfcl.overall.total_cost_usd": 153.08, "bfcl/bfcl.overall.latency_mean_s": 169.87, "bfcl/bfcl.overall.latency_std_s": 164.27, "bfcl/bfcl.overall.latency_p95_s": 473.49, "bfcl/bfcl.non_live.ast_accuracy": 88.77, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.5, "bfcl/bfcl.live.live_accuracy": 82.01, "bfcl/bfcl.live.live_simple_ast_accuracy": 89.53, "bfcl/bfcl.live.live_multiple_ast_accuracy": 80.91, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 50.0, "bfcl/bfcl.multi_turn.accuracy": 47.87, "bfcl/bfcl.multi_turn.base_accuracy": 56.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 52.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 40.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 43.0, "bfcl/bfcl.web_search.accuracy": 21.5, "bfcl/bfcl.web_search.base_accuracy": 25.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 18.0, "bfcl/bfcl.memory.accuracy": 26.67, "bfcl/bfcl.memory.kv_accuracy": 12.26, "bfcl/bfcl.memory.vector_accuracy": 25.81, "bfcl/bfcl.memory.recursive_summarization_accuracy": 41.94, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 76.37 } }, { "id": "qwen/qwen3-32b-prompt", "name": "Qwen3-32B (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 33.0, "bfcl/bfcl.overall.overall_accuracy": 46.78, "bfcl/bfcl.overall.total_cost_usd": 199.47, "bfcl/bfcl.overall.latency_mean_s": 167.54, "bfcl/bfcl.overall.latency_std_s": 160.5, "bfcl/bfcl.overall.latency_p95_s": 457.87, "bfcl/bfcl.non_live.ast_accuracy": 90.27, "bfcl/bfcl.non_live.simple_ast_accuracy": 79.08, "bfcl/bfcl.non_live.multiple_ast_accuracy": 97.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.5, "bfcl/bfcl.live.live_accuracy": 82.01, "bfcl/bfcl.live.live_simple_ast_accuracy": 87.21, "bfcl/bfcl.live.live_multiple_ast_accuracy": 81.2, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 43.25, "bfcl/bfcl.multi_turn.base_accuracy": 54.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 46.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 36.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 36.5, "bfcl/bfcl.web_search.accuracy": 26.0, "bfcl/bfcl.web_search.base_accuracy": 34.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 18.0, "bfcl/bfcl.memory.accuracy": 15.7, "bfcl/bfcl.memory.kv_accuracy": 13.55, "bfcl/bfcl.memory.vector_accuracy": 14.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 19.35, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.39, "bfcl/bfcl.format_sensitivity.max_delta": 15.5, "bfcl/bfcl.format_sensitivity.stddev": 3.75 } }, { "id": "qwen/qwen3-4b-instruct-2507-fc", "name": "Qwen3-4B-Instruct-2507 (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 54.0, "bfcl/bfcl.overall.overall_accuracy": 35.68, "bfcl/bfcl.overall.total_cost_usd": 6.37, "bfcl/bfcl.overall.latency_mean_s": 7.61, "bfcl/bfcl.overall.latency_std_s": 20.36, "bfcl/bfcl.overall.latency_p95_s": 49.18, "bfcl/bfcl.non_live.ast_accuracy": 87.88, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 90.0, "bfcl/bfcl.live.live_accuracy": 76.39, "bfcl/bfcl.live.live_simple_ast_accuracy": 79.07, "bfcl/bfcl.live.live_multiple_ast_accuracy": 76.16, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 66.67, "bfcl/bfcl.multi_turn.accuracy": 22.12, "bfcl/bfcl.multi_turn.base_accuracy": 26.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 21.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 15.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 25.5, "bfcl/bfcl.web_search.accuracy": 3.0, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 17.63, "bfcl/bfcl.memory.kv_accuracy": 16.13, "bfcl/bfcl.memory.vector_accuracy": 12.26, "bfcl/bfcl.memory.recursive_summarization_accuracy": 24.52, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 84.93 } }, { "id": "qwen/qwen3-4b-instruct-2507-prompt", "name": "Qwen3-4B-Instruct-2507 (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 55.0, "bfcl/bfcl.overall.overall_accuracy": 35.52, "bfcl/bfcl.overall.total_cost_usd": 53.66, "bfcl/bfcl.overall.latency_mean_s": 44.7, "bfcl/bfcl.overall.latency_std_s": 163.79, "bfcl/bfcl.overall.latency_p95_s": 208.06, "bfcl/bfcl.non_live.ast_accuracy": 86.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 77.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 74.69, "bfcl/bfcl.live.live_simple_ast_accuracy": 77.91, "bfcl/bfcl.live.live_multiple_ast_accuracy": 74.17, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 20.5, "bfcl/bfcl.multi_turn.base_accuracy": 24.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 21.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 16.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 20.0, "bfcl/bfcl.web_search.accuracy": 4.5, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 5.0, "bfcl/bfcl.memory.accuracy": 23.87, "bfcl/bfcl.memory.kv_accuracy": 12.9, "bfcl/bfcl.memory.vector_accuracy": 14.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 44.52, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 75.87, "bfcl/bfcl.format_sensitivity.max_delta": 18.0, "bfcl/bfcl.format_sensitivity.stddev": 5.22 } }, { "id": "qwen/qwen3-8b-fc", "name": "Qwen3-8B (FC)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 39.0, "bfcl/bfcl.overall.overall_accuracy": 42.57, "bfcl/bfcl.overall.total_cost_usd": 43.32, "bfcl/bfcl.overall.latency_mean_s": 51.36, "bfcl/bfcl.overall.latency_std_s": 76.14, "bfcl/bfcl.overall.latency_p95_s": 188.98, "bfcl/bfcl.non_live.ast_accuracy": 87.58, "bfcl/bfcl.non_live.simple_ast_accuracy": 72.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 96.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.0, "bfcl/bfcl.live.live_accuracy": 80.53, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 79.68, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 41.75, "bfcl/bfcl.multi_turn.base_accuracy": 50.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 42.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 40.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 34.5, "bfcl/bfcl.web_search.accuracy": 12.0, "bfcl/bfcl.web_search.base_accuracy": 15.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 9.0, "bfcl/bfcl.memory.accuracy": 14.62, "bfcl/bfcl.memory.kv_accuracy": 5.16, "bfcl/bfcl.memory.vector_accuracy": 7.1, "bfcl/bfcl.memory.recursive_summarization_accuracy": 31.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.07 } }, { "id": "qwen/qwen3-8b-prompt", "name": "Qwen3-8B (Prompt)", "developer": "qwen", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 44.0, "bfcl/bfcl.overall.overall_accuracy": 40.43, "bfcl/bfcl.overall.total_cost_usd": 63.95, "bfcl/bfcl.overall.latency_mean_s": 54.17, "bfcl/bfcl.overall.latency_std_s": 79.9, "bfcl/bfcl.overall.latency_p95_s": 194.15, "bfcl/bfcl.non_live.ast_accuracy": 88.56, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 94.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 80.09, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.5, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.92, "bfcl/bfcl.live.live_parallel_ast_accuracy": 93.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 33.38, "bfcl/bfcl.multi_turn.base_accuracy": 41.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 38.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 27.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 26.5, "bfcl/bfcl.web_search.accuracy": 13.5, "bfcl/bfcl.web_search.base_accuracy": 19.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 8.0, "bfcl/bfcl.memory.accuracy": 13.12, "bfcl/bfcl.memory.kv_accuracy": 3.87, "bfcl/bfcl.memory.vector_accuracy": 10.32, "bfcl/bfcl.memory.recursive_summarization_accuracy": 25.16, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 82.27, "bfcl/bfcl.format_sensitivity.max_delta": 16.5, "bfcl/bfcl.format_sensitivity.stddev": 5.09 } }, { "id": "Qwen/QwQ-32B", "name": "QwQ-32B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3977, "hfopenllm_v2/BBH": 0.2983, "hfopenllm_v2/MATH Level 5": 0.1609, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.4206, "hfopenllm_v2/MMLU-PRO": 0.1196 } }, { "id": "Qwen/QwQ-32B-Preview", "name": "QwQ-32B-Preview", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4035, "hfopenllm_v2/BBH": 0.6691, "hfopenllm_v2/MATH Level 5": 0.4494, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.411, "hfopenllm_v2/MMLU-PRO": 0.5678 } }, { "id": "Qwen/WorldPM-72B", "name": "Qwen/WorldPM-72B", "developer": "Qwen", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6333, "reward-bench/Factuality": 0.7074, "reward-bench/Precise IF": 0.3125, "reward-bench/Math": 0.6557, "reward-bench/Safety": 0.8533, "reward-bench/Focus": 0.9172, "reward-bench/Ties": 0.3535 } }, { "id": "R-I-S-E/RISE-Judge-Qwen2.5-32B", "name": "R-I-S-E/RISE-Judge-Qwen2.5-32B", "developer": "R-I-S-E", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9266, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.8333, "reward-bench/Safety": 0.9189, "reward-bench/Reasoning": 0.9877 } }, { "id": "R-I-S-E/RISE-Judge-Qwen2.5-7B", "name": "R-I-S-E/RISE-Judge-Qwen2.5-7B", "developer": "R-I-S-E", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8819, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.7654, "reward-bench/Safety": 0.8797, "reward-bench/Reasoning": 0.9608 } }, { "id": "Rakuten/RakutenAI-2.0-mini-instruct", "name": "RakutenAI-2.0-mini-instruct", "developer": "Rakuten", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6794, "hfopenllm_v2/BBH": 0.2867, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3249, "hfopenllm_v2/MMLU-PRO": 0.1118 } }, { "id": "Rakuten/RakutenAI-7B", "name": "RakutenAI-7B", "developer": "Rakuten", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1556, "hfopenllm_v2/BBH": 0.4315, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.2877 } }, { "id": "Rakuten/RakutenAI-7B-chat", "name": "RakutenAI-7B-chat", "developer": "Rakuten", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2686, "hfopenllm_v2/BBH": 0.4316, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.379, "hfopenllm_v2/MMLU-PRO": 0.2798 } }, { "id": "raphgg/test-2.5-72B", "name": "test-2.5-72B", "developer": "raphgg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8437, "hfopenllm_v2/BBH": 0.7266, "hfopenllm_v2/MATH Level 5": 0.4109, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4812, "hfopenllm_v2/MMLU-PRO": 0.5837 } }, { "id": "rasyosef/Mistral-NeMo-Minitron-8B-Chat", "name": "Mistral-NeMo-Minitron-8B-Chat", "developer": "rasyosef", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4452, "hfopenllm_v2/BBH": 0.4759, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4304, "hfopenllm_v2/MMLU-PRO": 0.2404 } }, { "id": "rasyosef/Phi-1_5-Instruct-v0.1", "name": "Phi-1_5-Instruct-v0.1", "developer": "rasyosef", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2402, "hfopenllm_v2/BBH": 0.3118, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.1562 } }, { "id": "rasyosef/phi-2-instruct-apo", "name": "phi-2-instruct-apo", "developer": "rasyosef", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3146, "hfopenllm_v2/BBH": 0.4445, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.2155 } }, { "id": "rasyosef/phi-2-instruct-v0.1", "name": "phi-2-instruct-v0.1", "developer": "rasyosef", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3681, "hfopenllm_v2/BBH": 0.4726, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3524, "hfopenllm_v2/MMLU-PRO": 0.2247 } }, { "id": "Ray2333/Gemma-2B-rewardmodel-baseline", "name": "Ray2333/Gemma-2B-rewardmodel-baseline", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.729, "reward-bench/Chat": 0.9413, "reward-bench/Chat Hard": 0.4693, "reward-bench/Safety": 0.7865, "reward-bench/Reasoning": 0.7384, "reward-bench/Prior Sets (0.5 weight)": 0.6897 } }, { "id": "Ray2333/Gemma-2B-rewardmodel-ft", "name": "Ray2333/Gemma-2B-rewardmodel-ft", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8048, "reward-bench/Chat": 0.7793, "reward-bench/Chat Hard": 0.7478, "reward-bench/Safety": 0.8527, "reward-bench/Reasoning": 0.8393 } }, { "id": "Ray2333/GRM-Gemma-2B-rewardmodel-ft", "name": "Ray2333/GRM-Gemma-2B-rewardmodel-ft", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8447, "reward-bench/Chat": 0.8939, "reward-bench/Chat Hard": 0.7522, "reward-bench/Safety": 0.8446, "reward-bench/Reasoning": 0.8881 } }, { "id": "Ray2333/GRM-Gemma-2B-sftreg", "name": "Ray2333/GRM-Gemma-2B-sftreg", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7451, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.4868, "reward-bench/Safety": 0.7932, "reward-bench/Reasoning": 0.7684, "reward-bench/Prior Sets (0.5 weight)": 0.6983 } }, { "id": "Ray2333/GRM-gemma2-2B-rewardmodel-ft", "name": "Ray2333/GRM-gemma2-2B-rewardmodel-ft", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5966, "reward-bench/Chat": 0.9302, "reward-bench/Chat Hard": 0.7719, "reward-bench/Safety": 0.9222, "reward-bench/Reasoning": 0.912, "reward-bench/Factuality": 0.5305, "reward-bench/Precise IF": 0.3125, "reward-bench/Math": 0.5902, "reward-bench/Focus": 0.7455, "reward-bench/Ties": 0.4788 } }, { "id": "Ray2333/GRM-llama3-8B-distill", "name": "Ray2333/GRM-llama3-8B-distill", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.589, "reward-bench/Chat": 0.9832, "reward-bench/Chat Hard": 0.6842, "reward-bench/Safety": 0.7222, "reward-bench/Reasoning": 0.9133, "reward-bench/Prior Sets (0.5 weight)": 0.7209, "reward-bench/Factuality": 0.5874, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.5902, "reward-bench/Focus": 0.6727, "reward-bench/Ties": 0.5743 } }, { "id": "Ray2333/GRM-Llama3-8B-rewardmodel-ft", "name": "Ray2333/GRM-Llama3-8B-rewardmodel-ft", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6766, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.8618, "reward-bench/Safety": 0.9222, "reward-bench/Reasoning": 0.9362, "reward-bench/Factuality": 0.6274, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.5847, "reward-bench/Focus": 0.8929, "reward-bench/Ties": 0.6824 } }, { "id": "Ray2333/GRM-llama3-8B-sftreg", "name": "Ray2333/GRM-llama3-8B-sftreg", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6089, "reward-bench/Chat": 0.986, "reward-bench/Chat Hard": 0.6776, "reward-bench/Safety": 0.7867, "reward-bench/Reasoning": 0.9229, "reward-bench/Prior Sets (0.5 weight)": 0.7309, "reward-bench/Factuality": 0.6189, "reward-bench/Precise IF": 0.3875, "reward-bench/Math": 0.5792, "reward-bench/Focus": 0.6828, "reward-bench/Ties": 0.5981 } }, { "id": "Ray2333/GRM-llama3.2-3B-rewardmodel-ft", "name": "Ray2333/GRM-llama3.2-3B-rewardmodel-ft", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9092, "reward-bench/Chat": 0.9162, "reward-bench/Chat Hard": 0.8487, "reward-bench/Safety": 0.927, "reward-bench/Reasoning": 0.945 } }, { "id": "Ray2333/reward-model-Mistral-7B-instruct-Unifie...", "name": "Ray2333/reward-model-Mistral-7B-instruct-Unifie...", "developer": "Ray2333", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7661, "reward-bench/Chat": 0.9777, "reward-bench/Chat Hard": 0.5066, "reward-bench/Safety": 0.8527, "reward-bench/Reasoning": 0.7389, "reward-bench/Prior Sets (0.5 weight)": 0.7434 } }, { "id": "RDson/WomboCombo-R1-Coder-14B-Preview", "name": "WomboCombo-R1-Coder-14B-Preview", "developer": "RDson", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6286, "hfopenllm_v2/BBH": 0.6392, "hfopenllm_v2/MATH Level 5": 0.5989, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4844, "hfopenllm_v2/MMLU-PRO": 0.5168 } }, { "id": "realtreetune/rho-1b-sft-MATH", "name": "rho-1b-sft-MATH", "developer": "realtreetune", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2121, "hfopenllm_v2/BBH": 0.3144, "hfopenllm_v2/MATH Level 5": 0.0347, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3458, "hfopenllm_v2/MMLU-PRO": 0.1117 } }, { "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp", "name": "Gemma-2-Ataraxy-Gemmasutra-9B-slerp", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7649, "hfopenllm_v2/BBH": 0.5974, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4245, "hfopenllm_v2/MMLU-PRO": 0.4207 } }, { "id": "recoilme/recoilme-gemma-2-9B-v0.1", "name": "recoilme-gemma-2-9B-v0.1", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7515, "hfopenllm_v2/BBH": 0.5995, "hfopenllm_v2/MATH Level 5": 0.2039, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4159 } }, { "id": "recoilme/recoilme-gemma-2-9B-v0.2", "name": "recoilme-gemma-2-9B-v0.2", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2747, "hfopenllm_v2/BBH": 0.6031, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4686, "hfopenllm_v2/MMLU-PRO": 0.4122 } }, { "id": "recoilme/recoilme-gemma-2-9B-v0.3", "name": "recoilme-gemma-2-9B-v0.3", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5761, "hfopenllm_v2/BBH": 0.602, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4632, "hfopenllm_v2/MMLU-PRO": 0.4039 } }, { "id": "recoilme/recoilme-gemma-2-9B-v0.4", "name": "recoilme-gemma-2-9B-v0.4", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2562, "hfopenllm_v2/BBH": 0.5967, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4727, "hfopenllm_v2/MMLU-PRO": 0.4406 } }, { "id": "recoilme/recoilme-gemma-2-9B-v0.5", "name": "recoilme-gemma-2-9B-v0.5", "developer": "recoilme", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7664, "hfopenllm_v2/BBH": 0.5981, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4232, "hfopenllm_v2/MMLU-PRO": 0.42 } }, { "id": "redrix/AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", "name": "AngelSlayer-12B-Unslop-Mell-RPMax-DARKNESS", "developer": "redrix", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.536, "hfopenllm_v2/BBH": 0.5129, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.318 } }, { "id": "redrix/patricide-12B-Unslop-Mell", "name": "patricide-12B-Unslop-Mell", "developer": "redrix", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4074, "hfopenllm_v2/BBH": 0.5399, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4026, "hfopenllm_v2/MMLU-PRO": 0.357 } }, { "id": "refuelai/Llama-3-Refueled", "name": "Llama-3-Refueled", "developer": "refuelai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.462, "hfopenllm_v2/BBH": 0.5871, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4454, "hfopenllm_v2/MMLU-PRO": 0.3095 } }, { "id": "Replete-AI/L3-Pneuma-8B", "name": "L3-Pneuma-8B", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2413, "hfopenllm_v2/BBH": 0.4909, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4105, "hfopenllm_v2/MMLU-PRO": 0.3176 } }, { "id": "Replete-AI/L3.1-Pneuma-8B", "name": "L3.1-Pneuma-8B", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7076, "hfopenllm_v2/BBH": 0.505, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.3691 } }, { "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted", "name": "Llama3-8B-Instruct-Replete-Adapted", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6915, "hfopenllm_v2/BBH": 0.487, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3634, "hfopenllm_v2/MMLU-PRO": 0.3391 } }, { "id": "Replete-AI/Replete-Coder-Instruct-8b-Merged", "name": "Replete-Coder-Instruct-8b-Merged", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5388, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.1805 } }, { "id": "Replete-AI/Replete-Coder-Llama3-8B", "name": "Replete-Coder-Llama3-8B", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4729, "hfopenllm_v2/BBH": 0.3271, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3953, "hfopenllm_v2/MMLU-PRO": 0.1331 } }, { "id": "Replete-AI/Replete-Coder-Qwen2-1.5b", "name": "Replete-Coder-Qwen2-1.5b", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3014, "hfopenllm_v2/BBH": 0.3475, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.2147 } }, { "id": "Replete-AI/Replete-LLM-Qwen2-7b", "name": "Replete-LLM-Qwen2-7b", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0932, "hfopenllm_v2/BBH": 0.2977, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3941, "hfopenllm_v2/MMLU-PRO": 0.1157 } }, { "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview", "name": "Replete-LLM-Qwen2-7b_Beta-Preview", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0858, "hfopenllm_v2/BBH": 0.2929, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3981, "hfopenllm_v2/MMLU-PRO": 0.1285 } }, { "id": "Replete-AI/Replete-LLM-V2-Llama-3.1-8b", "name": "Replete-LLM-V2-Llama-3.1-8b", "developer": "Replete-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5515, "hfopenllm_v2/BBH": 0.5339, "hfopenllm_v2/MATH Level 5": 0.1405, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4001, "hfopenllm_v2/MMLU-PRO": 0.3753 } }, { "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR", "name": "EVA-Qwen2.5-1.5B-FRFR", "developer": "RESMPDEV", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3082, "hfopenllm_v2/BBH": 0.3932, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3539, "hfopenllm_v2/MMLU-PRO": 0.277 } }, { "id": "RESMPDEV/Qwen2-Wukong-0.5B", "name": "Qwen2-Wukong-0.5B", "developer": "RESMPDEV", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1854, "hfopenllm_v2/BBH": 0.3085, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2366, "hfopenllm_v2/MUSR": 0.3525, "hfopenllm_v2/MMLU-PRO": 0.1327 } }, { "id": "RezVortex/Jajuka-3b", "name": "Jajuka-3b", "developer": "RezVortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6925, "hfopenllm_v2/BBH": 0.4594, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.3137 } }, { "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B", "name": "JAJUKA-WEWILLNEVERFORGETYOU-3B", "developer": "RezVortex", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6858, "hfopenllm_v2/BBH": 0.4619, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.363, "hfopenllm_v2/MMLU-PRO": 0.3143 } }, { "id": "rhplus0831/maid-yuzu-v7", "name": "maid-yuzu-v7", "developer": "rhplus0831", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6462, "hfopenllm_v2/BBH": 0.4805, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.354 } }, { "id": "rhymes-ai/Aria", "name": "Aria", "developer": "rhymes-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4773, "hfopenllm_v2/BBH": 0.5695, "hfopenllm_v2/MATH Level 5": 0.1934, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.4405 } }, { "id": "rhysjones/phi-2-orange-v2", "name": "phi-2-orange-v2", "developer": "rhysjones", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.367, "hfopenllm_v2/BBH": 0.477, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.363, "hfopenllm_v2/MMLU-PRO": 0.2532 } }, { "id": "riaz/FineLlama-3.1-8B", "name": "FineLlama-3.1-8B", "developer": "riaz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4137, "hfopenllm_v2/BBH": 0.4565, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3776, "hfopenllm_v2/MMLU-PRO": 0.2978 } }, { "id": "RLHFlow/ArmoRM-Llama3-8B-v0.1", "name": "RLHFlow/ArmoRM-Llama3-8B-v0.1", "developer": "RLHFlow", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1897, "hfopenllm_v2/BBH": 0.2876, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3948, "hfopenllm_v2/MMLU-PRO": 0.1078, "reward-bench/Score": 0.886, "reward-bench/Factuality": 0.6568, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.6612, "reward-bench/Safety": 0.9054, "reward-bench/Focus": 0.7657, "reward-bench/Ties": 0.6629, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.7675, "reward-bench/Reasoning": 0.9735, "reward-bench/Prior Sets (0.5 weight)": 0.7429 } }, { "id": "RLHFlow/LLaMA3-iterative-DPO-final", "name": "RLHFlow/LLaMA3-iterative-DPO-final", "developer": "RLHFlow", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.534, "hfopenllm_v2/BBH": 0.5058, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3673, "hfopenllm_v2/MMLU-PRO": 0.3257, "reward-bench/Score": 0.6783, "reward-bench/Chat": 0.838, "reward-bench/Chat Hard": 0.5921, "reward-bench/Safety": 0.7865, "reward-bench/Reasoning": 0.6161, "reward-bench/Prior Sets (0.5 weight)": 0.4392 } }, { "id": "RLHFlow/pair-preference-model-LLaMA3-8B", "name": "RLHFlow/pair-preference-model-LLaMA3-8B", "developer": "RLHFlow", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8575, "reward-bench/Chat": 0.9832, "reward-bench/Chat Hard": 0.6579, "reward-bench/Safety": 0.8973, "reward-bench/Reasoning": 0.9473, "reward-bench/Prior Sets (0.5 weight)": 0.7458 } }, { "id": "RLHFlow/RewardModel-Mistral-7B-for-DPA-v1", "name": "RLHFlow/RewardModel-Mistral-7B-for-DPA-v1", "developer": "RLHFlow", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6633, "reward-bench/Chat": 0.8799, "reward-bench/Chat Hard": 0.4978, "reward-bench/Safety": 0.7068, "reward-bench/Reasoning": 0.5971, "reward-bench/Prior Sets (0.5 weight)": 0.6068 } }, { "id": "rmdhirr/Gluon-8B", "name": "Gluon-8B", "developer": "rmdhirr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5053, "hfopenllm_v2/BBH": 0.5153, "hfopenllm_v2/MATH Level 5": 0.1443, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.3808 } }, { "id": "Ro-xe/FMixIA-7B-DARE-0", "name": "FMixIA-7B-DARE-0", "developer": "Ro-xe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3341, "hfopenllm_v2/BBH": 0.5035, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4545, "hfopenllm_v2/MMLU-PRO": 0.3016 } }, { "id": "Ro-xe/FMixIA-7B-SLERP-27", "name": "FMixIA-7B-SLERP-27", "developer": "Ro-xe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3765, "hfopenllm_v2/BBH": 0.5151, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4412, "hfopenllm_v2/MMLU-PRO": 0.3008 } }, { "id": "Ro-xe/FMixIA-7B-TIES-1", "name": "FMixIA-7B-TIES-1", "developer": "Ro-xe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3453, "hfopenllm_v2/BBH": 0.5092, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4689, "hfopenllm_v2/MMLU-PRO": 0.2992 } }, { "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9", "name": "FMixIA-FrankenMerge-9.5B-PT-9", "developer": "Ro-xe", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.194, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.3657 } }, { "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b", "name": "Rombo-LLM-V2.5-Qwen-7b", "developer": "Rombo-Org", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7482, "hfopenllm_v2/BBH": 0.54, "hfopenllm_v2/MATH Level 5": 0.5068, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.398, "hfopenllm_v2/MMLU-PRO": 0.4283 } }, { "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b", "name": "Rombos-Coder-V2.5-Qwen-14b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7047, "hfopenllm_v2/BBH": 0.6165, "hfopenllm_v2/MATH Level 5": 0.3301, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3915, "hfopenllm_v2/MMLU-PRO": 0.3939 } }, { "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b", "name": "Rombos-Coder-V2.5-Qwen-7b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.621, "hfopenllm_v2/BBH": 0.5077, "hfopenllm_v2/MATH Level 5": 0.3338, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3979, "hfopenllm_v2/MMLU-PRO": 0.3398 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b", "name": "Rombos-LLM-V2.5-Qwen-0.5b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2847, "hfopenllm_v2/BBH": 0.3294, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3236, "hfopenllm_v2/MMLU-PRO": 0.1866 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b", "name": "Rombos-LLM-V2.5-Qwen-1.5b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3402, "hfopenllm_v2/BBH": 0.4257, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4186, "hfopenllm_v2/MMLU-PRO": 0.2922 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b", "name": "Rombos-LLM-V2.5-Qwen-14b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.584, "hfopenllm_v2/BBH": 0.6481, "hfopenllm_v2/MATH Level 5": 0.4554, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4717, "hfopenllm_v2/MMLU-PRO": 0.5376 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b", "name": "Rombos-LLM-V2.5-Qwen-32b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6827, "hfopenllm_v2/BBH": 0.7046, "hfopenllm_v2/MATH Level 5": 0.4955, "hfopenllm_v2/GPQA": 0.3968, "hfopenllm_v2/MUSR": 0.5034, "hfopenllm_v2/MMLU-PRO": 0.5916 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b", "name": "Rombos-LLM-V2.5-Qwen-3b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5342, "hfopenllm_v2/BBH": 0.4809, "hfopenllm_v2/MATH Level 5": 0.2795, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4042, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", "name": "Rombos-LLM-V2.5-Qwen-72b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7155, "hfopenllm_v2/BBH": 0.723, "hfopenllm_v2/MATH Level 5": 0.5423, "hfopenllm_v2/GPQA": 0.3985, "hfopenllm_v2/MUSR": 0.4599, "hfopenllm_v2/MMLU-PRO": 0.5935 } }, { "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b", "name": "Rombos-LLM-V2.5-Qwen-7b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6237, "hfopenllm_v2/BBH": 0.5544, "hfopenllm_v2/MATH Level 5": 0.3814, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.4469 } }, { "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b", "name": "Rombos-LLM-V2.5.1-Qwen-3b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2566, "hfopenllm_v2/BBH": 0.39, "hfopenllm_v2/MATH Level 5": 0.1208, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3991, "hfopenllm_v2/MMLU-PRO": 0.2741 } }, { "id": "rombodawg/Rombos-LLM-V2.6-Nemotron-70b", "name": "Rombos-LLM-V2.6-Nemotron-70b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7527, "hfopenllm_v2/BBH": 0.6938, "hfopenllm_v2/MATH Level 5": 0.3331, "hfopenllm_v2/GPQA": 0.406, "hfopenllm_v2/MUSR": 0.4669, "hfopenllm_v2/MMLU-PRO": 0.5329 } }, { "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b", "name": "Rombos-LLM-V2.6-Qwen-14b", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8432, "hfopenllm_v2/BBH": 0.6442, "hfopenllm_v2/MATH Level 5": 0.5211, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4221, "hfopenllm_v2/MMLU-PRO": 0.4961 } }, { "id": "rombodawg/rombos_Replete-Coder-Instruct-8b-Merged", "name": "rombos_Replete-Coder-Instruct-8b-Merged", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5388, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.1809 } }, { "id": "rombodawg/rombos_Replete-Coder-Llama3-8B", "name": "rombos_Replete-Coder-Llama3-8B", "developer": "rombodawg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4714, "hfopenllm_v2/BBH": 0.3276, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.1335 } }, { "id": "rootxhacker/Apollo-70B", "name": "Apollo-70B", "developer": "rootxhacker", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5099, "hfopenllm_v2/BBH": 0.6804, "hfopenllm_v2/MATH Level 5": 0.5612, "hfopenllm_v2/GPQA": 0.4572, "hfopenllm_v2/MUSR": 0.4948, "hfopenllm_v2/MMLU-PRO": 0.5279 } }, { "id": "rootxhacker/apollo-7B", "name": "apollo-7B", "developer": "rootxhacker", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2953, "hfopenllm_v2/BBH": 0.3636, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4131, "hfopenllm_v2/MMLU-PRO": 0.1748 } }, { "id": "rootxhacker/Apollo_v2-32B", "name": "Apollo_v2-32B", "developer": "rootxhacker", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.428, "hfopenllm_v2/BBH": 0.7072, "hfopenllm_v2/MATH Level 5": 0.4275, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4994, "hfopenllm_v2/MMLU-PRO": 0.5869 } }, { "id": "rsh345/mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", "name": "mistral-ft-optimized-1218-NeuralHermes-2.5-Mistral-7B", "developer": "rsh345", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3892, "hfopenllm_v2/BBH": 0.5188, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4672, "hfopenllm_v2/MMLU-PRO": 0.3054 } }, { "id": "rubenroy/Geneva-12B-GCv2-5m", "name": "Geneva-12B-GCv2-5m", "developer": "rubenroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2586, "hfopenllm_v2/BBH": 0.5278, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3525, "hfopenllm_v2/MMLU-PRO": 0.325 } }, { "id": "rubenroy/Gilgamesh-72B", "name": "Gilgamesh-72B", "developer": "rubenroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8486, "hfopenllm_v2/BBH": 0.7253, "hfopenllm_v2/MATH Level 5": 0.4381, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4626, "hfopenllm_v2/MMLU-PRO": 0.5802 } }, { "id": "rubenroy/Zurich-14B-GCv2-5m", "name": "Zurich-14B-GCv2-5m", "developer": "rubenroy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6164, "hfopenllm_v2/BBH": 0.6308, "hfopenllm_v2/MATH Level 5": 0.3074, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4874, "hfopenllm_v2/MMLU-PRO": 0.5233 } }, { "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2", "name": "LogoS-7Bx2-MoE-13B-v0.2", "developer": "RubielLabarta", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4379, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4226, "hfopenllm_v2/MMLU-PRO": 0.3088 } }, { "id": "ruizhe1217/sft-s1-qwen-0.5b", "name": "sft-s1-qwen-0.5b", "developer": "ruizhe1217", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2749, "hfopenllm_v2/BBH": 0.3301, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1892 } }, { "id": "rwitz/go-bruins-v2", "name": "go-bruins-v2", "developer": "rwitz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4096, "hfopenllm_v2/BBH": 0.3799, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.2761 } }, { "id": "RWKV/rwkv-raven-14b", "name": "rwkv-raven-14b", "developer": "RWKV", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0768, "hfopenllm_v2/BBH": 0.3307, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.229, "hfopenllm_v2/MUSR": 0.3951, "hfopenllm_v2/MMLU-PRO": 0.115 } }, { "id": "sabersaleh/Llama2-7B-CPO", "name": "Llama2-7B-CPO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1545, "hfopenllm_v2/BBH": 0.3458, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.1606 } }, { "id": "sabersaleh/Llama2-7B-DPO", "name": "Llama2-7B-DPO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1453, "hfopenllm_v2/BBH": 0.3512, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.1626 } }, { "id": "sabersaleh/Llama2-7B-IPO", "name": "Llama2-7B-IPO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1769, "hfopenllm_v2/BBH": 0.3475, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4048, "hfopenllm_v2/MMLU-PRO": 0.1617 } }, { "id": "sabersaleh/Llama2-7B-KTO", "name": "Llama2-7B-KTO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1528, "hfopenllm_v2/BBH": 0.3501, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4167, "hfopenllm_v2/MMLU-PRO": 0.1636 } }, { "id": "sabersaleh/Llama2-7B-SimPO", "name": "Llama2-7B-SimPO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1659, "hfopenllm_v2/BBH": 0.3489, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.1641 } }, { "id": "sabersaleh/Llama2-7B-SPO", "name": "Llama2-7B-SPO", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1567, "hfopenllm_v2/BBH": 0.3383, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.1757 } }, { "id": "sabersaleh/Llama3", "name": "Llama3", "developer": "sabersaleh", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3321, "hfopenllm_v2/BBH": 0.4782, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3933, "hfopenllm_v2/MMLU-PRO": 0.3162 } }, { "id": "sabersalehk/Llama3-001-300", "name": "Llama3-001-300", "developer": "sabersalehk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3179, "hfopenllm_v2/BBH": 0.4745, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4064, "hfopenllm_v2/MMLU-PRO": 0.3158 } }, { "id": "sabersalehk/Llama3-SimPO", "name": "Llama3-SimPO", "developer": "sabersalehk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3642, "hfopenllm_v2/BBH": 0.4874, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.3157 } }, { "id": "sabersalehk/Llama3_001_200", "name": "Llama3_001_200", "developer": "sabersalehk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3218, "hfopenllm_v2/BBH": 0.4728, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4037, "hfopenllm_v2/MMLU-PRO": 0.3183 } }, { "id": "sabersalehk/Llama3_01_300", "name": "Llama3_01_300", "developer": "sabersalehk", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2959, "hfopenllm_v2/BBH": 0.4691, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4065, "hfopenllm_v2/MMLU-PRO": 0.3124 } }, { "id": "SaisExperiments/Evil-Alpaca-3B-L3.2", "name": "Evil-Alpaca-3B-L3.2", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3251, "hfopenllm_v2/BBH": 0.4341, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.4198, "hfopenllm_v2/MMLU-PRO": 0.2621 } }, { "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct", "name": "Gemma-2-2B-Opus-Instruct", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.475, "hfopenllm_v2/BBH": 0.4293, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4057, "hfopenllm_v2/MMLU-PRO": 0.265 } }, { "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered", "name": "Gemma-2-2B-Stheno-Filtered", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4197, "hfopenllm_v2/BBH": 0.4149, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4003, "hfopenllm_v2/MMLU-PRO": 0.263 } }, { "id": "SaisExperiments/Not-So-Small-Alpaca-24B", "name": "Not-So-Small-Alpaca-24B", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6244, "hfopenllm_v2/BBH": 0.5339, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4282, "hfopenllm_v2/MMLU-PRO": 0.3694 } }, { "id": "SaisExperiments/QwOwO-7B-V1", "name": "QwOwO-7B-V1", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4556, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.386, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3835, "hfopenllm_v2/MMLU-PRO": 0.4224 } }, { "id": "SaisExperiments/RightSheep-Llama3.2-3B", "name": "RightSheep-Llama3.2-3B", "developer": "SaisExperiments", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4156, "hfopenllm_v2/BBH": 0.4241, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3767, "hfopenllm_v2/MMLU-PRO": 0.254 } }, { "id": "saishf/Fimbulvetr-Kuro-Lotus-10.7B", "name": "Fimbulvetr-Kuro-Lotus-10.7B", "developer": "saishf", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4939, "hfopenllm_v2/BBH": 0.4342, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4445, "hfopenllm_v2/MMLU-PRO": 0.3389 } }, { "id": "saishf/Neural-SOVLish-Devil-8B-L3", "name": "Neural-SOVLish-Devil-8B-L3", "developer": "saishf", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4199, "hfopenllm_v2/BBH": 0.5142, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.411, "hfopenllm_v2/MMLU-PRO": 0.3807 } }, { "id": "saishshinde15/TethysAI_Base_Reasoning", "name": "TethysAI_Base_Reasoning", "developer": "saishshinde15", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6369, "hfopenllm_v2/BBH": 0.4519, "hfopenllm_v2/MATH Level 5": 0.3142, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4075, "hfopenllm_v2/MMLU-PRO": 0.3236 } }, { "id": "saishshinde15/TethysAI_Vortex", "name": "TethysAI_Vortex", "developer": "saishshinde15", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4298, "hfopenllm_v2/BBH": 0.4749, "hfopenllm_v2/MATH Level 5": 0.315, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4458, "hfopenllm_v2/MMLU-PRO": 0.3241 } }, { "id": "saishshinde15/TethysAI_Vortex_Reasoning", "name": "TethysAI_Vortex_Reasoning", "developer": "saishshinde15", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4021, "hfopenllm_v2/BBH": 0.4694, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.3381 } }, { "id": "sakaltcommunity/novablast-preview", "name": "novablast-preview", "developer": "sakaltcommunity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.453, "hfopenllm_v2/BBH": 0.7043, "hfopenllm_v2/MATH Level 5": 0.4894, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.5021, "hfopenllm_v2/MMLU-PRO": 0.5915 } }, { "id": "sakaltcommunity/sakaltum-7b", "name": "sakaltum-7b", "developer": "sakaltcommunity", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2604, "hfopenllm_v2/BBH": 0.4575, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3775, "hfopenllm_v2/MMLU-PRO": 0.2769 } }, { "id": "Sakalti/Anemoi-3B", "name": "Anemoi-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3804, "hfopenllm_v2/BBH": 0.4922, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.3766 } }, { "id": "Sakalti/Euphrates-14B", "name": "Euphrates-14B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2647, "hfopenllm_v2/BBH": 0.6138, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4516, "hfopenllm_v2/MMLU-PRO": 0.5255 } }, { "id": "Sakalti/light-1.1-3B", "name": "light-1.1-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2735, "hfopenllm_v2/BBH": 0.2803, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3901, "hfopenllm_v2/MMLU-PRO": 0.1209 } }, { "id": "Sakalti/light-3B", "name": "light-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5337, "hfopenllm_v2/BBH": 0.4831, "hfopenllm_v2/MATH Level 5": 0.2591, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3775 } }, { "id": "Sakalti/light-3b-beta", "name": "light-3b-beta", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5485, "hfopenllm_v2/BBH": 0.4815, "hfopenllm_v2/MATH Level 5": 0.2772, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.3758 } }, { "id": "Sakalti/light-7b-beta", "name": "light-7b-beta", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6234, "hfopenllm_v2/BBH": 0.5548, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.4456 } }, { "id": "Sakalti/llama-3-yanyuedao-8b-instruct", "name": "llama-3-yanyuedao-8b-instruct", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2186, "hfopenllm_v2/BBH": 0.435, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.2911 } }, { "id": "Sakalti/Llama3.2-3B-Uranus-1", "name": "Llama3.2-3B-Uranus-1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5335, "hfopenllm_v2/BBH": 0.4437, "hfopenllm_v2/MATH Level 5": 0.1495, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3669, "hfopenllm_v2/MMLU-PRO": 0.3094 } }, { "id": "Sakalti/magro-7B", "name": "magro-7B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1344, "hfopenllm_v2/BBH": 0.4186, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.446, "hfopenllm_v2/MMLU-PRO": 0.2765 } }, { "id": "Sakalti/Magro-7B-v1.1", "name": "Magro-7B-v1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1204, "hfopenllm_v2/BBH": 0.4179, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4433, "hfopenllm_v2/MMLU-PRO": 0.2764 } }, { "id": "Sakalti/mergekit-01", "name": "mergekit-01", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6234, "hfopenllm_v2/BBH": 0.5548, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4291, "hfopenllm_v2/MMLU-PRO": 0.4456 } }, { "id": "Sakalti/mergekit-della_linear-vmeykci", "name": "mergekit-della_linear-vmeykci", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1126, "hfopenllm_v2/BBH": 0.2816, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3897, "hfopenllm_v2/MMLU-PRO": 0.1089 } }, { "id": "Sakalti/model-3", "name": "model-3", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6264, "hfopenllm_v2/BBH": 0.5542, "hfopenllm_v2/MATH Level 5": 0.3708, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4264, "hfopenllm_v2/MMLU-PRO": 0.4455 } }, { "id": "Sakalti/Neptuno-3B", "name": "Neptuno-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4296, "hfopenllm_v2/BBH": 0.4834, "hfopenllm_v2/MATH Level 5": 0.2553, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4002, "hfopenllm_v2/MMLU-PRO": 0.3773 } }, { "id": "Sakalti/Neptuno-Alpha", "name": "Neptuno-Alpha", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.378, "hfopenllm_v2/BBH": 0.4925, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.3767 } }, { "id": "Sakalti/Oxyge1-33B", "name": "Oxyge1-33B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4548, "hfopenllm_v2/BBH": 0.7033, "hfopenllm_v2/MATH Level 5": 0.4962, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.5008, "hfopenllm_v2/MMLU-PRO": 0.5909 } }, { "id": "Sakalti/Phi3.5-Comets-3.8B", "name": "Phi3.5-Comets-3.8B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2094, "hfopenllm_v2/BBH": 0.3335, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3764, "hfopenllm_v2/MMLU-PRO": 0.1153 } }, { "id": "Sakalti/Qwen2.5-1B-Instruct", "name": "Qwen2.5-1B-Instruct", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1751, "hfopenllm_v2/BBH": 0.3027, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3369, "hfopenllm_v2/MMLU-PRO": 0.1213 } }, { "id": "Sakalti/qwen2.5-2.3B", "name": "qwen2.5-2.3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1288, "hfopenllm_v2/BBH": 0.2849, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3857, "hfopenllm_v2/MMLU-PRO": 0.1173 } }, { "id": "Sakalti/QwenTest-7", "name": "QwenTest-7", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1672, "hfopenllm_v2/BBH": 0.3063, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.1212 } }, { "id": "Sakalti/Saba-Passthrough-2", "name": "Saba-Passthrough-2", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1691, "hfopenllm_v2/BBH": 0.3672, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3844, "hfopenllm_v2/MMLU-PRO": 0.2077 } }, { "id": "Sakalti/Saba1-1.8B", "name": "Saba1-1.8B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3333, "hfopenllm_v2/BBH": 0.4147, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4239, "hfopenllm_v2/MMLU-PRO": 0.2926 } }, { "id": "Sakalti/Saba1-7B", "name": "Saba1-7B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4585, "hfopenllm_v2/BBH": 0.5489, "hfopenllm_v2/MATH Level 5": 0.3663, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4793, "hfopenllm_v2/MMLU-PRO": 0.4376 } }, { "id": "Sakalti/Saba1.5-1.5B", "name": "Saba1.5-1.5B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3333, "hfopenllm_v2/BBH": 0.4147, "hfopenllm_v2/MATH Level 5": 0.1541, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4239, "hfopenllm_v2/MMLU-PRO": 0.2926 } }, { "id": "Sakalti/Saba1.5-Pro-3B", "name": "Saba1.5-Pro-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2386, "hfopenllm_v2/BBH": 0.3623, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.4405, "hfopenllm_v2/MMLU-PRO": 0.1958 } }, { "id": "Sakalti/Saba2-14B-Preview", "name": "Saba2-14B-Preview", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4722, "hfopenllm_v2/BBH": 0.6496, "hfopenllm_v2/MATH Level 5": 0.3127, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5384 } }, { "id": "Sakalti/Saba2-3B", "name": "Saba2-3B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2865, "hfopenllm_v2/BBH": 0.2801, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3927, "hfopenllm_v2/MMLU-PRO": 0.121 } }, { "id": "Sakalti/Sailor-japanese", "name": "Sailor-japanese", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1605, "hfopenllm_v2/BBH": 0.2913, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3912, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "Sakalti/Saka-1.5B", "name": "Saka-1.5B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2726, "hfopenllm_v2/BBH": 0.3988, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.2415 } }, { "id": "Sakalti/Saka-14B", "name": "Saka-14B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7174, "hfopenllm_v2/BBH": 0.6497, "hfopenllm_v2/MATH Level 5": 0.4094, "hfopenllm_v2/GPQA": 0.396, "hfopenllm_v2/MUSR": 0.4886, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "Sakalti/Saka-24B", "name": "Saka-24B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3819, "hfopenllm_v2/BBH": 0.6072, "hfopenllm_v2/MATH Level 5": 0.1805, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4541, "hfopenllm_v2/MMLU-PRO": 0.4766 } }, { "id": "Sakalti/Saka-7.2B", "name": "Saka-7.2B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1545, "hfopenllm_v2/BBH": 0.2945, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.3711, "hfopenllm_v2/MMLU-PRO": 0.116 } }, { "id": "Sakalti/Saka-7.6B", "name": "Saka-7.6B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4524, "hfopenllm_v2/BBH": 0.5655, "hfopenllm_v2/MATH Level 5": 0.3255, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.454 } }, { "id": "Sakalti/SakalFusion-7B-Alpha", "name": "SakalFusion-7B-Alpha", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.529, "hfopenllm_v2/BBH": 0.5591, "hfopenllm_v2/MATH Level 5": 0.3844, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4581, "hfopenllm_v2/MMLU-PRO": 0.4474 } }, { "id": "Sakalti/SakalFusion-7B-Beta", "name": "SakalFusion-7B-Beta", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1809, "hfopenllm_v2/BBH": 0.2881, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.109 } }, { "id": "Sakalti/SakaMoe-3x1.6B-Instruct", "name": "SakaMoe-3x1.6B-Instruct", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2371, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3342, "hfopenllm_v2/MMLU-PRO": 0.1882 } }, { "id": "Sakalti/SJT-0.5B", "name": "SJT-0.5B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2425, "hfopenllm_v2/BBH": 0.3306, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1891 } }, { "id": "Sakalti/SJT-1.5B-Alpha", "name": "SJT-1.5B-Alpha", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3449, "hfopenllm_v2/BBH": 0.4241, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4226, "hfopenllm_v2/MMLU-PRO": 0.2961 } }, { "id": "Sakalti/SJT-1.5B-Alpha-1.1", "name": "SJT-1.5B-Alpha-1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3439, "hfopenllm_v2/BBH": 0.4243, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4239, "hfopenllm_v2/MMLU-PRO": 0.2966 } }, { "id": "Sakalti/SJT-1.7B", "name": "SJT-1.7B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1776, "hfopenllm_v2/BBH": 0.2934, "hfopenllm_v2/MATH Level 5": 0.0015, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.1133 } }, { "id": "Sakalti/SJT-14B", "name": "SJT-14B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5494, "hfopenllm_v2/BBH": 0.6536, "hfopenllm_v2/MATH Level 5": 0.3844, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4766, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "Sakalti/SJT-2.4B", "name": "SJT-2.4B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2804, "hfopenllm_v2/BBH": 0.349, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.1858 } }, { "id": "Sakalti/SJT-24B-Alpha", "name": "SJT-24B-Alpha", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3206, "hfopenllm_v2/BBH": 0.6081, "hfopenllm_v2/MATH Level 5": 0.253, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4857 } }, { "id": "Sakalti/SJT-2B", "name": "SJT-2B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2151, "hfopenllm_v2/BBH": 0.2936, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3564, "hfopenllm_v2/MMLU-PRO": 0.1187 } }, { "id": "Sakalti/SJT-2B-V1.1", "name": "SJT-2B-V1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3977, "hfopenllm_v2/BBH": 0.3984, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4299, "hfopenllm_v2/MMLU-PRO": 0.2124 } }, { "id": "Sakalti/SJT-3.7B", "name": "SJT-3.7B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1078, "hfopenllm_v2/BBH": 0.3393, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3617, "hfopenllm_v2/MMLU-PRO": 0.1505 } }, { "id": "Sakalti/SJT-4B", "name": "SJT-4B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4077, "hfopenllm_v2/BBH": 0.4886, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.478, "hfopenllm_v2/MMLU-PRO": 0.3281 } }, { "id": "Sakalti/SJT-7.5B", "name": "SJT-7.5B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4223, "hfopenllm_v2/BBH": 0.5367, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4399, "hfopenllm_v2/MMLU-PRO": 0.3951 } }, { "id": "Sakalti/SJT-7B-V1.1", "name": "SJT-7B-V1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4703, "hfopenllm_v2/BBH": 0.5419, "hfopenllm_v2/MATH Level 5": 0.2432, "hfopenllm_v2/GPQA": 0.3339, "hfopenllm_v2/MUSR": 0.4411, "hfopenllm_v2/MMLU-PRO": 0.4412 } }, { "id": "Sakalti/SJT-7B-V1.1-Multilingal", "name": "SJT-7B-V1.1-Multilingal", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1949, "hfopenllm_v2/BBH": 0.292, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.1137 } }, { "id": "Sakalti/SJT-8B", "name": "SJT-8B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6535, "hfopenllm_v2/BBH": 0.5282, "hfopenllm_v2/MATH Level 5": 0.2538, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.408, "hfopenllm_v2/MMLU-PRO": 0.4266 } }, { "id": "Sakalti/SJT-8B-V1.1", "name": "SJT-8B-V1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4621, "hfopenllm_v2/BBH": 0.5121, "hfopenllm_v2/MATH Level 5": 0.2069, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4266, "hfopenllm_v2/MMLU-PRO": 0.4231 } }, { "id": "Sakalti/SJT-900M", "name": "SJT-900M", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.241, "hfopenllm_v2/BBH": 0.3169, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3595, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "Sakalti/SJT-Moe2x7.5B", "name": "SJT-Moe2x7.5B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4117, "hfopenllm_v2/BBH": 0.5371, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4399, "hfopenllm_v2/MMLU-PRO": 0.3954 } }, { "id": "Sakalti/SJTPass-2", "name": "SJTPass-2", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.24, "hfopenllm_v2/BBH": 0.3302, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3222, "hfopenllm_v2/MMLU-PRO": 0.1902 } }, { "id": "Sakalti/SJTPass-4", "name": "SJTPass-4", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1913, "hfopenllm_v2/BBH": 0.2964, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3898, "hfopenllm_v2/MMLU-PRO": 0.1083 } }, { "id": "Sakalti/SJTPass-5", "name": "SJTPass-5", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2425, "hfopenllm_v2/BBH": 0.3103, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.1327 } }, { "id": "Sakalti/tara-3.8B", "name": "tara-3.8B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4077, "hfopenllm_v2/BBH": 0.4886, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.478, "hfopenllm_v2/MMLU-PRO": 0.3281 } }, { "id": "Sakalti/Tara-3.8B-v1.1", "name": "Tara-3.8B-v1.1", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4062, "hfopenllm_v2/BBH": 0.4886, "hfopenllm_v2/MATH Level 5": 0.1156, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.478, "hfopenllm_v2/MMLU-PRO": 0.3281 } }, { "id": "Sakalti/ultiima-14B", "name": "ultiima-14B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5701, "hfopenllm_v2/BBH": 0.6491, "hfopenllm_v2/MATH Level 5": 0.4698, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4718, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "Sakalti/ultiima-14B-v0.2", "name": "ultiima-14B-v0.2", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.707, "hfopenllm_v2/BBH": 0.6472, "hfopenllm_v2/MATH Level 5": 0.3995, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4794, "hfopenllm_v2/MMLU-PRO": 0.5387 } }, { "id": "Sakalti/ultiima-14B-v0.3", "name": "ultiima-14B-v0.3", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.704, "hfopenllm_v2/BBH": 0.6398, "hfopenllm_v2/MATH Level 5": 0.3965, "hfopenllm_v2/GPQA": 0.3767, "hfopenllm_v2/MUSR": 0.4754, "hfopenllm_v2/MMLU-PRO": 0.5337 } }, { "id": "Sakalti/ultiima-14B-v0.4", "name": "ultiima-14B-v0.4", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3008, "hfopenllm_v2/BBH": 0.642, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.396, "hfopenllm_v2/MUSR": 0.4886, "hfopenllm_v2/MMLU-PRO": 0.5278 } }, { "id": "Sakalti/ultiima-32B", "name": "ultiima-32B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6854, "hfopenllm_v2/BBH": 0.7037, "hfopenllm_v2/MATH Level 5": 0.4962, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4995, "hfopenllm_v2/MMLU-PRO": 0.591 } }, { "id": "Sakalti/ultiima-72B", "name": "ultiima-72B", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.714, "hfopenllm_v2/BBH": 0.7218, "hfopenllm_v2/MATH Level 5": 0.5355, "hfopenllm_v2/GPQA": 0.4144, "hfopenllm_v2/MUSR": 0.4652, "hfopenllm_v2/MMLU-PRO": 0.5906 } }, { "id": "Sakalti/ultiima-72B-v1.5", "name": "ultiima-72B-v1.5", "developer": "Sakalti", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.655, "hfopenllm_v2/BBH": 0.7392, "hfopenllm_v2/MATH Level 5": 0.4396, "hfopenllm_v2/GPQA": 0.4136, "hfopenllm_v2/MUSR": 0.4691, "hfopenllm_v2/MMLU-PRO": 0.6054 } }, { "id": "sakhan10/quantized_open_llama_3b_v2", "name": "quantized_open_llama_3b_v2", "developer": "sakhan10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1872, "hfopenllm_v2/BBH": 0.302, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3682, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R", "name": "LLaMA-3-8B-SFR-Iterative-DPO-R", "developer": "Salesforce", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3816, "hfopenllm_v2/BBH": 0.5012, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.3172 } }, { "id": "Salesforce/SFR-LLaMa-3.1-70B-Judge-r", "name": "Salesforce/SFR-LLaMa-3.1-70B-Judge-r", "developer": "Salesforce", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9272, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.8476, "reward-bench/Safety": 0.9162, "reward-bench/Reasoning": 0.9757 } }, { "id": "Salesforce/SFR-LLaMa-3.1-8B-Judge-r", "name": "Salesforce/SFR-LLaMa-3.1-8B-Judge-r", "developer": "Salesforce", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8865, "reward-bench/Chat": 0.9553, "reward-bench/Chat Hard": 0.7774, "reward-bench/Safety": 0.8622, "reward-bench/Reasoning": 0.9513 } }, { "id": "Salesforce/SFR-nemo-12B-Judge-r", "name": "Salesforce/SFR-nemo-12B-Judge-r", "developer": "Salesforce", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9027, "reward-bench/Chat": 0.9721, "reward-bench/Chat Hard": 0.8224, "reward-bench/Safety": 0.8649, "reward-bench/Reasoning": 0.9513 } }, { "id": "salesforce/xlam-2-1b-fc-r-fc", "name": "xLAM-2-1b-fc-r (FC)", "developer": "salesforce", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 65.0, "bfcl/bfcl.overall.overall_accuracy": 30.44, "bfcl/bfcl.overall.total_cost_usd": 2.79, "bfcl/bfcl.overall.latency_mean_s": 2.84, "bfcl/bfcl.overall.latency_std_s": 2.35, "bfcl/bfcl.overall.latency_p95_s": 6.52, "bfcl/bfcl.non_live.ast_accuracy": 69.04, "bfcl/bfcl.non_live.simple_ast_accuracy": 64.17, "bfcl/bfcl.non_live.multiple_ast_accuracy": 82.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 73.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 56.0, "bfcl/bfcl.live.live_accuracy": 55.14, "bfcl/bfcl.live.live_simple_ast_accuracy": 68.22, "bfcl/bfcl.live.live_multiple_ast_accuracy": 52.8, "bfcl/bfcl.live.live_parallel_ast_accuracy": 43.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 25.0, "bfcl/bfcl.multi_turn.accuracy": 36.0, "bfcl/bfcl.multi_turn.base_accuracy": 45.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 36.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 37.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 25.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 3.87, "bfcl/bfcl.memory.kv_accuracy": 3.87, "bfcl/bfcl.memory.vector_accuracy": 3.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 3.87, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 64.47 } }, { "id": "salesforce/xlam-2-32b-fc-r-fc", "name": "xLAM-2-32b-fc-r (FC)", "developer": "salesforce", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 18.0, "bfcl/bfcl.overall.overall_accuracy": 54.66, "bfcl/bfcl.overall.total_cost_usd": 6.0, "bfcl/bfcl.overall.latency_mean_s": 6.94, "bfcl/bfcl.overall.latency_std_s": 8.21, "bfcl/bfcl.overall.latency_p95_s": 17.66, "bfcl/bfcl.non_live.ast_accuracy": 89.6, "bfcl/bfcl.non_live.simple_ast_accuracy": 80.42, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 91.0, "bfcl/bfcl.live.live_accuracy": 75.5, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.17, "bfcl/bfcl.live.live_multiple_ast_accuracy": 74.64, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 69.5, "bfcl/bfcl.multi_turn.base_accuracy": 81.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 72.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 67.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 56.5, "bfcl/bfcl.web_search.accuracy": 25.5, "bfcl/bfcl.web_search.base_accuracy": 37.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 14.0, "bfcl/bfcl.memory.accuracy": 20.86, "bfcl/bfcl.memory.kv_accuracy": 6.45, "bfcl/bfcl.memory.vector_accuracy": 10.32, "bfcl/bfcl.memory.recursive_summarization_accuracy": 45.81, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 80.23 } }, { "id": "salesforce/xlam-2-3b-fc-r-fc", "name": "xLAM-2-3b-fc-r (FC)", "developer": "salesforce", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 42.0, "bfcl/bfcl.overall.overall_accuracy": 41.22, "bfcl/bfcl.overall.total_cost_usd": 3.36, "bfcl/bfcl.overall.latency_mean_s": 3.8, "bfcl/bfcl.overall.latency_std_s": 3.59, "bfcl/bfcl.overall.latency_p95_s": 8.79, "bfcl/bfcl.non_live.ast_accuracy": 82.96, "bfcl/bfcl.non_live.simple_ast_accuracy": 75.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 91.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 86.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 79.0, "bfcl/bfcl.live.live_accuracy": 62.92, "bfcl/bfcl.live.live_simple_ast_accuracy": 73.26, "bfcl/bfcl.live.live_multiple_ast_accuracy": 60.68, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 50.0, "bfcl/bfcl.multi_turn.accuracy": 58.38, "bfcl/bfcl.multi_turn.base_accuracy": 71.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 59.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 57.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 45.5, "bfcl/bfcl.web_search.accuracy": 2.5, "bfcl/bfcl.web_search.base_accuracy": 3.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 11.4, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 5.81, "bfcl/bfcl.memory.recursive_summarization_accuracy": 22.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 63.45 } }, { "id": "salesforce/xlam-2-70b-fc-r-fc", "name": "xLAM-2-70b-fc-r (FC)", "developer": "salesforce", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 22.0, "bfcl/bfcl.overall.overall_accuracy": 53.07, "bfcl/bfcl.overall.total_cost_usd": 25.1, "bfcl/bfcl.overall.latency_mean_s": 28.06, "bfcl/bfcl.overall.latency_std_s": 68.77, "bfcl/bfcl.overall.latency_p95_s": 91.21, "bfcl/bfcl.non_live.ast_accuracy": 88.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 78.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 94.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 72.17, "bfcl/bfcl.live.live_simple_ast_accuracy": 77.91, "bfcl/bfcl.live.live_multiple_ast_accuracy": 71.13, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 77.38, "bfcl/bfcl.multi_turn.base_accuracy": 82.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 77.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 74.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 76.0, "bfcl/bfcl.web_search.accuracy": 15.0, "bfcl/bfcl.web_search.base_accuracy": 17.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 13.0, "bfcl/bfcl.memory.accuracy": 14.41, "bfcl/bfcl.memory.kv_accuracy": 2.58, "bfcl/bfcl.memory.vector_accuracy": 10.97, "bfcl/bfcl.memory.recursive_summarization_accuracy": 29.68, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.11 } }, { "id": "salesforce/xlam-2-8b-fc-r-fc", "name": "xLAM-2-8b-fc-r (FC)", "developer": "salesforce", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 34.0, "bfcl/bfcl.overall.overall_accuracy": 46.68, "bfcl/bfcl.overall.total_cost_usd": 20.92, "bfcl/bfcl.overall.latency_mean_s": 22.65, "bfcl/bfcl.overall.latency_std_s": 46.92, "bfcl/bfcl.overall.latency_p95_s": 108.81, "bfcl/bfcl.non_live.ast_accuracy": 84.58, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.83, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 83.5, "bfcl/bfcl.live.live_accuracy": 67.95, "bfcl/bfcl.live.live_simple_ast_accuracy": 75.58, "bfcl/bfcl.live.live_multiple_ast_accuracy": 66.57, "bfcl/bfcl.live.live_parallel_ast_accuracy": 56.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 70.0, "bfcl/bfcl.multi_turn.base_accuracy": 76.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 72.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 65.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 67.0, "bfcl/bfcl.web_search.accuracy": 6.5, "bfcl/bfcl.web_search.base_accuracy": 11.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 13.98, "bfcl/bfcl.memory.kv_accuracy": 5.81, "bfcl/bfcl.memory.vector_accuracy": 15.48, "bfcl/bfcl.memory.recursive_summarization_accuracy": 20.65, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 63.28 } }, { "id": "saltlux/luxia-21.4b-alignment-v1.0", "name": "luxia-21.4b-alignment-v1.0", "developer": "saltlux", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3693, "hfopenllm_v2/BBH": 0.6373, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4328, "hfopenllm_v2/MMLU-PRO": 0.3403 } }, { "id": "saltlux/luxia-21.4b-alignment-v1.2", "name": "luxia-21.4b-alignment-v1.2", "developer": "saltlux", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4115, "hfopenllm_v2/BBH": 0.6371, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.3473 } }, { "id": "sam-paech/Darkest-muse-v1", "name": "Darkest-muse-v1", "developer": "sam-paech", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7344, "hfopenllm_v2/BBH": 0.5968, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4502, "hfopenllm_v2/MMLU-PRO": 0.4184 } }, { "id": "sam-paech/Delirium-v1", "name": "Delirium-v1", "developer": "sam-paech", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.5962, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4514, "hfopenllm_v2/MMLU-PRO": 0.419 } }, { "id": "sam-paech/Quill-v1", "name": "Quill-v1", "developer": "sam-paech", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7122, "hfopenllm_v2/BBH": 0.5969, "hfopenllm_v2/MATH Level 5": 0.2122, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4555, "hfopenllm_v2/MMLU-PRO": 0.4171 } }, { "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B", "name": "Kunoichi-DPO-v2-7B", "developer": "SanjiWatsuki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5431, "hfopenllm_v2/BBH": 0.4416, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4188, "hfopenllm_v2/MMLU-PRO": 0.3107 } }, { "id": "SanjiWatsuki/Silicon-Maid-7B", "name": "Silicon-Maid-7B", "developer": "SanjiWatsuki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5368, "hfopenllm_v2/BBH": 0.4128, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4188, "hfopenllm_v2/MMLU-PRO": 0.3083 } }, { "id": "Sao10K/70B-L3.3-Cirrus-x1", "name": "70B-L3.3-Cirrus-x1", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6681, "hfopenllm_v2/BBH": 0.7029, "hfopenllm_v2/MATH Level 5": 0.3739, "hfopenllm_v2/GPQA": 0.4497, "hfopenllm_v2/MUSR": 0.4842, "hfopenllm_v2/MMLU-PRO": 0.5378 } }, { "id": "Sao10K/Fimbulvetr-11B-v2", "name": "Fimbulvetr-11B-v2", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.51, "hfopenllm_v2/BBH": 0.4544, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.3301 } }, { "id": "Sao10K/L3-70B-Euryale-v2.1", "name": "L3-70B-Euryale-v2.1", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7281, "hfopenllm_v2/BBH": 0.6503, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4196, "hfopenllm_v2/MMLU-PRO": 0.5096 } }, { "id": "Sao10K/L3-8B-Lunaris-v1", "name": "L3-8B-Lunaris-v1", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6895, "hfopenllm_v2/BBH": 0.5235, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3727, "hfopenllm_v2/MMLU-PRO": 0.3787 } }, { "id": "Sao10K/L3-8B-Niitama-v1", "name": "L3-8B-Niitama-v1", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6791, "hfopenllm_v2/BBH": 0.5303, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3807, "hfopenllm_v2/MMLU-PRO": 0.3701 } }, { "id": "Sao10K/L3-8B-Stheno-v3.2", "name": "L3-8B-Stheno-v3.2", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6873, "hfopenllm_v2/BBH": 0.5228, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.3768 } }, { "id": "Sao10K/L3-8B-Stheno-v3.3-32K", "name": "L3-8B-Stheno-v3.3-32K", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4604, "hfopenllm_v2/BBH": 0.3844, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3725, "hfopenllm_v2/MMLU-PRO": 0.1896 } }, { "id": "Sao10K/MN-12B-Lyra-v3", "name": "MN-12B-Lyra-v3", "developer": "Sao10K", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4486, "hfopenllm_v2/BBH": 0.4804, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.3249 } }, { "id": "sarvamai/OpenHathi-7B-Hi-v0.1-Base", "name": "OpenHathi-7B-Hi-v0.1-Base", "developer": "sarvamai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1804, "hfopenllm_v2/BBH": 0.3354, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3658, "hfopenllm_v2/MMLU-PRO": 0.1543 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B", "name": "Linkbricks-Horizon-AI-Avengers-V1-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7972, "hfopenllm_v2/BBH": 0.7001, "hfopenllm_v2/MATH Level 5": 0.6027, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.4538, "hfopenllm_v2/MMLU-PRO": 0.5793 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B", "name": "Linkbricks-Horizon-AI-Avengers-V2-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7956, "hfopenllm_v2/BBH": 0.7023, "hfopenllm_v2/MATH Level 5": 0.5665, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.572 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B", "name": "Linkbricks-Horizon-AI-Avengers-V3-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8249, "hfopenllm_v2/BBH": 0.6913, "hfopenllm_v2/MATH Level 5": 0.6178, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4275, "hfopenllm_v2/MMLU-PRO": 0.5664 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B", "name": "Linkbricks-Horizon-AI-Avengers-V4-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7631, "hfopenllm_v2/BBH": 0.692, "hfopenllm_v2/MATH Level 5": 0.5363, "hfopenllm_v2/GPQA": 0.3616, "hfopenllm_v2/MUSR": 0.4643, "hfopenllm_v2/MMLU-PRO": 0.5752 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B", "name": "Linkbricks-Horizon-AI-Avengers-V5-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7516, "hfopenllm_v2/BBH": 0.6929, "hfopenllm_v2/MATH Level 5": 0.5461, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4709, "hfopenllm_v2/MMLU-PRO": 0.5762 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B", "name": "Linkbricks-Horizon-AI-Avengers-V6-32B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8209, "hfopenllm_v2/BBH": 0.689, "hfopenllm_v2/MATH Level 5": 0.6224, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4274, "hfopenllm_v2/MMLU-PRO": 0.5672 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", "name": "Linkbricks-Horizon-AI-Korean-Avengers-V2-27B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8146, "hfopenllm_v2/BBH": 0.6463, "hfopenllm_v2/MATH Level 5": 0.2802, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4139, "hfopenllm_v2/MMLU-PRO": 0.4599 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", "name": "Linkbricks-Horizon-AI-Korean-Avengers-V3-27B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8142, "hfopenllm_v2/BBH": 0.6404, "hfopenllm_v2/MATH Level 5": 0.2492, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4467, "hfopenllm_v2/MMLU-PRO": 0.4524 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B", "name": "Linkbricks-Horizon-AI-Korean-Superb-22B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6767, "hfopenllm_v2/BBH": 0.5626, "hfopenllm_v2/MATH Level 5": 0.2372, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.3908, "hfopenllm_v2/MMLU-PRO": 0.3871 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B", "name": "Linkbricks-Horizon-AI-Korean-Superb-27B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7768, "hfopenllm_v2/BBH": 0.6518, "hfopenllm_v2/MATH Level 5": 0.2719, "hfopenllm_v2/GPQA": 0.3599, "hfopenllm_v2/MUSR": 0.4791, "hfopenllm_v2/MMLU-PRO": 0.4647 } }, { "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B", "name": "Linkbricks-Horizon-AI-Superb-27B", "developer": "Saxo", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7302, "hfopenllm_v2/BBH": 0.6186, "hfopenllm_v2/MATH Level 5": 0.2221, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.465, "hfopenllm_v2/MMLU-PRO": 0.406 } }, { "id": "schnapss/testmerge-7b", "name": "testmerge-7b", "developer": "schnapss", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3922, "hfopenllm_v2/BBH": 0.5187, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4686, "hfopenllm_v2/MMLU-PRO": 0.306 } }, { "id": "Schrieffer/Llama-SARM-4B", "name": "Schrieffer/Llama-SARM-4B", "developer": "Schrieffer", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7379, "reward-bench/Factuality": 0.6874, "reward-bench/Precise IF": 0.4281, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.9178, "reward-bench/Focus": 0.9556, "reward-bench/Ties": 0.7939 } }, { "id": "sci-m-wang/deepseek-llm-7b-chat-sa-v0.1", "name": "deepseek-llm-7b-chat-sa-v0.1", "developer": "sci-m-wang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4036, "hfopenllm_v2/BBH": 0.3718, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.4173, "hfopenllm_v2/MMLU-PRO": 0.2209 } }, { "id": "sci-m-wang/Mistral-7B-Instruct-sa-v0.1", "name": "Mistral-7B-Instruct-sa-v0.1", "developer": "sci-m-wang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4335, "hfopenllm_v2/BBH": 0.3273, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.39, "hfopenllm_v2/MMLU-PRO": 0.2362 } }, { "id": "sci-m-wang/Phi-3-mini-4k-instruct-sa-v0.1", "name": "Phi-3-mini-4k-instruct-sa-v0.1", "developer": "sci-m-wang", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5021, "hfopenllm_v2/BBH": 0.5502, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3985 } }, { "id": "SeaLLMs/SeaLLM-7B-v2", "name": "SeaLLM-7B-v2", "developer": "SeaLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3671, "hfopenllm_v2/BBH": 0.4902, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.407, "hfopenllm_v2/MMLU-PRO": 0.3083 } }, { "id": "SeaLLMs/SeaLLM-7B-v2.5", "name": "SeaLLM-7B-v2.5", "developer": "SeaLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4522, "hfopenllm_v2/BBH": 0.498, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3203 } }, { "id": "SeaLLMs/SeaLLMs-v3-7B-Chat", "name": "SeaLLMs-v3-7B-Chat", "developer": "SeaLLMs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4377, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4174, "hfopenllm_v2/MMLU-PRO": 0.3895 } }, { "id": "securin/Securin-LLM-V2.5-Qwen-1.5B", "name": "Securin-LLM-V2.5-Qwen-1.5B", "developer": "securin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1492, "hfopenllm_v2/BBH": 0.3158, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3606, "hfopenllm_v2/MMLU-PRO": 0.1615 } }, { "id": "senseable/WestLake-7B-v2", "name": "WestLake-7B-v2", "developer": "senseable", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4419, "hfopenllm_v2/BBH": 0.4073, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.3937, "hfopenllm_v2/MMLU-PRO": 0.2764 } }, { "id": "SenseLLM/ReflectionCoder-CL-34B", "name": "ReflectionCoder-CL-34B", "developer": "SenseLLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4008, "hfopenllm_v2/BBH": 0.3953, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.4155, "hfopenllm_v2/MMLU-PRO": 0.1424 } }, { "id": "SenseLLM/ReflectionCoder-DS-33B", "name": "ReflectionCoder-DS-33B", "developer": "SenseLLM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3787, "hfopenllm_v2/BBH": 0.3449, "hfopenllm_v2/MATH Level 5": 0.0302, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3343, "hfopenllm_v2/MMLU-PRO": 0.1202 } }, { "id": "SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B", "name": "Dobby-Mini-Leashed-Llama-3.1-8B", "developer": "SentientAGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7847, "hfopenllm_v2/BBH": 0.5138, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4254, "hfopenllm_v2/MMLU-PRO": 0.3694 } }, { "id": "SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B", "name": "Dobby-Mini-Unhinged-Llama-3.1-8B", "developer": "SentientAGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7457, "hfopenllm_v2/BBH": 0.5142, "hfopenllm_v2/MATH Level 5": 0.1563, "hfopenllm_v2/GPQA": 0.3062, "hfopenllm_v2/MUSR": 0.4013, "hfopenllm_v2/MMLU-PRO": 0.3585 } }, { "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", "name": "SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo", "developer": "SeppeV", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0955, "hfopenllm_v2/BBH": 0.3073, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.4032, "hfopenllm_v2/MMLU-PRO": 0.1161 } }, { "id": "sequelbox/gemma-2-9B-MOTH", "name": "gemma-2-9B-MOTH", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2059, "hfopenllm_v2/BBH": 0.308, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3409, "hfopenllm_v2/MMLU-PRO": 0.114 } }, { "id": "sequelbox/Llama3.1-70B-PlumChat", "name": "Llama3.1-70B-PlumChat", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5616, "hfopenllm_v2/BBH": 0.6753, "hfopenllm_v2/MATH Level 5": 0.3029, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4774, "hfopenllm_v2/MMLU-PRO": 0.5164 } }, { "id": "sequelbox/Llama3.1-8B-MOTH", "name": "Llama3.1-8B-MOTH", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5245, "hfopenllm_v2/BBH": 0.4902, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3689, "hfopenllm_v2/MMLU-PRO": 0.3339 } }, { "id": "sequelbox/Llama3.1-8B-PlumChat", "name": "Llama3.1-8B-PlumChat", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4243, "hfopenllm_v2/BBH": 0.3873, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3755, "hfopenllm_v2/MMLU-PRO": 0.2127 } }, { "id": "sequelbox/Llama3.1-8B-PlumCode", "name": "Llama3.1-8B-PlumCode", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2045, "hfopenllm_v2/BBH": 0.3368, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3773, "hfopenllm_v2/MMLU-PRO": 0.2335 } }, { "id": "sequelbox/Llama3.1-8B-PlumMath", "name": "Llama3.1-8B-PlumMath", "developer": "sequelbox", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2242, "hfopenllm_v2/BBH": 0.4032, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3919, "hfopenllm_v2/MMLU-PRO": 0.2975 } }, { "id": "sethuiyer/Llama-3.1-8B-Experimental-1206-Instruct", "name": "Llama-3.1-8B-Experimental-1206-Instruct", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6967, "hfopenllm_v2/BBH": 0.5104, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3966, "hfopenllm_v2/MMLU-PRO": 0.3529 } }, { "id": "sethuiyer/Llama-3.1-8B-Experimental-1208-Instruct", "name": "Llama-3.1-8B-Experimental-1208-Instruct", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.61, "hfopenllm_v2/BBH": 0.4964, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.379, "hfopenllm_v2/MMLU-PRO": 0.3511 } }, { "id": "sethuiyer/Llamaverse-3.1-8B-Instruct", "name": "Llamaverse-3.1-8B-Instruct", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6185, "hfopenllm_v2/BBH": 0.5414, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3762, "hfopenllm_v2/MMLU-PRO": 0.3523 } }, { "id": "sethuiyer/LlamaZero-3.1-8B-Experimental-1208", "name": "LlamaZero-3.1-8B-Experimental-1208", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6051, "hfopenllm_v2/BBH": 0.4981, "hfopenllm_v2/MATH Level 5": 0.108, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.3 } }, { "id": "sethuiyer/Llamazing-3.1-8B-Instruct", "name": "Llamazing-3.1-8B-Instruct", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5711, "hfopenllm_v2/BBH": 0.5291, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.3976, "hfopenllm_v2/MMLU-PRO": 0.3606 } }, { "id": "sethuiyer/Qwen2.5-7B-Anvita", "name": "Qwen2.5-7B-Anvita", "developer": "sethuiyer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.648, "hfopenllm_v2/BBH": 0.5466, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4337, "hfopenllm_v2/MMLU-PRO": 0.4166 } }, { "id": "SF-Foundation/TextEval-Llama3.1-70B", "name": "SF-Foundation/TextEval-Llama3.1-70B", "developer": "SF-Foundation", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9348, "reward-bench/Chat": 0.9413, "reward-bench/Chat Hard": 0.9013, "reward-bench/Safety": 0.9324, "reward-bench/Reasoning": 0.9641 } }, { "id": "SF-Foundation/TextEval-OffsetBias-12B", "name": "SF-Foundation/TextEval-OffsetBias-12B", "developer": "SF-Foundation", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9105, "reward-bench/Chat": 0.919, "reward-bench/Chat Hard": 0.8662, "reward-bench/Safety": 0.9203, "reward-bench/Reasoning": 0.9365 } }, { "id": "sfairXC/FsfairX-LLaMA3-RM-v0.1", "name": "sfairXC/FsfairX-LLaMA3-RM-v0.1", "developer": "sfairXC", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6292, "reward-bench/Chat": 0.9944, "reward-bench/Chat Hard": 0.6513, "reward-bench/Safety": 0.7667, "reward-bench/Reasoning": 0.8644, "reward-bench/Prior Sets (0.5 weight)": 0.7492, "reward-bench/Factuality": 0.5916, "reward-bench/Precise IF": 0.4188, "reward-bench/Math": 0.6284, "reward-bench/Focus": 0.7051, "reward-bench/Ties": 0.6647 } }, { "id": "shadowml/BeagSake-7B", "name": "BeagSake-7B", "developer": "shadowml", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5216, "hfopenllm_v2/BBH": 0.4711, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.2585 } }, { "id": "shadowml/Mixolar-4x7b", "name": "Mixolar-4x7b", "developer": "shadowml", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3893, "hfopenllm_v2/BBH": 0.5216, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4258, "hfopenllm_v2/MMLU-PRO": 0.3305 } }, { "id": "Sharathhebbar24/chat_gpt2_dpo", "name": "chat_gpt2_dpo", "developer": "Sharathhebbar24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0986, "hfopenllm_v2/BBH": 0.2902, "hfopenllm_v2/MATH Level 5": 0.0053, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "Sharathhebbar24/SSH_355M", "name": "SSH_355M", "developer": "Sharathhebbar24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1424, "hfopenllm_v2/BBH": 0.3099, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.1176 } }, { "id": "shastraai/Shastra-LLAMA2-Math-Commonsense-SFT", "name": "Shastra-LLAMA2-Math-Commonsense-SFT", "developer": "shastraai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3042, "hfopenllm_v2/BBH": 0.3843, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3604, "hfopenllm_v2/MMLU-PRO": 0.1997 } }, { "id": "ShikaiChen/LDL-Reward-Gemma-2-27B-v0.1", "name": "ShikaiChen/LDL-Reward-Gemma-2-27B-v0.1", "developer": "ShikaiChen", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9499, "reward-bench/Factuality": 0.7558, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.6448, "reward-bench/Safety": 0.9378, "reward-bench/Focus": 0.9131, "reward-bench/Ties": 0.7633, "reward-bench/Chat": 0.9637, "reward-bench/Chat Hard": 0.9079, "reward-bench/Reasoning": 0.9903 } }, { "id": "shivam9980/mistral-7b-news-cnn-merged", "name": "mistral-7b-news-cnn-merged", "developer": "shivam9980", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4634, "hfopenllm_v2/BBH": 0.3635, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4523, "hfopenllm_v2/MMLU-PRO": 0.2827 } }, { "id": "shivam9980/NEPALI-LLM", "name": "NEPALI-LLM", "developer": "shivam9980", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0417, "hfopenllm_v2/BBH": 0.3828, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4122, "hfopenllm_v2/MMLU-PRO": 0.2064 } }, { "id": "shivank21/mistral_dpo_self", "name": "mistral_dpo_self", "developer": "shivank21", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3403, "hfopenllm_v2/BBH": 0.3216, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3247, "hfopenllm_v2/MMLU-PRO": 0.2214 } }, { "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1", "name": "Uma-4x4B-Instruct-v0.1", "developer": "Shreyash2010", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5517, "hfopenllm_v2/BBH": 0.5512, "hfopenllm_v2/MATH Level 5": 0.1775, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4441, "hfopenllm_v2/MMLU-PRO": 0.387 } }, { "id": "shuttleai/shuttle-3", "name": "shuttle-3", "developer": "shuttleai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8154, "hfopenllm_v2/BBH": 0.742, "hfopenllm_v2/MATH Level 5": 0.46, "hfopenllm_v2/GPQA": 0.4119, "hfopenllm_v2/MUSR": 0.4377, "hfopenllm_v2/MMLU-PRO": 0.5716 } }, { "id": "shyamieee/Padma-v7.0", "name": "Padma-v7.0", "developer": "shyamieee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3841, "hfopenllm_v2/BBH": 0.5119, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.3029 } }, { "id": "Sicarius-Prototyping/bacon_and_food", "name": "bacon_and_food", "developer": "Sicarius-Prototyping", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.586, "hfopenllm_v2/BBH": 0.4725, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3884, "hfopenllm_v2/MMLU-PRO": 0.3263 } }, { "id": "Sicarius-Prototyping/Brainy_LLAMA", "name": "Brainy_LLAMA", "developer": "Sicarius-Prototyping", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5204, "hfopenllm_v2/BBH": 0.5117, "hfopenllm_v2/MATH Level 5": 0.1337, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4143, "hfopenllm_v2/MMLU-PRO": 0.3849 } }, { "id": "Sicarius-Prototyping/Micropenis_1B", "name": "Micropenis_1B", "developer": "Sicarius-Prototyping", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3461, "hfopenllm_v2/BBH": 0.3372, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3325, "hfopenllm_v2/MMLU-PRO": 0.186 } }, { "id": "SicariusSicariiStuff/2B-ad", "name": "2B-ad", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4379, "hfopenllm_v2/BBH": 0.4092, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4015, "hfopenllm_v2/MMLU-PRO": 0.2662 } }, { "id": "SicariusSicariiStuff/2B_or_not_2B", "name": "2B_or_not_2B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2062, "hfopenllm_v2/BBH": 0.3416, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3791, "hfopenllm_v2/MMLU-PRO": 0.1399 } }, { "id": "SicariusSicariiStuff/dn_ep02", "name": "dn_ep02", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5064, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.142, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.4316, "hfopenllm_v2/MMLU-PRO": 0.3998 } }, { "id": "SicariusSicariiStuff/Dusk_Rainbow", "name": "Dusk_Rainbow", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3588, "hfopenllm_v2/BBH": 0.4772, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4025, "hfopenllm_v2/MMLU-PRO": 0.3443 } }, { "id": "SicariusSicariiStuff/Eximius_Persona_5B", "name": "Eximius_Persona_5B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.656, "hfopenllm_v2/BBH": 0.4512, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.314 } }, { "id": "SicariusSicariiStuff/Impish_LLAMA_3B", "name": "Impish_LLAMA_3B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.463, "hfopenllm_v2/BBH": 0.4091, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3673, "hfopenllm_v2/MMLU-PRO": 0.2941 } }, { "id": "SicariusSicariiStuff/Impish_Mind_8B", "name": "Impish_Mind_8B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3179, "hfopenllm_v2/BBH": 0.4674, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.407, "hfopenllm_v2/MMLU-PRO": 0.3309 } }, { "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M", "name": "Impish_QWEN_14B-1M", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7868, "hfopenllm_v2/BBH": 0.6283, "hfopenllm_v2/MATH Level 5": 0.3965, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4615, "hfopenllm_v2/MMLU-PRO": 0.5044 } }, { "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M", "name": "Impish_QWEN_7B-1M", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6382, "hfopenllm_v2/BBH": 0.5372, "hfopenllm_v2/MATH Level 5": 0.3089, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4074, "hfopenllm_v2/MMLU-PRO": 0.4265 } }, { "id": "SicariusSicariiStuff/LLAMA-3_8B_Unaligned_BETA", "name": "LLAMA-3_8B_Unaligned_BETA", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3713, "hfopenllm_v2/BBH": 0.4717, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4119, "hfopenllm_v2/MMLU-PRO": 0.3465 } }, { "id": "SicariusSicariiStuff/Phi-Line_14B", "name": "Phi-Line_14B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6496, "hfopenllm_v2/BBH": 0.6154, "hfopenllm_v2/MATH Level 5": 0.386, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.5454 } }, { "id": "SicariusSicariiStuff/Phi-lthy4", "name": "Phi-lthy4", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7679, "hfopenllm_v2/BBH": 0.5879, "hfopenllm_v2/MATH Level 5": 0.1367, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4083, "hfopenllm_v2/MMLU-PRO": 0.4333 } }, { "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored", "name": "Qwen2.5-14B_Uncencored", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3158, "hfopenllm_v2/BBH": 0.6309, "hfopenllm_v2/MATH Level 5": 0.318, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4517, "hfopenllm_v2/MMLU-PRO": 0.5266 } }, { "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored", "name": "Qwen2.5-14B_Uncensored", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3173, "hfopenllm_v2/BBH": 0.6309, "hfopenllm_v2/MATH Level 5": 0.318, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4517, "hfopenllm_v2/MMLU-PRO": 0.5266 } }, { "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct", "name": "Qwen2.5-14B_Uncensored_Instruct", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3789, "hfopenllm_v2/BBH": 0.5937, "hfopenllm_v2/MATH Level 5": 0.3285, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.3697, "hfopenllm_v2/MMLU-PRO": 0.5127 } }, { "id": "SicariusSicariiStuff/Redemption_Wind_24B", "name": "Redemption_Wind_24B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2501, "hfopenllm_v2/BBH": 0.6428, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4262, "hfopenllm_v2/MMLU-PRO": 0.5432 } }, { "id": "SicariusSicariiStuff/Winged_Imp_8B", "name": "Winged_Imp_8B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.743, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4148, "hfopenllm_v2/MMLU-PRO": 0.3639 } }, { "id": "SicariusSicariiStuff/Wingless_Imp_8B", "name": "Wingless_Imp_8B", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.743, "hfopenllm_v2/BBH": 0.512, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4148, "hfopenllm_v2/MMLU-PRO": 0.3639 } }, { "id": "SicariusSicariiStuff/Zion_Alpha", "name": "Zion_Alpha", "developer": "SicariusSicariiStuff", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3324, "hfopenllm_v2/BBH": 0.4932, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4727, "hfopenllm_v2/MMLU-PRO": 0.3132 } }, { "id": "silma-ai/SILMA-9B-Instruct-v1.0", "name": "SILMA-9B-Instruct-v1.0", "developer": "silma-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5842, "hfopenllm_v2/BBH": 0.5219, "hfopenllm_v2/MATH Level 5": 0.1163, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.4637, "hfopenllm_v2/MMLU-PRO": 0.392 } }, { "id": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", "name": "SILMA-Kashif-2B-Instruct-v1.0", "developer": "silma-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1181, "hfopenllm_v2/BBH": 0.3793, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.2258 } }, { "id": "siqi00/Mistral-7B-DFT", "name": "Mistral-7B-DFT", "developer": "siqi00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5569, "hfopenllm_v2/BBH": 0.4665, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.2963 } }, { "id": "siqi00/Mistral-7B-DFT2", "name": "Mistral-7B-DFT2", "developer": "siqi00", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5804, "hfopenllm_v2/BBH": 0.3968, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.2852 } }, { "id": "skumar9/Llama-medx_v2", "name": "Llama-medx_v2", "developer": "skumar9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4462, "hfopenllm_v2/BBH": 0.4909, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.3463 } }, { "id": "skymizer/Llama2-7b-sft-chat-custom-template-dpo", "name": "Llama2-7b-sft-chat-custom-template-dpo", "developer": "skymizer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2353, "hfopenllm_v2/BBH": 0.3688, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.4429, "hfopenllm_v2/MMLU-PRO": 0.1946 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora", "name": "SKY-Ko-Llama3.1-8B-lora", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5058, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.1-8B-lora-epoch1", "name": "SKY-Ko-Llama3.1-8B-lora-epoch1", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5058, "hfopenllm_v2/BBH": 0.5088, "hfopenllm_v2/MATH Level 5": 0.1548, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3777 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch3", "name": "SKY-Ko-Llama3.2-1B-lora-epoch3", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3247, "hfopenllm_v2/BBH": 0.3167, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3382, "hfopenllm_v2/MMLU-PRO": 0.1279 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-epoch5", "name": "SKY-Ko-Llama3.2-1B-lora-epoch5", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.436, "hfopenllm_v2/BBH": 0.3406, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3471, "hfopenllm_v2/MMLU-PRO": 0.1946 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch3", "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch3", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.436, "hfopenllm_v2/BBH": 0.3406, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3471, "hfopenllm_v2/MMLU-PRO": 0.1946 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-1B-lora-v2-epoch5", "name": "SKY-Ko-Llama3.2-1B-lora-v2-epoch5", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4247, "hfopenllm_v2/BBH": 0.3397, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.3458, "hfopenllm_v2/MMLU-PRO": 0.1946 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch1", "name": "SKY-Ko-Llama3.2-3B-lora-epoch1", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3522, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch2", "name": "SKY-Ko-Llama3.2-3B-lora-epoch2", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3522, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "SkyOrbis/SKY-Ko-Llama3.2-3B-lora-epoch3", "name": "SKY-Ko-Llama3.2-3B-lora-epoch3", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5331, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3522, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct", "name": "SKY-Ko-Qwen2.5-3B-Instruct", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3534, "hfopenllm_v2/BBH": 0.4265, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4024, "hfopenllm_v2/MMLU-PRO": 0.2812 } }, { "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", "name": "SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3819, "hfopenllm_v2/BBH": 0.5078, "hfopenllm_v2/MATH Level 5": 0.1866, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4436, "hfopenllm_v2/MMLU-PRO": 0.3914 } }, { "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", "name": "SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000", "developer": "SkyOrbis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3812, "hfopenllm_v2/BBH": 0.539, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.4238, "hfopenllm_v2/MMLU-PRO": 0.4238 } }, { "id": "Skywork/Skywork-Critic-Llama-3.1-70B", "name": "Skywork/Skywork-Critic-Llama-3.1-70B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9331, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.8794, "reward-bench/Safety": 0.9311, "reward-bench/Reasoning": 0.9554 } }, { "id": "Skywork/Skywork-Critic-Llama-3.1-8B", "name": "Skywork/Skywork-Critic-Llama-3.1-8B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8896, "reward-bench/Chat": 0.9358, "reward-bench/Chat Hard": 0.8136, "reward-bench/Safety": 0.9108, "reward-bench/Reasoning": 0.898 } }, { "id": "Skywork/Skywork-o1-Open-Llama-3.1-8B", "name": "Skywork-o1-Open-Llama-3.1-8B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3518, "hfopenllm_v2/BBH": 0.4516, "hfopenllm_v2/MATH Level 5": 0.5211, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3156, "hfopenllm_v2/MMLU-PRO": 0.203 } }, { "id": "Skywork/Skywork-Reward-Gemma-2-27B", "name": "Skywork/Skywork-Reward-Gemma-2-27B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.938, "reward-bench/Factuality": 0.7368, "reward-bench/Precise IF": 0.4031, "reward-bench/Math": 0.7049, "reward-bench/Safety": 0.9189, "reward-bench/Focus": 0.9323, "reward-bench/Ties": 0.8261, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.9145, "reward-bench/Reasoning": 0.9606 } }, { "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7807, "hfopenllm_v2/BBH": 0.636, "hfopenllm_v2/MATH Level 5": 0.2273, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4103, "reward-bench/Score": 0.9426, "reward-bench/Factuality": 0.7674, "reward-bench/Precise IF": 0.375, "reward-bench/Math": 0.6721, "reward-bench/Safety": 0.9297, "reward-bench/Focus": 0.9172, "reward-bench/Ties": 0.8182, "reward-bench/Chat": 0.9609, "reward-bench/Chat Hard": 0.8991, "reward-bench/Reasoning": 0.9807 } }, { "id": "Skywork/Skywork-Reward-Llama-3.1-8B", "name": "Skywork/Skywork-Reward-Llama-3.1-8B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7314, "reward-bench/Chat": 0.9581, "reward-bench/Chat Hard": 0.8728, "reward-bench/Safety": 0.9333, "reward-bench/Reasoning": 0.962, "reward-bench/Factuality": 0.6989, "reward-bench/Precise IF": 0.425, "reward-bench/Math": 0.6284, "reward-bench/Focus": 0.9616, "reward-bench/Ties": 0.741 } }, { "id": "Skywork/Skywork-Reward-Llama-3.1-8B-v0.2", "name": "Skywork/Skywork-Reward-Llama-3.1-8B-v0.2", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7175, "reward-bench/Chat": 0.9469, "reward-bench/Chat Hard": 0.8838, "reward-bench/Safety": 0.9422, "reward-bench/Reasoning": 0.9675, "reward-bench/Factuality": 0.6968, "reward-bench/Precise IF": 0.4062, "reward-bench/Math": 0.6011, "reward-bench/Focus": 0.9414, "reward-bench/Ties": 0.7169 } }, { "id": "Skywork/Skywork-Reward-V2-Llama-3.1-8B", "name": "Skywork/Skywork-Reward-V2-Llama-3.1-8B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8413, "reward-bench/Factuality": 0.8463, "reward-bench/Precise IF": 0.6625, "reward-bench/Math": 0.776, "reward-bench/Safety": 0.9667, "reward-bench/Focus": 0.9838, "reward-bench/Ties": 0.8124 } }, { "id": "Skywork/Skywork-Reward-V2-Llama-3.2-1B", "name": "Skywork/Skywork-Reward-V2-Llama-3.2-1B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6438, "reward-bench/Factuality": 0.6084, "reward-bench/Precise IF": 0.4562, "reward-bench/Math": 0.6011, "reward-bench/Safety": 0.8733, "reward-bench/Focus": 0.8929, "reward-bench/Ties": 0.4306 } }, { "id": "Skywork/Skywork-Reward-V2-Llama-3.2-3B", "name": "Skywork/Skywork-Reward-V2-Llama-3.2-3B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7466, "reward-bench/Factuality": 0.7621, "reward-bench/Precise IF": 0.4562, "reward-bench/Math": 0.694, "reward-bench/Safety": 0.9311, "reward-bench/Focus": 0.9596, "reward-bench/Ties": 0.6768 } }, { "id": "Skywork/Skywork-Reward-V2-Qwen3-0.6B", "name": "Skywork/Skywork-Reward-V2-Qwen3-0.6B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6125, "reward-bench/Factuality": 0.58, "reward-bench/Precise IF": 0.4, "reward-bench/Math": 0.7158, "reward-bench/Safety": 0.8444, "reward-bench/Focus": 0.7949, "reward-bench/Ties": 0.3397 } }, { "id": "Skywork/Skywork-Reward-V2-Qwen3-1.7B", "name": "Skywork/Skywork-Reward-V2-Qwen3-1.7B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6818, "reward-bench/Factuality": 0.6568, "reward-bench/Precise IF": 0.4437, "reward-bench/Math": 0.7268, "reward-bench/Safety": 0.8911, "reward-bench/Focus": 0.8848, "reward-bench/Ties": 0.4872 } }, { "id": "Skywork/Skywork-Reward-V2-Qwen3-4B", "name": "Skywork/Skywork-Reward-V2-Qwen3-4B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7551, "reward-bench/Factuality": 0.7737, "reward-bench/Precise IF": 0.4625, "reward-bench/Math": 0.7322, "reward-bench/Safety": 0.9222, "reward-bench/Focus": 0.9657, "reward-bench/Ties": 0.6743 } }, { "id": "Skywork/Skywork-Reward-V2-Qwen3-8B", "name": "Skywork/Skywork-Reward-V2-Qwen3-8B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7837, "reward-bench/Factuality": 0.7989, "reward-bench/Precise IF": 0.5, "reward-bench/Math": 0.7705, "reward-bench/Safety": 0.94, "reward-bench/Focus": 0.9636, "reward-bench/Ties": 0.7294 } }, { "id": "Skywork/Skywork-VL-Reward-7B", "name": "Skywork/Skywork-VL-Reward-7B", "developer": "Skywork", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.9007, "reward-bench/Factuality": 0.6063, "reward-bench/Precise IF": 0.35, "reward-bench/Math": 0.6339, "reward-bench/Safety": 0.9108, "reward-bench/Focus": 0.8909, "reward-bench/Ties": 0.7586, "reward-bench/Chat": 0.8994, "reward-bench/Chat Hard": 0.875, "reward-bench/Reasoning": 0.9176 } }, { "id": "snowflake/snowflake-arctic-instruct", "name": "Arctic Instruct", "developer": "snowflake", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.338, "helm_lite/NarrativeQA": 0.654, "helm_lite/NaturalQuestions (closed-book)": 0.39, "helm_lite/OpenbookQA": 0.828, "helm_lite/MMLU": 0.575, "helm_lite/MATH": 0.519, "helm_lite/GSM8K": 0.768, "helm_lite/LegalBench": 0.588, "helm_lite/MedQA": 0.581, "helm_lite/WMT 2014": 0.172, "helm_mmlu/MMLU All Subjects": 0.677, "helm_mmlu/Abstract Algebra": 0.35, "helm_mmlu/Anatomy": 0.652, "helm_mmlu/College Physics": 0.461, "helm_mmlu/Computer Security": 0.84, "helm_mmlu/Econometrics": 0.5, "helm_mmlu/Global Facts": 0.39, "helm_mmlu/Jurisprudence": 0.741, "helm_mmlu/Philosophy": 0.752, "helm_mmlu/Professional Psychology": 0.724, "helm_mmlu/Us Foreign Policy": 0.88, "helm_mmlu/Astronomy": 0.763, "helm_mmlu/Business Ethics": 0.69, "helm_mmlu/Clinical Knowledge": 0.781, "helm_mmlu/Conceptual Physics": 0.634, "helm_mmlu/Electrical Engineering": 0.662, "helm_mmlu/Elementary Mathematics": 0.481, "helm_mmlu/Formal Logic": 0.444, "helm_mmlu/High School World History": 0.827, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.826, "helm_mmlu/Logical Fallacies": 0.779, "helm_mmlu/Machine Learning": 0.473, "helm_mmlu/Management": 0.796, "helm_mmlu/Marketing": 0.902, "helm_mmlu/Medical Genetics": 0.76, "helm_mmlu/Miscellaneous": 0.875, "helm_mmlu/Moral Scenarios": 0.28, "helm_mmlu/Nutrition": 0.725, "helm_mmlu/Prehistory": 0.79, "helm_mmlu/Public Relations": 0.664, "helm_mmlu/Security Studies": 0.78, "helm_mmlu/Sociology": 0.891, "helm_mmlu/Virology": 0.536, "helm_mmlu/World Religions": 0.854, "helm_mmlu/Mean win rate": 0.565 } }, { "id": "Solshine/Brimful-merged-replete", "name": "Brimful-merged-replete", "developer": "Solshine", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1761, "hfopenllm_v2/BBH": 0.2883, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3421, "hfopenllm_v2/MMLU-PRO": 0.1085 } }, { "id": "Solshine/Llama-3-1-big-thoughtful-passthrough-merge-2", "name": "Llama-3-1-big-thoughtful-passthrough-merge-2", "developer": "Solshine", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2547, "hfopenllm_v2/BBH": 0.3209, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3889, "hfopenllm_v2/MMLU-PRO": 0.1185 } }, { "id": "someon98/qwen-CoMa-0.5b", "name": "qwen-CoMa-0.5b", "developer": "someon98", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2277, "hfopenllm_v2/BBH": 0.2953, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.1099 } }, { "id": "sometimesanotion/ChocoTrio-14B-v1", "name": "ChocoTrio-14B-v1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7089, "hfopenllm_v2/BBH": 0.6506, "hfopenllm_v2/MATH Level 5": 0.3973, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4821, "hfopenllm_v2/MMLU-PRO": 0.537 } }, { "id": "sometimesanotion/IF-reasoning-experiment-40", "name": "IF-reasoning-experiment-40", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.633, "hfopenllm_v2/BBH": 0.6112, "hfopenllm_v2/MATH Level 5": 0.3716, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.5194, "hfopenllm_v2/MMLU-PRO": 0.5025 } }, { "id": "sometimesanotion/IF-reasoning-experiment-80", "name": "IF-reasoning-experiment-80", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5463, "hfopenllm_v2/BBH": 0.421, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.5025, "hfopenllm_v2/MMLU-PRO": 0.3368 } }, { "id": "sometimesanotion/KytheraMix-7B-v0.2", "name": "KytheraMix-7B-v0.2", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6129, "hfopenllm_v2/BBH": 0.5635, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4594, "hfopenllm_v2/MMLU-PRO": 0.4505 } }, { "id": "sometimesanotion/lamarck-14b-prose-model_stock", "name": "lamarck-14b-prose-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4276, "hfopenllm_v2/BBH": 0.6488, "hfopenllm_v2/MATH Level 5": 0.3414, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4846, "hfopenllm_v2/MMLU-PRO": 0.5354 } }, { "id": "sometimesanotion/lamarck-14b-reason-model_stock", "name": "lamarck-14b-reason-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4965, "hfopenllm_v2/BBH": 0.6569, "hfopenllm_v2/MATH Level 5": 0.358, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4741, "hfopenllm_v2/MMLU-PRO": 0.5402 } }, { "id": "sometimesanotion/Lamarck-14B-v0.1-experimental", "name": "Lamarck-14B-v0.1-experimental", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5354, "hfopenllm_v2/BBH": 0.6583, "hfopenllm_v2/MATH Level 5": 0.358, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4728, "hfopenllm_v2/MMLU-PRO": 0.5408 } }, { "id": "sometimesanotion/Lamarck-14B-v0.3", "name": "Lamarck-14B-v0.3", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5032, "hfopenllm_v2/BBH": 0.6611, "hfopenllm_v2/MATH Level 5": 0.3406, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4688, "hfopenllm_v2/MMLU-PRO": 0.5411 } }, { "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence", "name": "Lamarck-14B-v0.4-Qwenvergence", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4906, "hfopenllm_v2/BBH": 0.6535, "hfopenllm_v2/MATH Level 5": 0.3399, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.5406 } }, { "id": "sometimesanotion/Lamarck-14B-v0.6", "name": "Lamarck-14B-v0.6", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6973, "hfopenllm_v2/BBH": 0.646, "hfopenllm_v2/MATH Level 5": 0.4041, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "sometimesanotion/Lamarck-14B-v0.6-002-model_stock", "name": "Lamarck-14B-v0.6-002-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6692, "hfopenllm_v2/BBH": 0.6143, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.518, "hfopenllm_v2/MMLU-PRO": 0.5054 } }, { "id": "sometimesanotion/Lamarck-14B-v0.6-model_stock", "name": "Lamarck-14B-v0.6-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.679, "hfopenllm_v2/BBH": 0.6269, "hfopenllm_v2/MATH Level 5": 0.4245, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.5007, "hfopenllm_v2/MMLU-PRO": 0.5198 } }, { "id": "sometimesanotion/Lamarck-14B-v0.7-Fusion", "name": "Lamarck-14B-v0.7-Fusion", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6821, "hfopenllm_v2/BBH": 0.6544, "hfopenllm_v2/MATH Level 5": 0.4041, "hfopenllm_v2/GPQA": 0.401, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.5391 } }, { "id": "sometimesanotion/Lamarck-14B-v0.7-rc1", "name": "Lamarck-14B-v0.7-rc1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7305, "hfopenllm_v2/BBH": 0.6486, "hfopenllm_v2/MATH Level 5": 0.3852, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4715, "hfopenllm_v2/MMLU-PRO": 0.5416 } }, { "id": "sometimesanotion/Lamarck-14B-v0.7-rc4", "name": "Lamarck-14B-v0.7-rc4", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7211, "hfopenllm_v2/BBH": 0.651, "hfopenllm_v2/MATH Level 5": 0.4026, "hfopenllm_v2/GPQA": 0.3893, "hfopenllm_v2/MUSR": 0.4912, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "sometimesanotion/LamarckInfusion-14B-v1", "name": "LamarckInfusion-14B-v1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7198, "hfopenllm_v2/BBH": 0.6539, "hfopenllm_v2/MATH Level 5": 0.4169, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.4899, "hfopenllm_v2/MMLU-PRO": 0.5376 } }, { "id": "sometimesanotion/LamarckInfusion-14B-v2", "name": "LamarckInfusion-14B-v2", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6812, "hfopenllm_v2/BBH": 0.6564, "hfopenllm_v2/MATH Level 5": 0.4388, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4993, "hfopenllm_v2/MMLU-PRO": 0.5416 } }, { "id": "sometimesanotion/LamarckInfusion-14B-v2-hi", "name": "LamarckInfusion-14B-v2-hi", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6855, "hfopenllm_v2/BBH": 0.6555, "hfopenllm_v2/MATH Level 5": 0.423, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4847, "hfopenllm_v2/MMLU-PRO": 0.5405 } }, { "id": "sometimesanotion/LamarckInfusion-14B-v2-lo", "name": "LamarckInfusion-14B-v2-lo", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6788, "hfopenllm_v2/BBH": 0.6528, "hfopenllm_v2/MATH Level 5": 0.4237, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.5397 } }, { "id": "sometimesanotion/LamarckInfusion-14B-v3", "name": "LamarckInfusion-14B-v3", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7131, "hfopenllm_v2/BBH": 0.6518, "hfopenllm_v2/MATH Level 5": 0.4124, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.5407 } }, { "id": "sometimesanotion/Qwen-14B-ProseStock-v4", "name": "Qwen-14B-ProseStock-v4", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4942, "hfopenllm_v2/BBH": 0.6498, "hfopenllm_v2/MATH Level 5": 0.364, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4938, "hfopenllm_v2/MMLU-PRO": 0.5386 } }, { "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso", "name": "Qwen-2.5-14B-Virmarckeoso", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4813, "hfopenllm_v2/BBH": 0.657, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4794, "hfopenllm_v2/MMLU-PRO": 0.5377 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso", "name": "Qwen2.5-14B-Vimarckoso", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4574, "hfopenllm_v2/BBH": 0.6446, "hfopenllm_v2/MATH Level 5": 0.3384, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.4859, "hfopenllm_v2/MMLU-PRO": 0.5329 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2", "name": "Qwen2.5-14B-Vimarckoso-v2", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4505, "hfopenllm_v2/BBH": 0.655, "hfopenllm_v2/MATH Level 5": 0.358, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4819, "hfopenllm_v2/MMLU-PRO": 0.538 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", "name": "Qwen2.5-14B-Vimarckoso-v3", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7257, "hfopenllm_v2/BBH": 0.6415, "hfopenllm_v2/MATH Level 5": 0.4003, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4807, "hfopenllm_v2/MMLU-PRO": 0.5343 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant", "name": "Qwen2.5-14B-Vimarckoso-v3-IF-Variant", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6413, "hfopenllm_v2/BBH": 0.5521, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.5319, "hfopenllm_v2/MMLU-PRO": 0.4589 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock", "name": "Qwen2.5-14B-Vimarckoso-v3-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7162, "hfopenllm_v2/BBH": 0.6421, "hfopenllm_v2/MATH Level 5": 0.4245, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5316 } }, { "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01", "name": "Qwen2.5-14B-Vimarckoso-v3-Prose01", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6872, "hfopenllm_v2/BBH": 0.6359, "hfopenllm_v2/MATH Level 5": 0.3995, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4807, "hfopenllm_v2/MMLU-PRO": 0.5275 } }, { "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1", "name": "Qwen2.5-7B-Gordion-v0.1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7482, "hfopenllm_v2/BBH": 0.5524, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.4016, "hfopenllm_v2/MMLU-PRO": 0.43 } }, { "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose", "name": "Qwen2.5-7B-Gordion-v0.1-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5347, "hfopenllm_v2/BBH": 0.5599, "hfopenllm_v2/MATH Level 5": 0.2893, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4502, "hfopenllm_v2/MMLU-PRO": 0.4525 } }, { "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason", "name": "Qwen2.5-7B-Gordion-v0.1-Reason", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4917, "hfopenllm_v2/BBH": 0.5498, "hfopenllm_v2/MATH Level 5": 0.2621, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4434, "hfopenllm_v2/MMLU-PRO": 0.4307 } }, { "id": "sometimesanotion/Qwentessential-14B-v1", "name": "Qwentessential-14B-v1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6279, "hfopenllm_v2/BBH": 0.6545, "hfopenllm_v2/MATH Level 5": 0.4071, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4873, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "sometimesanotion/Qwentinuum-14B-v013", "name": "Qwentinuum-14B-v013", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6711, "hfopenllm_v2/BBH": 0.6087, "hfopenllm_v2/MATH Level 5": 0.3708, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.5154, "hfopenllm_v2/MMLU-PRO": 0.4991 } }, { "id": "sometimesanotion/Qwentinuum-14B-v1", "name": "Qwentinuum-14B-v1", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5032, "hfopenllm_v2/BBH": 0.6573, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.541 } }, { "id": "sometimesanotion/Qwentinuum-14B-v2", "name": "Qwentinuum-14B-v2", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5378, "hfopenllm_v2/BBH": 0.6555, "hfopenllm_v2/MATH Level 5": 0.3754, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4714, "hfopenllm_v2/MMLU-PRO": 0.5409 } }, { "id": "sometimesanotion/Qwentinuum-14B-v3", "name": "Qwentinuum-14B-v3", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6158, "hfopenllm_v2/BBH": 0.6539, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.486, "hfopenllm_v2/MMLU-PRO": 0.5413 } }, { "id": "sometimesanotion/Qwentinuum-14B-v5", "name": "Qwentinuum-14B-v5", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6286, "hfopenllm_v2/BBH": 0.655, "hfopenllm_v2/MATH Level 5": 0.3444, "hfopenllm_v2/GPQA": 0.3876, "hfopenllm_v2/MUSR": 0.4874, "hfopenllm_v2/MMLU-PRO": 0.5418 } }, { "id": "sometimesanotion/Qwentinuum-14B-v6", "name": "Qwentinuum-14B-v6", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6304, "hfopenllm_v2/BBH": 0.6545, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.49, "hfopenllm_v2/MMLU-PRO": 0.54 } }, { "id": "sometimesanotion/Qwentinuum-14B-v6-Prose", "name": "Qwentinuum-14B-v6-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5643, "hfopenllm_v2/BBH": 0.6545, "hfopenllm_v2/MATH Level 5": 0.3701, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4913, "hfopenllm_v2/MMLU-PRO": 0.5392 } }, { "id": "sometimesanotion/Qwentinuum-14B-v7", "name": "Qwentinuum-14B-v7", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6109, "hfopenllm_v2/BBH": 0.6551, "hfopenllm_v2/MATH Level 5": 0.3573, "hfopenllm_v2/GPQA": 0.3909, "hfopenllm_v2/MUSR": 0.482, "hfopenllm_v2/MMLU-PRO": 0.541 } }, { "id": "sometimesanotion/Qwentinuum-14B-v8", "name": "Qwentinuum-14B-v8", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5412, "hfopenllm_v2/BBH": 0.6534, "hfopenllm_v2/MATH Level 5": 0.3912, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4873, "hfopenllm_v2/MMLU-PRO": 0.5412 } }, { "id": "sometimesanotion/Qwentinuum-14B-v9", "name": "Qwentinuum-14B-v9", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5107, "hfopenllm_v2/BBH": 0.658, "hfopenllm_v2/MATH Level 5": 0.3482, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4781, "hfopenllm_v2/MMLU-PRO": 0.5421 } }, { "id": "sometimesanotion/Qwenvergence-14B-qv256", "name": "Qwenvergence-14B-qv256", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7006, "hfopenllm_v2/BBH": 0.6312, "hfopenllm_v2/MATH Level 5": 0.3897, "hfopenllm_v2/GPQA": 0.3784, "hfopenllm_v2/MUSR": 0.4926, "hfopenllm_v2/MMLU-PRO": 0.5178 } }, { "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock", "name": "Qwenvergence-14B-v0.6-004-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.686, "hfopenllm_v2/BBH": 0.6249, "hfopenllm_v2/MATH Level 5": 0.4094, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.5033, "hfopenllm_v2/MMLU-PRO": 0.5193 } }, { "id": "sometimesanotion/Qwenvergence-14B-v10", "name": "Qwenvergence-14B-v10", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6757, "hfopenllm_v2/BBH": 0.6316, "hfopenllm_v2/MATH Level 5": 0.4789, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.5239 } }, { "id": "sometimesanotion/Qwenvergence-14B-v11", "name": "Qwenvergence-14B-v11", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7192, "hfopenllm_v2/BBH": 0.6368, "hfopenllm_v2/MATH Level 5": 0.4645, "hfopenllm_v2/GPQA": 0.3725, "hfopenllm_v2/MUSR": 0.4754, "hfopenllm_v2/MMLU-PRO": 0.5327 } }, { "id": "sometimesanotion/Qwenvergence-14B-v12-Prose", "name": "Qwenvergence-14B-v12-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5412, "hfopenllm_v2/BBH": 0.6504, "hfopenllm_v2/MATH Level 5": 0.3535, "hfopenllm_v2/GPQA": 0.3867, "hfopenllm_v2/MUSR": 0.4991, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS", "name": "Qwenvergence-14B-v12-Prose-DS", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6173, "hfopenllm_v2/BBH": 0.6507, "hfopenllm_v2/MATH Level 5": 0.4305, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.5151, "hfopenllm_v2/MMLU-PRO": 0.5369 } }, { "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS", "name": "Qwenvergence-14B-v13-Prose-DS", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7178, "hfopenllm_v2/BBH": 0.6405, "hfopenllm_v2/MATH Level 5": 0.386, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4927, "hfopenllm_v2/MMLU-PRO": 0.5349 } }, { "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS", "name": "Qwenvergence-14B-v15-Prose-MS", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5032, "hfopenllm_v2/BBH": 0.655, "hfopenllm_v2/MATH Level 5": 0.3633, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4913, "hfopenllm_v2/MMLU-PRO": 0.5393 } }, { "id": "sometimesanotion/Qwenvergence-14B-v2-Prose", "name": "Qwenvergence-14B-v2-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4705, "hfopenllm_v2/BBH": 0.6519, "hfopenllm_v2/MATH Level 5": 0.3557, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4926, "hfopenllm_v2/MMLU-PRO": 0.5372 } }, { "id": "sometimesanotion/Qwenvergence-14B-v3", "name": "Qwenvergence-14B-v3", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5044, "hfopenllm_v2/BBH": 0.6548, "hfopenllm_v2/MATH Level 5": 0.3693, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4886, "hfopenllm_v2/MMLU-PRO": 0.5386 } }, { "id": "sometimesanotion/Qwenvergence-14B-v3-Prose", "name": "Qwenvergence-14B-v3-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4918, "hfopenllm_v2/BBH": 0.6513, "hfopenllm_v2/MATH Level 5": 0.3648, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.4939, "hfopenllm_v2/MMLU-PRO": 0.537 } }, { "id": "sometimesanotion/Qwenvergence-14B-v3-Reason", "name": "Qwenvergence-14B-v3-Reason", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5278, "hfopenllm_v2/BBH": 0.6557, "hfopenllm_v2/MATH Level 5": 0.3119, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4754, "hfopenllm_v2/MMLU-PRO": 0.5396 } }, { "id": "sometimesanotion/Qwenvergence-14B-v6-Prose", "name": "Qwenvergence-14B-v6-Prose", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.599, "hfopenllm_v2/BBH": 0.6544, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.3884, "hfopenllm_v2/MUSR": 0.4887, "hfopenllm_v2/MMLU-PRO": 0.5371 } }, { "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock", "name": "Qwenvergence-14B-v6-Prose-model_stock", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4811, "hfopenllm_v2/BBH": 0.653, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.4899, "hfopenllm_v2/MMLU-PRO": 0.5387 } }, { "id": "sometimesanotion/Qwenvergence-14B-v8", "name": "Qwenvergence-14B-v8", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5913, "hfopenllm_v2/BBH": 0.6522, "hfopenllm_v2/MATH Level 5": 0.4048, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.4768, "hfopenllm_v2/MMLU-PRO": 0.5435 } }, { "id": "sometimesanotion/Qwenvergence-14B-v9", "name": "Qwenvergence-14B-v9", "developer": "sometimesanotion", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6598, "hfopenllm_v2/BBH": 0.6166, "hfopenllm_v2/MATH Level 5": 0.4139, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.5141, "hfopenllm_v2/MMLU-PRO": 0.5111 } }, { "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-161415", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2893, "hfopenllm_v2/BBH": 0.3804, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2466, "hfopenllm_v2/MUSR": 0.3861, "hfopenllm_v2/MMLU-PRO": 0.1401 } }, { "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-164205", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3199, "hfopenllm_v2/BBH": 0.3959, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4272, "hfopenllm_v2/MMLU-PRO": 0.2124 } }, { "id": "sonthenguyen/ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", "name": "ft-unsloth-zephyr-sft-bnb-4bit-20241014-170522", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3764, "hfopenllm_v2/BBH": 0.3828, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.4404, "hfopenllm_v2/MMLU-PRO": 0.2055 } }, { "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbc-213steps", "name": "zephyr-sft-bnb-4bit-DPO-mtbc-213steps", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4275, "hfopenllm_v2/BBH": 0.4197, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.4086, "hfopenllm_v2/MMLU-PRO": 0.2709 } }, { "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbo-180steps", "name": "zephyr-sft-bnb-4bit-DPO-mtbo-180steps", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4087, "hfopenllm_v2/BBH": 0.4323, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3885, "hfopenllm_v2/MMLU-PRO": 0.2748 } }, { "id": "sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps", "name": "zephyr-sft-bnb-4bit-DPO-mtbr-180steps", "developer": "sonthenguyen", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4032, "hfopenllm_v2/BBH": 0.4305, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4258, "hfopenllm_v2/MMLU-PRO": 0.2711 } }, { "id": "sophosympatheia/Midnight-Miqu-70B-v1.5", "name": "Midnight-Miqu-70B-v1.5", "developer": "sophosympatheia", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6118, "hfopenllm_v2/BBH": 0.5606, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "Sorawiz/Gemma-9B-Base", "name": "Gemma-9B-Base", "developer": "Sorawiz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1667, "hfopenllm_v2/BBH": 0.593, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.4235 } }, { "id": "Sorawiz/Gemma-Creative-9B-Base", "name": "Gemma-Creative-9B-Base", "developer": "Sorawiz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1515, "hfopenllm_v2/BBH": 0.5459, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.4008 } }, { "id": "Sourjayon/DeepSeek-R1-8b-Sify", "name": "DeepSeek-R1-8b-Sify", "developer": "Sourjayon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3679, "hfopenllm_v2/BBH": 0.3379, "hfopenllm_v2/MATH Level 5": 0.2447, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3303, "hfopenllm_v2/MMLU-PRO": 0.1981 } }, { "id": "Sourjayon/DeepSeek-R1-ForumNXT", "name": "DeepSeek-R1-ForumNXT", "developer": "Sourjayon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2603, "hfopenllm_v2/BBH": 0.331, "hfopenllm_v2/MATH Level 5": 0.2576, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3392, "hfopenllm_v2/MMLU-PRO": 0.1648 } }, { "id": "SpaceYL/ECE_Poirot", "name": "ECE_Poirot", "developer": "SpaceYL", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3107, "hfopenllm_v2/BBH": 0.4262, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4026, "hfopenllm_v2/MMLU-PRO": 0.2883 } }, { "id": "speakleash-ack-cyfronet-agh/bielik-11b-v2-3-instruct-prompt", "name": "Bielik-11B-v2.3-Instruct (Prompt)", "developer": "speakleash-ack-cyfronet-agh", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 99.0, "bfcl/bfcl.overall.overall_accuracy": 21.9, "bfcl/bfcl.overall.total_cost_usd": 22.44, "bfcl/bfcl.overall.latency_mean_s": 23.75, "bfcl/bfcl.overall.latency_std_s": 61.76, "bfcl/bfcl.overall.latency_p95_s": 72.8, "bfcl/bfcl.non_live.ast_accuracy": 81.5, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 85.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 75.5, "bfcl/bfcl.live.live_accuracy": 67.8, "bfcl/bfcl.live.live_simple_ast_accuracy": 75.58, "bfcl/bfcl.live.live_multiple_ast_accuracy": 66.19, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 58.33, "bfcl/bfcl.multi_turn.accuracy": 2.62, "bfcl/bfcl.multi_turn.base_accuracy": 4.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 3.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 2.5, "bfcl/bfcl.web_search.accuracy": 1.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 2.0, "bfcl/bfcl.memory.accuracy": 11.4, "bfcl/bfcl.memory.kv_accuracy": 7.1, "bfcl/bfcl.memory.vector_accuracy": 4.52, "bfcl/bfcl.memory.recursive_summarization_accuracy": 22.58, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 36.01, "bfcl/bfcl.format_sensitivity.max_delta": 35.0, "bfcl/bfcl.format_sensitivity.stddev": 9.74 } }, { "id": "speakleash/Bielik-11B-v2", "name": "Bielik-11B-v2", "developer": "speakleash", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2381, "hfopenllm_v2/BBH": 0.4931, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3924, "hfopenllm_v2/MMLU-PRO": 0.3137 } }, { "id": "speakleash/Bielik-11B-v2.0-Instruct", "name": "Bielik-11B-v2.0-Instruct", "developer": "speakleash", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5252, "hfopenllm_v2/BBH": 0.5362, "hfopenllm_v2/MATH Level 5": 0.1186, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4467, "hfopenllm_v2/MMLU-PRO": 0.3351 } }, { "id": "speakleash/Bielik-11B-v2.1-Instruct", "name": "Bielik-11B-v2.1-Instruct", "developer": "speakleash", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.509, "hfopenllm_v2/BBH": 0.553, "hfopenllm_v2/MATH Level 5": 0.2666, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4185, "hfopenllm_v2/MMLU-PRO": 0.3447 } }, { "id": "speakleash/Bielik-11B-v2.2-Instruct", "name": "Bielik-11B-v2.2-Instruct", "developer": "speakleash", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5552, "hfopenllm_v2/BBH": 0.5597, "hfopenllm_v2/MATH Level 5": 0.2681, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4171, "hfopenllm_v2/MMLU-PRO": 0.3487 } }, { "id": "speakleash/Bielik-11B-v2.3-Instruct", "name": "Bielik-11B-v2.3-Instruct", "developer": "speakleash", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5583, "hfopenllm_v2/BBH": 0.5663, "hfopenllm_v2/MATH Level 5": 0.2085, "hfopenllm_v2/GPQA": 0.3406, "hfopenllm_v2/MUSR": 0.4518, "hfopenllm_v2/MMLU-PRO": 0.3444 } }, { "id": "Spestly/Athena-1-3B", "name": "Athena-1-3B", "developer": "Spestly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5569, "hfopenllm_v2/BBH": 0.4702, "hfopenllm_v2/MATH Level 5": 0.2379, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4362, "hfopenllm_v2/MMLU-PRO": 0.3519 } }, { "id": "Spestly/Atlas-Pro-1.5B-Preview", "name": "Atlas-Pro-1.5B-Preview", "developer": "Spestly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.243, "hfopenllm_v2/BBH": 0.3499, "hfopenllm_v2/MATH Level 5": 0.3195, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.3354, "hfopenllm_v2/MMLU-PRO": 0.1925 } }, { "id": "Spestly/Atlas-Pro-7B-Preview", "name": "Atlas-Pro-7B-Preview", "developer": "Spestly", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3154, "hfopenllm_v2/BBH": 0.4668, "hfopenllm_v2/MATH Level 5": 0.5083, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.297 } }, { "id": "spmurrayzzz/Mistral-Syndicate-7B", "name": "Mistral-Syndicate-7B", "developer": "spmurrayzzz", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2496, "hfopenllm_v2/BBH": 0.4245, "hfopenllm_v2/MATH Level 5": 0.034, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.2631 } }, { "id": "spow12/ChatWaifu_12B_v2.0", "name": "ChatWaifu_12B_v2.0", "developer": "spow12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4768, "hfopenllm_v2/BBH": 0.5208, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4432, "hfopenllm_v2/MMLU-PRO": 0.3388 } }, { "id": "spow12/ChatWaifu_22B_v2.0_preview", "name": "ChatWaifu_22B_v2.0_preview", "developer": "spow12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6745, "hfopenllm_v2/BBH": 0.617, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.3154, "hfopenllm_v2/MUSR": 0.3685, "hfopenllm_v2/MMLU-PRO": 0.3988 } }, { "id": "spow12/ChatWaifu_v1.4", "name": "ChatWaifu_v1.4", "developer": "spow12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5691, "hfopenllm_v2/BBH": 0.5176, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4743, "hfopenllm_v2/MMLU-PRO": 0.3475 } }, { "id": "spow12/ChatWaifu_v2.0_22B", "name": "ChatWaifu_v2.0_22B", "developer": "spow12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6511, "hfopenllm_v2/BBH": 0.5926, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.3842, "hfopenllm_v2/MMLU-PRO": 0.3836 } }, { "id": "ssmits/Qwen2.5-95B-Instruct", "name": "Qwen2.5-95B-Instruct", "developer": "ssmits", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8431, "hfopenllm_v2/BBH": 0.7038, "hfopenllm_v2/MATH Level 5": 0.5302, "hfopenllm_v2/GPQA": 0.3641, "hfopenllm_v2/MUSR": 0.4284, "hfopenllm_v2/MMLU-PRO": 0.5217 } }, { "id": "stabilityai/stable-code-instruct-3b", "name": "stabilityai/stable-code-instruct-3b", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6216, "reward-bench/Chat": 0.5782, "reward-bench/Chat Hard": 0.5855, "reward-bench/Safety": 0.6554, "reward-bench/Reasoning": 0.7528, "reward-bench/Prior Sets (0.5 weight)": 0.4506 } }, { "id": "stabilityai/StableBeluga2", "name": "StableBeluga2", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3787, "hfopenllm_v2/BBH": 0.5824, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.3326 } }, { "id": "stabilityai/stablelm-2-12b", "name": "stablelm-2-12b", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1569, "hfopenllm_v2/BBH": 0.4509, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4479, "hfopenllm_v2/MMLU-PRO": 0.3072 } }, { "id": "stabilityai/stablelm-2-12b-chat", "name": "stabilityai/stablelm-2-12b-chat", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4082, "hfopenllm_v2/BBH": 0.4672, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.2734, "reward-bench/Score": 0.7642, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.5548, "reward-bench/Safety": 0.7811, "reward-bench/Reasoning": 0.8945, "reward-bench/Prior Sets (0.5 weight)": 0.4839 } }, { "id": "stabilityai/stablelm-2-1_6b", "name": "stablelm-2-1_6b", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1157, "hfopenllm_v2/BBH": 0.3385, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3882, "hfopenllm_v2/MMLU-PRO": 0.1464 } }, { "id": "stabilityai/stablelm-2-1_6b-chat", "name": "stablelm-2-1_6b-chat", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.306, "hfopenllm_v2/BBH": 0.339, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1622 } }, { "id": "stabilityai/stablelm-2-zephyr-1_6b", "name": "stabilityai/stablelm-2-zephyr-1_6b", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3279, "hfopenllm_v2/BBH": 0.3352, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3511, "hfopenllm_v2/MMLU-PRO": 0.1714, "reward-bench/Score": 0.6574, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.4671, "reward-bench/Safety": 0.6027, "reward-bench/Reasoning": 0.6784, "reward-bench/Prior Sets (0.5 weight)": 0.4868 } }, { "id": "stabilityai/stablelm-3b-4e1t", "name": "stablelm-3b-4e1t", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2203, "hfopenllm_v2/BBH": 0.3504, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2374, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.1669 } }, { "id": "stabilityai/stablelm-zephyr-3b", "name": "stabilityai/stablelm-zephyr-3b", "developer": "stabilityai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3683, "hfopenllm_v2/BBH": 0.3866, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2391, "hfopenllm_v2/MUSR": 0.4183, "hfopenllm_v2/MMLU-PRO": 0.1768, "reward-bench/Score": 0.7146, "reward-bench/Chat": 0.8631, "reward-bench/Chat Hard": 0.6009, "reward-bench/Safety": 0.7405, "reward-bench/Reasoning": 0.7573, "reward-bench/Prior Sets (0.5 weight)": 0.5075 } }, { "id": "stanford/Alpaca-7B", "name": "Alpaca 7B", "developer": "stanford", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.381, "helm_classic/MMLU": 0.385, "helm_classic/BoolQ": 0.778, "helm_classic/NarrativeQA": 0.396, "helm_classic/NaturalQuestions (open-book)": 0.592, "helm_classic/QuAC": 0.27, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.243, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.738, "helm_classic/CivilComments": 0.566, "helm_classic/RAFT": 0.486 } }, { "id": "stanfordnlp/SteamSHP-flan-t5-large", "name": "stanfordnlp/SteamSHP-flan-t5-large", "developer": "stanfordnlp", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.4962, "reward-bench/Chat": 0.8575, "reward-bench/Chat Hard": 0.3311, "reward-bench/Safety": 0.3743, "reward-bench/Reasoning": 0.3563, "reward-bench/Prior Sets (0.5 weight)": 0.6273 } }, { "id": "stanfordnlp/SteamSHP-flan-t5-xl", "name": "stanfordnlp/SteamSHP-flan-t5-xl", "developer": "stanfordnlp", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5135, "reward-bench/Chat": 0.8547, "reward-bench/Chat Hard": 0.3684, "reward-bench/Safety": 0.3784, "reward-bench/Reasoning": 0.3841, "reward-bench/Prior Sets (0.5 weight)": 0.6498 } }, { "id": "Stark2008/GutenLaserPi", "name": "GutenLaserPi", "developer": "Stark2008", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4227, "hfopenllm_v2/BBH": 0.5212, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.462, "hfopenllm_v2/MMLU-PRO": 0.3106 } }, { "id": "Stark2008/LayleleFlamPi", "name": "LayleleFlamPi", "developer": "Stark2008", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4284, "hfopenllm_v2/BBH": 0.5116, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4608, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "Stark2008/VisFlamCat", "name": "VisFlamCat", "developer": "Stark2008", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4366, "hfopenllm_v2/BBH": 0.5217, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4463, "hfopenllm_v2/MMLU-PRO": 0.3144 } }, { "id": "Steelskull/L3.3-MS-Nevoria-70b", "name": "L3.3-MS-Nevoria-70b", "developer": "Steelskull", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6963, "hfopenllm_v2/BBH": 0.6998, "hfopenllm_v2/MATH Level 5": 0.3958, "hfopenllm_v2/GPQA": 0.4706, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.5535 } }, { "id": "Steelskull/L3.3-Nevoria-R1-70b", "name": "L3.3-Nevoria-R1-70b", "developer": "Steelskull", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6024, "hfopenllm_v2/BBH": 0.6972, "hfopenllm_v2/MATH Level 5": 0.463, "hfopenllm_v2/GPQA": 0.469, "hfopenllm_v2/MUSR": 0.4775, "hfopenllm_v2/MMLU-PRO": 0.5463 } }, { "id": "StelleX/Qwen2.5_Math_7B_Cot", "name": "Qwen2.5_Math_7B_Cot", "developer": "StelleX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2143, "hfopenllm_v2/BBH": 0.4313, "hfopenllm_v2/MATH Level 5": 0.3263, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3924, "hfopenllm_v2/MMLU-PRO": 0.281 } }, { "id": "StelleX/Vorisatex-7B-preview", "name": "Vorisatex-7B-preview", "developer": "StelleX", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1515, "hfopenllm_v2/BBH": 0.3112, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.4192, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "sthenno-com/miscii-14b-0130", "name": "miscii-14b-0130", "developer": "sthenno-com", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6647, "hfopenllm_v2/BBH": 0.6505, "hfopenllm_v2/MATH Level 5": 0.432, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4912, "hfopenllm_v2/MMLU-PRO": 0.5363 } }, { "id": "sthenno-com/miscii-14b-0218", "name": "miscii-14b-0218", "developer": "sthenno-com", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7656, "hfopenllm_v2/BBH": 0.6559, "hfopenllm_v2/MATH Level 5": 0.5144, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.5298 } }, { "id": "sthenno-com/miscii-14b-1028", "name": "miscii-14b-1028", "developer": "sthenno-com", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8237, "hfopenllm_v2/BBH": 0.6448, "hfopenllm_v2/MATH Level 5": 0.503, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4182, "hfopenllm_v2/MMLU-PRO": 0.5153 } }, { "id": "sthenno-com/miscii-14b-1225", "name": "miscii-14b-1225", "developer": "sthenno-com", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7878, "hfopenllm_v2/BBH": 0.6572, "hfopenllm_v2/MATH Level 5": 0.4517, "hfopenllm_v2/GPQA": 0.3775, "hfopenllm_v2/MUSR": 0.4366, "hfopenllm_v2/MMLU-PRO": 0.5272 } }, { "id": "sthenno/tempesthenno-0120", "name": "tempesthenno-0120", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.539, "hfopenllm_v2/BBH": 0.6373, "hfopenllm_v2/MATH Level 5": 0.3353, "hfopenllm_v2/GPQA": 0.3943, "hfopenllm_v2/MUSR": 0.4633, "hfopenllm_v2/MMLU-PRO": 0.529 } }, { "id": "sthenno/tempesthenno-fusion-0309", "name": "tempesthenno-fusion-0309", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7692, "hfopenllm_v2/BBH": 0.6581, "hfopenllm_v2/MATH Level 5": 0.4766, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4325, "hfopenllm_v2/MMLU-PRO": 0.5258 } }, { "id": "sthenno/tempesthenno-kto-0205-ckpt80", "name": "tempesthenno-kto-0205-ckpt80", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8054, "hfopenllm_v2/BBH": 0.6543, "hfopenllm_v2/MATH Level 5": 0.4592, "hfopenllm_v2/GPQA": 0.3482, "hfopenllm_v2/MUSR": 0.4248, "hfopenllm_v2/MMLU-PRO": 0.5286 } }, { "id": "sthenno/tempesthenno-nuslerp-001", "name": "tempesthenno-nuslerp-001", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7926, "hfopenllm_v2/BBH": 0.6578, "hfopenllm_v2/MATH Level 5": 0.4758, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.43, "hfopenllm_v2/MMLU-PRO": 0.5257 } }, { "id": "sthenno/tempesthenno-nuslerp-0124", "name": "tempesthenno-nuslerp-0124", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7004, "hfopenllm_v2/BBH": 0.6469, "hfopenllm_v2/MATH Level 5": 0.4116, "hfopenllm_v2/GPQA": 0.3901, "hfopenllm_v2/MUSR": 0.4859, "hfopenllm_v2/MMLU-PRO": 0.5352 } }, { "id": "sthenno/tempesthenno-ppo-ckpt40", "name": "tempesthenno-ppo-ckpt40", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7923, "hfopenllm_v2/BBH": 0.655, "hfopenllm_v2/MATH Level 5": 0.4736, "hfopenllm_v2/GPQA": 0.3775, "hfopenllm_v2/MUSR": 0.4352, "hfopenllm_v2/MMLU-PRO": 0.5292 } }, { "id": "sthenno/tempesthenno-sft-0309-ckpt10", "name": "tempesthenno-sft-0309-ckpt10", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7744, "hfopenllm_v2/BBH": 0.6552, "hfopenllm_v2/MATH Level 5": 0.4721, "hfopenllm_v2/GPQA": 0.3716, "hfopenllm_v2/MUSR": 0.4364, "hfopenllm_v2/MMLU-PRO": 0.5258 } }, { "id": "sthenno/tempesthenno-sft-0314-stage1-ckpt50", "name": "tempesthenno-sft-0314-stage1-ckpt50", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7394, "hfopenllm_v2/BBH": 0.6601, "hfopenllm_v2/MATH Level 5": 0.4683, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4429, "hfopenllm_v2/MMLU-PRO": 0.5302 } }, { "id": "sthenno/tempestissimo-14b-0309", "name": "tempestissimo-14b-0309", "developer": "sthenno", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7549, "hfopenllm_v2/BBH": 0.6587, "hfopenllm_v2/MATH Level 5": 0.4796, "hfopenllm_v2/GPQA": 0.3666, "hfopenllm_v2/MUSR": 0.4312, "hfopenllm_v2/MMLU-PRO": 0.5281 } }, { "id": "streamerbtw1002/Nexuim-R1-7B-Instruct", "name": "Nexuim-R1-7B-Instruct", "developer": "streamerbtw1002", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6934, "hfopenllm_v2/BBH": 0.5175, "hfopenllm_v2/MATH Level 5": 0.4456, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3356, "hfopenllm_v2/MMLU-PRO": 0.4138 } }, { "id": "stupidity-ai/Llama-3-8B-Instruct-MultiMoose", "name": "Llama-3-8B-Instruct-MultiMoose", "developer": "stupidity-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2318, "hfopenllm_v2/BBH": 0.2823, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3485, "hfopenllm_v2/MMLU-PRO": 0.1094 } }, { "id": "suayptalha/Clarus-7B-v0.1", "name": "Clarus-7B-v0.1", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7454, "hfopenllm_v2/BBH": 0.5497, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.443, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "suayptalha/Clarus-7B-v0.2", "name": "Clarus-7B-v0.2", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7679, "hfopenllm_v2/BBH": 0.549, "hfopenllm_v2/MATH Level 5": 0.4856, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4417, "hfopenllm_v2/MMLU-PRO": 0.44 } }, { "id": "suayptalha/Clarus-7B-v0.3", "name": "Clarus-7B-v0.3", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7509, "hfopenllm_v2/BBH": 0.5526, "hfopenllm_v2/MATH Level 5": 0.4879, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4402, "hfopenllm_v2/MMLU-PRO": 0.4385 } }, { "id": "suayptalha/DeepSeek-R1-Distill-Llama-3B", "name": "DeepSeek-R1-Distill-Llama-3B", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7093, "hfopenllm_v2/BBH": 0.4452, "hfopenllm_v2/MATH Level 5": 0.2092, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.2978 } }, { "id": "suayptalha/Falcon3-Jessi-v0.4-7B-Slerp", "name": "Falcon3-Jessi-v0.4-7B-Slerp", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7676, "hfopenllm_v2/BBH": 0.5591, "hfopenllm_v2/MATH Level 5": 0.3965, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4812, "hfopenllm_v2/MMLU-PRO": 0.406 } }, { "id": "suayptalha/HomerCreativeAnvita-Mix-Qw7B", "name": "HomerCreativeAnvita-Mix-Qw7B", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7808, "hfopenllm_v2/BBH": 0.5565, "hfopenllm_v2/MATH Level 5": 0.361, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4416, "hfopenllm_v2/MMLU-PRO": 0.4445 } }, { "id": "suayptalha/Komodo-Llama-3.2-3B-v2-fp16", "name": "Komodo-Llama-3.2-3B-v2-fp16", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6341, "hfopenllm_v2/BBH": 0.4355, "hfopenllm_v2/MATH Level 5": 0.1065, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3406, "hfopenllm_v2/MMLU-PRO": 0.2852 } }, { "id": "suayptalha/Lamarckvergence-14B", "name": "Lamarckvergence-14B", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7656, "hfopenllm_v2/BBH": 0.6517, "hfopenllm_v2/MATH Level 5": 0.54, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4422, "hfopenllm_v2/MMLU-PRO": 0.5283 } }, { "id": "suayptalha/Lix-14B-v0.1", "name": "Lix-14B-v0.1", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7813, "hfopenllm_v2/BBH": 0.6608, "hfopenllm_v2/MATH Level 5": 0.5295, "hfopenllm_v2/GPQA": 0.37, "hfopenllm_v2/MUSR": 0.4338, "hfopenllm_v2/MMLU-PRO": 0.5314 } }, { "id": "suayptalha/Luminis-phi-4", "name": "Luminis-phi-4", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.69, "hfopenllm_v2/BBH": 0.692, "hfopenllm_v2/MATH Level 5": 0.4637, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4572, "hfopenllm_v2/MMLU-PRO": 0.5424 } }, { "id": "suayptalha/Maestro-10B", "name": "Maestro-10B", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7768, "hfopenllm_v2/BBH": 0.5746, "hfopenllm_v2/MATH Level 5": 0.1911, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4397, "hfopenllm_v2/MMLU-PRO": 0.4218 } }, { "id": "suayptalha/Rombos-2.5-T.E-8.1", "name": "Rombos-2.5-T.E-8.1", "developer": "suayptalha", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6925, "hfopenllm_v2/BBH": 0.5515, "hfopenllm_v2/MATH Level 5": 0.4924, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.4446 } }, { "id": "SultanR/SmolTulu-1.7b-Instruct", "name": "SmolTulu-1.7b-Instruct", "developer": "SultanR", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6541, "hfopenllm_v2/BBH": 0.3713, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.354, "hfopenllm_v2/MMLU-PRO": 0.171 } }, { "id": "SultanR/SmolTulu-1.7b-it-v0", "name": "SmolTulu-1.7b-it-v0", "developer": "SultanR", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6541, "hfopenllm_v2/BBH": 0.3713, "hfopenllm_v2/MATH Level 5": 0.0793, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.354, "hfopenllm_v2/MMLU-PRO": 0.171 } }, { "id": "SultanR/SmolTulu-1.7b-Reinforced", "name": "SmolTulu-1.7b-Reinforced", "developer": "SultanR", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6791, "hfopenllm_v2/BBH": 0.3552, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3406, "hfopenllm_v2/MMLU-PRO": 0.1763 } }, { "id": "SultanR/SmolTulu-1.7b-RM", "name": "SultanR/SmolTulu-1.7b-RM", "developer": "SultanR", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.5094, "reward-bench/Chat": 0.743, "reward-bench/Chat Hard": 0.4408, "reward-bench/Safety": 0.5716, "reward-bench/Reasoning": 0.2821 } }, { "id": "sumink/bbhqwen", "name": "bbhqwen", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1809, "hfopenllm_v2/BBH": 0.3388, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.4352, "hfopenllm_v2/MMLU-PRO": 0.1617 } }, { "id": "sumink/bbhqwen2", "name": "bbhqwen2", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1533, "hfopenllm_v2/BBH": 0.3066, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.4431, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "sumink/bbhqwen3", "name": "bbhqwen3", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1943, "hfopenllm_v2/BBH": 0.2951, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3796, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "sumink/bbhqwen4", "name": "bbhqwen4", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1449, "hfopenllm_v2/BBH": 0.3199, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.4029, "hfopenllm_v2/MMLU-PRO": 0.1509 } }, { "id": "sumink/bbhqwen5", "name": "bbhqwen5", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1522, "hfopenllm_v2/BBH": 0.2913, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.4019, "hfopenllm_v2/MMLU-PRO": 0.1131 } }, { "id": "sumink/bbhqwen6", "name": "bbhqwen6", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1893, "hfopenllm_v2/BBH": 0.2782, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.358, "hfopenllm_v2/MMLU-PRO": 0.1153 } }, { "id": "sumink/flflmillama", "name": "flflmillama", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1676, "hfopenllm_v2/BBH": 0.3851, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3591, "hfopenllm_v2/MMLU-PRO": 0.2096 } }, { "id": "sumink/ftgpt", "name": "ftgpt", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0787, "hfopenllm_v2/BBH": 0.2919, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4138, "hfopenllm_v2/MMLU-PRO": 0.1172 } }, { "id": "sumink/llamaft", "name": "llamaft", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1609, "hfopenllm_v2/BBH": 0.3763, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3498, "hfopenllm_v2/MMLU-PRO": 0.2114 } }, { "id": "sumink/llamamerge", "name": "llamamerge", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2672, "hfopenllm_v2/BBH": 0.4632, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.424, "hfopenllm_v2/MMLU-PRO": 0.259 } }, { "id": "sumink/llftfl7", "name": "llftfl7", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1714, "hfopenllm_v2/BBH": 0.3786, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3632, "hfopenllm_v2/MMLU-PRO": 0.1743 } }, { "id": "sumink/llmer", "name": "llmer", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3191, "hfopenllm_v2/BBH": 0.4885, "hfopenllm_v2/MATH Level 5": 0.065, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.3529 } }, { "id": "sumink/Qmerft", "name": "Qmerft", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1564, "hfopenllm_v2/BBH": 0.2939, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3688, "hfopenllm_v2/MMLU-PRO": 0.1157 } }, { "id": "sumink/Qwenftmodel", "name": "Qwenftmodel", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1729, "hfopenllm_v2/BBH": 0.3823, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3617, "hfopenllm_v2/MMLU-PRO": 0.2339 } }, { "id": "sumink/Qwenmplus", "name": "Qwenmplus", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.204, "hfopenllm_v2/BBH": 0.3676, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.3828, "hfopenllm_v2/MMLU-PRO": 0.1992 } }, { "id": "sumink/Qwensci", "name": "Qwensci", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.174, "hfopenllm_v2/BBH": 0.3282, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3609, "hfopenllm_v2/MMLU-PRO": 0.126 } }, { "id": "sumink/qwft", "name": "qwft", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1197, "hfopenllm_v2/BBH": 0.3002, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3581, "hfopenllm_v2/MMLU-PRO": 0.1129 } }, { "id": "sumink/qwmer", "name": "qwmer", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2212, "hfopenllm_v2/BBH": 0.4299, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.4032, "hfopenllm_v2/MMLU-PRO": 0.2215 } }, { "id": "sumink/solarmer3", "name": "solarmer3", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3741, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.0582, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.3323 } }, { "id": "sumink/somer", "name": "somer", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.299, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.465, "hfopenllm_v2/MMLU-PRO": 0.3447 } }, { "id": "sumink/somer2", "name": "somer2", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3132, "hfopenllm_v2/BBH": 0.5167, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4663, "hfopenllm_v2/MMLU-PRO": 0.3433 } }, { "id": "sumink/somerft", "name": "somerft", "developer": "sumink", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1431, "hfopenllm_v2/BBH": 0.3093, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.1117 } }, { "id": "sunbaby/BrainCog-8B-0.1-Instruct", "name": "BrainCog-8B-0.1-Instruct", "developer": "sunbaby", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4253, "hfopenllm_v2/BBH": 0.4618, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3656, "hfopenllm_v2/MMLU-PRO": 0.2858 } }, { "id": "Supichi/BBA-123", "name": "BBA-123", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.208, "hfopenllm_v2/BBH": 0.292, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3499, "hfopenllm_v2/MMLU-PRO": 0.1167 } }, { "id": "Supichi/BBA99", "name": "BBA99", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1407, "hfopenllm_v2/BBH": 0.2769, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3218, "hfopenllm_v2/MMLU-PRO": 0.1112 } }, { "id": "Supichi/BBAI_135_Gemma", "name": "BBAI_135_Gemma", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0656, "hfopenllm_v2/BBH": 0.3568, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3805, "hfopenllm_v2/MMLU-PRO": 0.1672 } }, { "id": "Supichi/BBAI_250_Xia0_gZ", "name": "BBAI_250_Xia0_gZ", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4685, "hfopenllm_v2/BBH": 0.5568, "hfopenllm_v2/MATH Level 5": 0.364, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4579, "hfopenllm_v2/MMLU-PRO": 0.4465 } }, { "id": "Supichi/BBAI_275_Tsunami_gZ", "name": "BBAI_275_Tsunami_gZ", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.537, "hfopenllm_v2/BBH": 0.5531, "hfopenllm_v2/MATH Level 5": 0.3285, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4448, "hfopenllm_v2/MMLU-PRO": 0.4492 } }, { "id": "Supichi/BBAI_525_Tsu_gZ_Xia0", "name": "BBAI_525_Tsu_gZ_Xia0", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5339, "hfopenllm_v2/BBH": 0.5562, "hfopenllm_v2/MATH Level 5": 0.3429, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4474, "hfopenllm_v2/MMLU-PRO": 0.4477 } }, { "id": "Supichi/BBAI_78B_Calme_3_1_Ties", "name": "BBAI_78B_Calme_3_1_Ties", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1828, "hfopenllm_v2/BBH": 0.2828, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.229, "hfopenllm_v2/MUSR": 0.31, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B", "name": "BBAI_QWEEN_V000000_LUMEN_14B", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1815, "hfopenllm_v2/BBH": 0.2297, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2315, "hfopenllm_v2/MUSR": 0.3445, "hfopenllm_v2/MMLU-PRO": 0.116 } }, { "id": "Supichi/BBAIK29", "name": "BBAIK29", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4588, "hfopenllm_v2/BBH": 0.559, "hfopenllm_v2/MATH Level 5": 0.3678, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4501, "hfopenllm_v2/MMLU-PRO": 0.4469 } }, { "id": "Supichi/HF_TOKEN", "name": "HF_TOKEN", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.138, "hfopenllm_v2/BBH": 0.2764, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3272, "hfopenllm_v2/MMLU-PRO": 0.111 } }, { "id": "Supichi/NJS26", "name": "NJS26", "developer": "Supichi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0448, "hfopenllm_v2/BBH": 0.478, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.3854, "hfopenllm_v2/MMLU-PRO": 0.3037 } }, { "id": "Svak/MN-12B-Inferor-v0.0", "name": "MN-12B-Inferor-v0.0", "developer": "Svak", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5708, "hfopenllm_v2/BBH": 0.5195, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4639, "hfopenllm_v2/MMLU-PRO": 0.3559 } }, { "id": "Svak/MN-12B-Inferor-v0.1", "name": "MN-12B-Inferor-v0.1", "developer": "Svak", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6347, "hfopenllm_v2/BBH": 0.5147, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.3255, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.3662 } }, { "id": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", "name": "LLaMAntino-3-ANITA-8B-Inst-DPO-ITA", "developer": "swap-uniba", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4815, "hfopenllm_v2/BBH": 0.4936, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4387, "hfopenllm_v2/MMLU-PRO": 0.3723 } }, { "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo", "name": "Phi-3-mini-4K-instruct-cpo-simpo", "developer": "Syed-Hasan-8503", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5714, "hfopenllm_v2/BBH": 0.5682, "hfopenllm_v2/MATH Level 5": 0.1571, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.3964, "hfopenllm_v2/MMLU-PRO": 0.3861 } }, { "id": "synergetic/FrankenQwen2.5-14B", "name": "FrankenQwen2.5-14B", "developer": "synergetic", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1869, "hfopenllm_v2/BBH": 0.6048, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "T145/KRONOS-8B-V1-P1", "name": "KRONOS-8B-V1-P1", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.785, "hfopenllm_v2/BBH": 0.5085, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.3881, "hfopenllm_v2/MMLU-PRO": 0.376 } }, { "id": "T145/KRONOS-8B-V1-P2", "name": "KRONOS-8B-V1-P2", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6724, "hfopenllm_v2/BBH": 0.4772, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.3453 } }, { "id": "T145/KRONOS-8B-V1-P3", "name": "KRONOS-8B-V1-P3", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7137, "hfopenllm_v2/BBH": 0.5128, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3616, "hfopenllm_v2/MMLU-PRO": 0.3405 } }, { "id": "T145/KRONOS-8B-V2", "name": "KRONOS-8B-V2", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.518, "hfopenllm_v2/BBH": 0.5133, "hfopenllm_v2/MATH Level 5": 0.2266, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3829, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "T145/KRONOS-8B-V3", "name": "KRONOS-8B-V3", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5475, "hfopenllm_v2/BBH": 0.5119, "hfopenllm_v2/MATH Level 5": 0.2598, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "T145/KRONOS-8B-V4", "name": "KRONOS-8B-V4", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.5092, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.383, "hfopenllm_v2/MMLU-PRO": 0.3786 } }, { "id": "T145/KRONOS-8B-V5", "name": "KRONOS-8B-V5", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5405, "hfopenllm_v2/BBH": 0.5089, "hfopenllm_v2/MATH Level 5": 0.2689, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4055, "hfopenllm_v2/MMLU-PRO": 0.3759 } }, { "id": "T145/KRONOS-8B-V6", "name": "KRONOS-8B-V6", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7022, "hfopenllm_v2/BBH": 0.5034, "hfopenllm_v2/MATH Level 5": 0.2598, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4121, "hfopenllm_v2/MMLU-PRO": 0.3501 } }, { "id": "T145/KRONOS-8B-V7", "name": "KRONOS-8B-V7", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3529, "hfopenllm_v2/BBH": 0.4526, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3671, "hfopenllm_v2/MMLU-PRO": 0.2697 } }, { "id": "T145/KRONOS-8B-V8", "name": "KRONOS-8B-V8", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.777, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.3782 } }, { "id": "T145/KRONOS-8B-V9", "name": "KRONOS-8B-V9", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.5099, "hfopenllm_v2/MATH Level 5": 0.1986, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3868, "hfopenllm_v2/MMLU-PRO": 0.3752 } }, { "id": "T145/Llama-3.1-8B-Instruct-Zeus", "name": "Llama-3.1-8B-Instruct-Zeus", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7941, "hfopenllm_v2/BBH": 0.5174, "hfopenllm_v2/MATH Level 5": 0.1956, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3976, "hfopenllm_v2/MMLU-PRO": 0.3893 } }, { "id": "T145/Llama-3.1-8B-Zeus", "name": "Llama-3.1-8B-Zeus", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3518, "hfopenllm_v2/BBH": 0.3671, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3316, "hfopenllm_v2/MMLU-PRO": 0.1332 } }, { "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES", "name": "Meta-Llama-3.1-8B-Instruct-TIES", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5424, "hfopenllm_v2/BBH": 0.507, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3843, "hfopenllm_v2/MMLU-PRO": 0.378 } }, { "id": "T145/qwen-2.5-3B-merge-test", "name": "qwen-2.5-3B-merge-test", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5751, "hfopenllm_v2/BBH": 0.4842, "hfopenllm_v2/MATH Level 5": 0.3202, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.329 } }, { "id": "T145/ZEUS-8B-V10", "name": "ZEUS-8B-V10", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7707, "hfopenllm_v2/BBH": 0.527, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.3898, "hfopenllm_v2/MMLU-PRO": 0.3904 } }, { "id": "T145/ZEUS-8B-V11", "name": "ZEUS-8B-V11", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.81, "hfopenllm_v2/BBH": 0.5162, "hfopenllm_v2/MATH Level 5": 0.1964, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.3807, "hfopenllm_v2/MMLU-PRO": 0.3884 } }, { "id": "T145/ZEUS-8B-V12", "name": "ZEUS-8B-V12", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7816, "hfopenllm_v2/BBH": 0.5254, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.3858, "hfopenllm_v2/MMLU-PRO": 0.3912 } }, { "id": "T145/ZEUS-8B-V13", "name": "ZEUS-8B-V13", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7904, "hfopenllm_v2/BBH": 0.5277, "hfopenllm_v2/MATH Level 5": 0.2137, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.3911 } }, { "id": "T145/ZEUS-8B-V13-abliterated", "name": "ZEUS-8B-V13-abliterated", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7878, "hfopenllm_v2/BBH": 0.5198, "hfopenllm_v2/MATH Level 5": 0.179, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.3872 } }, { "id": "T145/ZEUS-8B-V14", "name": "ZEUS-8B-V14", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7709, "hfopenllm_v2/BBH": 0.5275, "hfopenllm_v2/MATH Level 5": 0.213, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.3844, "hfopenllm_v2/MMLU-PRO": 0.3914 } }, { "id": "T145/ZEUS-8B-V15", "name": "ZEUS-8B-V15", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7013, "hfopenllm_v2/BBH": 0.5538, "hfopenllm_v2/MATH Level 5": 0.2304, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.4059 } }, { "id": "T145/ZEUS-8B-V16", "name": "ZEUS-8B-V16", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7925, "hfopenllm_v2/BBH": 0.5266, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3951, "hfopenllm_v2/MMLU-PRO": 0.3926 } }, { "id": "T145/ZEUS-8B-V17", "name": "ZEUS-8B-V17", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7941, "hfopenllm_v2/BBH": 0.5251, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4016, "hfopenllm_v2/MMLU-PRO": 0.3935 } }, { "id": "T145/ZEUS-8B-V17-abliterated", "name": "ZEUS-8B-V17-abliterated", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7576, "hfopenllm_v2/BBH": 0.52, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4269, "hfopenllm_v2/MMLU-PRO": 0.3622 } }, { "id": "T145/ZEUS-8B-V17-abliterated-V2", "name": "ZEUS-8B-V17-abliterated-V2", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6532, "hfopenllm_v2/BBH": 0.4928, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3407, "hfopenllm_v2/MMLU-PRO": 0.3402 } }, { "id": "T145/ZEUS-8B-V17-abliterated-V4", "name": "ZEUS-8B-V17-abliterated-V4", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7228, "hfopenllm_v2/BBH": 0.5169, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3774 } }, { "id": "T145/ZEUS-8B-V18", "name": "ZEUS-8B-V18", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7834, "hfopenllm_v2/BBH": 0.527, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.3942 } }, { "id": "T145/ZEUS-8B-V19", "name": "ZEUS-8B-V19", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7883, "hfopenllm_v2/BBH": 0.5276, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.3934 } }, { "id": "T145/ZEUS-8B-V2", "name": "ZEUS-8B-V2", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8029, "hfopenllm_v2/BBH": 0.5194, "hfopenllm_v2/MATH Level 5": 0.216, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.391, "hfopenllm_v2/MMLU-PRO": 0.3896 } }, { "id": "T145/ZEUS-8B-V2-abliterated", "name": "ZEUS-8B-V2-abliterated", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7895, "hfopenllm_v2/BBH": 0.5129, "hfopenllm_v2/MATH Level 5": 0.2115, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.3911, "hfopenllm_v2/MMLU-PRO": 0.3825 } }, { "id": "T145/ZEUS-8B-V2-ORPO", "name": "ZEUS-8B-V2-ORPO", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7187, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3935, "hfopenllm_v2/MMLU-PRO": 0.3678 } }, { "id": "T145/ZEUS-8B-V20", "name": "ZEUS-8B-V20", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7956, "hfopenllm_v2/BBH": 0.5244, "hfopenllm_v2/MATH Level 5": 0.219, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.393 } }, { "id": "T145/ZEUS-8B-V21", "name": "ZEUS-8B-V21", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3785, "hfopenllm_v2/BBH": 0.3398, "hfopenllm_v2/MATH Level 5": 0.1594, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3262, "hfopenllm_v2/MMLU-PRO": 0.1714 } }, { "id": "T145/ZEUS-8B-V22", "name": "ZEUS-8B-V22", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7995, "hfopenllm_v2/BBH": 0.5245, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.399, "hfopenllm_v2/MMLU-PRO": 0.3938 } }, { "id": "T145/ZEUS-8B-V23", "name": "ZEUS-8B-V23", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7621, "hfopenllm_v2/BBH": 0.5195, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.3666 } }, { "id": "T145/ZEUS-8B-V24", "name": "ZEUS-8B-V24", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6, "hfopenllm_v2/BBH": 0.4778, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3729, "hfopenllm_v2/MMLU-PRO": 0.3285 } }, { "id": "T145/ZEUS-8B-V25", "name": "ZEUS-8B-V25", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.332, "hfopenllm_v2/BBH": 0.4547, "hfopenllm_v2/MATH Level 5": 0.2039, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3488, "hfopenllm_v2/MMLU-PRO": 0.2885 } }, { "id": "T145/ZEUS-8B-V26", "name": "ZEUS-8B-V26", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6708, "hfopenllm_v2/BBH": 0.5232, "hfopenllm_v2/MATH Level 5": 0.1246, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4016, "hfopenllm_v2/MMLU-PRO": 0.3907 } }, { "id": "T145/ZEUS-8B-V27", "name": "ZEUS-8B-V27", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6544, "hfopenllm_v2/BBH": 0.523, "hfopenllm_v2/MATH Level 5": 0.1344, "hfopenllm_v2/GPQA": 0.3079, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.3902 } }, { "id": "T145/ZEUS-8B-V28", "name": "ZEUS-8B-V28", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6353, "hfopenllm_v2/BBH": 0.5254, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3896, "hfopenllm_v2/MMLU-PRO": 0.3902 } }, { "id": "T145/ZEUS-8B-V29", "name": "ZEUS-8B-V29", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7418, "hfopenllm_v2/BBH": 0.5253, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4003, "hfopenllm_v2/MMLU-PRO": 0.392 } }, { "id": "T145/ZEUS-8B-V2L1", "name": "ZEUS-8B-V2L1", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3192, "hfopenllm_v2/BBH": 0.5013, "hfopenllm_v2/MATH Level 5": 0.1239, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.3882, "hfopenllm_v2/MMLU-PRO": 0.3638 } }, { "id": "T145/ZEUS-8B-V2L2", "name": "ZEUS-8B-V2L2", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8021, "hfopenllm_v2/BBH": 0.5203, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3975, "hfopenllm_v2/MMLU-PRO": 0.3884 } }, { "id": "T145/ZEUS-8B-V3", "name": "ZEUS-8B-V3", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7887, "hfopenllm_v2/BBH": 0.5265, "hfopenllm_v2/MATH Level 5": 0.1677, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.3804 } }, { "id": "T145/ZEUS-8B-V30", "name": "ZEUS-8B-V30", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7436, "hfopenllm_v2/BBH": 0.5243, "hfopenllm_v2/MATH Level 5": 0.1586, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.4029, "hfopenllm_v2/MMLU-PRO": 0.3944 } }, { "id": "T145/ZEUS-8B-V4", "name": "ZEUS-8B-V4", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7807, "hfopenllm_v2/BBH": 0.5246, "hfopenllm_v2/MATH Level 5": 0.1926, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4029, "hfopenllm_v2/MMLU-PRO": 0.3788 } }, { "id": "T145/ZEUS-8B-V6", "name": "ZEUS-8B-V6", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7838, "hfopenllm_v2/BBH": 0.524, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4068, "hfopenllm_v2/MMLU-PRO": 0.3759 } }, { "id": "T145/ZEUS-8B-V7", "name": "ZEUS-8B-V7", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7786, "hfopenllm_v2/BBH": 0.507, "hfopenllm_v2/MATH Level 5": 0.148, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4162, "hfopenllm_v2/MMLU-PRO": 0.3812 } }, { "id": "T145/ZEUS-8B-V8", "name": "ZEUS-8B-V8", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7914, "hfopenllm_v2/BBH": 0.5065, "hfopenllm_v2/MATH Level 5": 0.1329, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4214, "hfopenllm_v2/MMLU-PRO": 0.3761 } }, { "id": "T145/ZEUS-8B-V9", "name": "ZEUS-8B-V9", "developer": "T145", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5551, "hfopenllm_v2/BBH": 0.5207, "hfopenllm_v2/MATH Level 5": 0.2137, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3949, "hfopenllm_v2/MMLU-PRO": 0.3901 } }, { "id": "talha2001/Beast-Soul-new", "name": "Beast-Soul-new", "developer": "talha2001", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4854, "hfopenllm_v2/BBH": 0.5227, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.3102 } }, { "id": "tangledgroup/tangled-llama-pints-1.5b-v0.1-instruct", "name": "tangled-llama-pints-1.5b-v0.1-instruct", "developer": "tangledgroup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1509, "hfopenllm_v2/BBH": 0.3143, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2399, "hfopenllm_v2/MUSR": 0.3761, "hfopenllm_v2/MMLU-PRO": 0.1109 } }, { "id": "tangledgroup/tangled-llama-pints-1.5b-v0.2-instruct", "name": "tangled-llama-pints-1.5b-v0.2-instruct", "developer": "tangledgroup", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1724, "hfopenllm_v2/BBH": 0.3158, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.3643, "hfopenllm_v2/MMLU-PRO": 0.1117 } }, { "id": "tanliboy/lambda-gemma-2-9b-dpo", "name": "lambda-gemma-2-9b-dpo", "developer": "tanliboy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4501, "hfopenllm_v2/BBH": 0.5472, "hfopenllm_v2/MATH Level 5": 0.0944, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.3792 } }, { "id": "tanliboy/lambda-qwen2.5-14b-dpo-test", "name": "lambda-qwen2.5-14b-dpo-test", "developer": "tanliboy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8231, "hfopenllm_v2/BBH": 0.6394, "hfopenllm_v2/MATH Level 5": 0.5461, "hfopenllm_v2/GPQA": 0.3624, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.4848 } }, { "id": "tanliboy/lambda-qwen2.5-32b-dpo-test", "name": "lambda-qwen2.5-32b-dpo-test", "developer": "tanliboy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8084, "hfopenllm_v2/BBH": 0.6764, "hfopenllm_v2/MATH Level 5": 0.6103, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4274, "hfopenllm_v2/MMLU-PRO": 0.5657 } }, { "id": "tannedbum/Ellaria-9B", "name": "Ellaria-9B", "developer": "tannedbum", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7826, "hfopenllm_v2/BBH": 0.5942, "hfopenllm_v2/MATH Level 5": 0.2077, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4151, "hfopenllm_v2/MMLU-PRO": 0.4205 } }, { "id": "tannedbum/L3-Nymeria-Maid-8B", "name": "L3-Nymeria-Maid-8B", "developer": "tannedbum", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.725, "hfopenllm_v2/BBH": 0.5146, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.3751, "hfopenllm_v2/MMLU-PRO": 0.3747 } }, { "id": "tannedbum/L3-Nymeria-v2-8B", "name": "L3-Nymeria-v2-8B", "developer": "tannedbum", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7168, "hfopenllm_v2/BBH": 0.5224, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.3699, "hfopenllm_v2/MMLU-PRO": 0.3753 } }, { "id": "tannedbum/L3-Rhaenys-8B", "name": "L3-Rhaenys-8B", "developer": "tannedbum", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7363, "hfopenllm_v2/BBH": 0.5299, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3725, "hfopenllm_v2/MMLU-PRO": 0.3799 } }, { "id": "Tarek07/Progenitor-V1.1-LLaMa-70B", "name": "Progenitor-V1.1-LLaMa-70B", "developer": "Tarek07", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6906, "hfopenllm_v2/BBH": 0.6971, "hfopenllm_v2/MATH Level 5": 0.3573, "hfopenllm_v2/GPQA": 0.4581, "hfopenllm_v2/MUSR": 0.4736, "hfopenllm_v2/MMLU-PRO": 0.5465 } }, { "id": "Tarek07/Thalassic-Alpha-LLaMa-70B", "name": "Thalassic-Alpha-LLaMa-70B", "developer": "Tarek07", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7003, "hfopenllm_v2/BBH": 0.694, "hfopenllm_v2/MATH Level 5": 0.315, "hfopenllm_v2/GPQA": 0.4438, "hfopenllm_v2/MUSR": 0.4802, "hfopenllm_v2/MMLU-PRO": 0.5435 } }, { "id": "TeeZee/DoubleBagel-57B-v1.0", "name": "DoubleBagel-57B-v1.0", "developer": "TeeZee", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2336, "hfopenllm_v2/BBH": 0.3251, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4315, "hfopenllm_v2/MMLU-PRO": 0.1478 } }, { "id": "teknium/CollectiveCognition-v1.1-Mistral-7B", "name": "CollectiveCognition-v1.1-Mistral-7B", "developer": "teknium", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.279, "hfopenllm_v2/BBH": 0.4493, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.2837 } }, { "id": "teknium/OpenHermes-13B", "name": "OpenHermes-13B", "developer": "teknium", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2668, "hfopenllm_v2/BBH": 0.4206, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.4043, "hfopenllm_v2/MMLU-PRO": 0.2389 } }, { "id": "teknium/OpenHermes-2-Mistral-7B", "name": "OpenHermes-2-Mistral-7B", "developer": "teknium", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5286, "hfopenllm_v2/BBH": 0.4948, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.452, "hfopenllm_v2/MMLU-PRO": 0.2931 } }, { "id": "teknium/OpenHermes-2.5-Mistral-7B", "name": "OpenHermes-2.5-Mistral-7B", "developer": "teknium", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5571, "hfopenllm_v2/BBH": 0.487, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4242, "hfopenllm_v2/MMLU-PRO": 0.3054 } }, { "id": "teknium/OpenHermes-7B", "name": "OpenHermes-7B", "developer": "teknium", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1813, "hfopenllm_v2/BBH": 0.362, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.4324, "hfopenllm_v2/MMLU-PRO": 0.1933 } }, { "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0", "name": "Indic-gemma-2b-finetuned-sft-Navarasa-2.0", "developer": "Telugu-LLM-Labs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2103, "hfopenllm_v2/BBH": 0.3241, "hfopenllm_v2/MATH Level 5": 0.0272, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3899, "hfopenllm_v2/MMLU-PRO": 0.1279 } }, { "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0", "name": "Indic-gemma-7b-finetuned-sft-Navarasa-2.0", "developer": "Telugu-LLM-Labs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3237, "hfopenllm_v2/BBH": 0.4023, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4083, "hfopenllm_v2/MMLU-PRO": 0.235 } }, { "id": "TencentARC/LLaMA-Pro-8B", "name": "LLaMA-Pro-8B", "developer": "TencentARC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2277, "hfopenllm_v2/BBH": 0.3484, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.4018, "hfopenllm_v2/MMLU-PRO": 0.1811 } }, { "id": "TencentARC/LLaMA-Pro-8B-Instruct", "name": "LLaMA-Pro-8B-Instruct", "developer": "TencentARC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4486, "hfopenllm_v2/BBH": 0.4224, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.419, "hfopenllm_v2/MMLU-PRO": 0.1946 } }, { "id": "TencentARC/MetaMath-Mistral-Pro", "name": "MetaMath-Mistral-Pro", "developer": "TencentARC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2119, "hfopenllm_v2/BBH": 0.4413, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3524, "hfopenllm_v2/MMLU-PRO": 0.2472 } }, { "id": "TencentARC/Mistral_Pro_8B_v0.1", "name": "Mistral_Pro_8B_v0.1", "developer": "TencentARC", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2115, "hfopenllm_v2/BBH": 0.4526, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4242, "hfopenllm_v2/MMLU-PRO": 0.2765 } }, { "id": "tensopolis/falcon3-10b-tensopolis-v1", "name": "falcon3-10b-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7817, "hfopenllm_v2/BBH": 0.6182, "hfopenllm_v2/MATH Level 5": 0.2749, "hfopenllm_v2/GPQA": 0.3297, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.442 } }, { "id": "tensopolis/falcon3-10b-tensopolis-v2", "name": "falcon3-10b-tensopolis-v2", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7792, "hfopenllm_v2/BBH": 0.6182, "hfopenllm_v2/MATH Level 5": 0.2666, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4297, "hfopenllm_v2/MMLU-PRO": 0.4424 } }, { "id": "tensopolis/lamarckvergence-14b-tensopolis-v1", "name": "lamarckvergence-14b-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7604, "hfopenllm_v2/BBH": 0.6561, "hfopenllm_v2/MATH Level 5": 0.5166, "hfopenllm_v2/GPQA": 0.3607, "hfopenllm_v2/MUSR": 0.4475, "hfopenllm_v2/MMLU-PRO": 0.525 } }, { "id": "tensopolis/mistral-small-2501-tensopolis-v1", "name": "mistral-small-2501-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7762, "hfopenllm_v2/BBH": 0.6475, "hfopenllm_v2/MATH Level 5": 0.4441, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.428, "hfopenllm_v2/MMLU-PRO": 0.4465 } }, { "id": "tensopolis/mistral-small-r1-tensopolis", "name": "mistral-small-r1-tensopolis", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4622, "hfopenllm_v2/BBH": 0.5436, "hfopenllm_v2/MATH Level 5": 0.2908, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.3738, "hfopenllm_v2/MMLU-PRO": 0.4035 } }, { "id": "tensopolis/phi-4-tensopolis-v1", "name": "phi-4-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6767, "hfopenllm_v2/BBH": 0.6872, "hfopenllm_v2/MATH Level 5": 0.494, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.5384 } }, { "id": "tensopolis/qwen2.5-14b-tensopolis-v1", "name": "qwen2.5-14b-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.799, "hfopenllm_v2/BBH": 0.6364, "hfopenllm_v2/MATH Level 5": 0.5295, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4193, "hfopenllm_v2/MMLU-PRO": 0.4911 } }, { "id": "tensopolis/qwen2.5-3b-or1-tensopolis", "name": "qwen2.5-3b-or1-tensopolis", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.354, "hfopenllm_v2/BBH": 0.4421, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3749, "hfopenllm_v2/MMLU-PRO": 0.3197 } }, { "id": "tensopolis/qwen2.5-7b-tensopolis-v1", "name": "qwen2.5-7b-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7661, "hfopenllm_v2/BBH": 0.5379, "hfopenllm_v2/MATH Level 5": 0.4562, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.4269 } }, { "id": "tensopolis/qwen2.5-7b-tensopolis-v2", "name": "qwen2.5-7b-tensopolis-v2", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7521, "hfopenllm_v2/BBH": 0.5415, "hfopenllm_v2/MATH Level 5": 0.4819, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4246, "hfopenllm_v2/MMLU-PRO": 0.4243 } }, { "id": "tensopolis/virtuoso-lite-tensopolis-v1", "name": "virtuoso-lite-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8069, "hfopenllm_v2/BBH": 0.6102, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4582, "hfopenllm_v2/MMLU-PRO": 0.4435 } }, { "id": "tensopolis/virtuoso-lite-tensopolis-v2", "name": "virtuoso-lite-tensopolis-v2", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8029, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.25, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.444 } }, { "id": "tensopolis/virtuoso-small-tensopolis-v1", "name": "virtuoso-small-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.6415, "hfopenllm_v2/MATH Level 5": 0.3527, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4326, "hfopenllm_v2/MMLU-PRO": 0.4968 } }, { "id": "tensopolis/virtuoso-small-tensopolis-v2", "name": "virtuoso-small-tensopolis-v2", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.802, "hfopenllm_v2/BBH": 0.6516, "hfopenllm_v2/MATH Level 5": 0.3875, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4352, "hfopenllm_v2/MMLU-PRO": 0.5154 } }, { "id": "tensopolis/virtuoso-small-v2-tensopolis-v1", "name": "virtuoso-small-v2-tensopolis-v1", "developer": "tensopolis", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8419, "hfopenllm_v2/BBH": 0.6545, "hfopenllm_v2/MATH Level 5": 0.4524, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4509, "hfopenllm_v2/MMLU-PRO": 0.5175 } }, { "id": "tensoropera/Fox-1-1.6B", "name": "Fox-1-1.6B", "developer": "tensoropera", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2766, "hfopenllm_v2/BBH": 0.3307, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.355, "hfopenllm_v2/MMLU-PRO": 0.1371 } }, { "id": "tenyx/Llama3-TenyxChat-70B", "name": "Llama3-TenyxChat-70B", "developer": "tenyx", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8087, "hfopenllm_v2/BBH": 0.6511, "hfopenllm_v2/MATH Level 5": 0.2356, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.426, "hfopenllm_v2/MMLU-PRO": 0.521 } }, { "id": "TheDrummer/Cydonia-22B-v1.2", "name": "Cydonia-22B-v1.2", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5635, "hfopenllm_v2/BBH": 0.5809, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4022, "hfopenllm_v2/MMLU-PRO": 0.4141 } }, { "id": "TheDrummer/Gemmasutra-9B-v1", "name": "Gemmasutra-9B-v1", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2416, "hfopenllm_v2/BBH": 0.5887, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4846, "hfopenllm_v2/MMLU-PRO": 0.4045 } }, { "id": "TheDrummer/Gemmasutra-Mini-2B-v1", "name": "Gemmasutra-Mini-2B-v1", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2549, "hfopenllm_v2/BBH": 0.3575, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.349, "hfopenllm_v2/MMLU-PRO": 0.2055 } }, { "id": "TheDrummer/Llama-3SOME-8B-v2", "name": "Llama-3SOME-8B-v2", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4508, "hfopenllm_v2/BBH": 0.5203, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3833, "hfopenllm_v2/MMLU-PRO": 0.3753 } }, { "id": "TheDrummer/Ministrations-8B-v1", "name": "Ministrations-8B-v1", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2822, "hfopenllm_v2/BBH": 0.4877, "hfopenllm_v2/MATH Level 5": 0.1843, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4449, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "TheDrummer/Rocinante-12B-v1", "name": "Rocinante-12B-v1", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6076, "hfopenllm_v2/BBH": 0.5065, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4017, "hfopenllm_v2/MMLU-PRO": 0.3477 } }, { "id": "TheDrummer/Tiger-Gemma-9B-v1", "name": "Tiger-Gemma-9B-v1", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7282, "hfopenllm_v2/BBH": 0.5704, "hfopenllm_v2/MATH Level 5": 0.1835, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4162, "hfopenllm_v2/MMLU-PRO": 0.4118 } }, { "id": "TheDrummer/Tiger-Gemma-9B-v2", "name": "Tiger-Gemma-9B-v2", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6986, "hfopenllm_v2/BBH": 0.5617, "hfopenllm_v2/MATH Level 5": 0.182, "hfopenllm_v2/GPQA": 0.3398, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.4112 } }, { "id": "TheDrummer/Tiger-Gemma-9B-v3", "name": "Tiger-Gemma-9B-v3", "developer": "TheDrummer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6821, "hfopenllm_v2/BBH": 0.5812, "hfopenllm_v2/MATH Level 5": 0.1624, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4004, "hfopenllm_v2/MMLU-PRO": 0.4059 } }, { "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B", "name": "Daughter-of-Rhodia-12B", "developer": "TheDrunkenSnail", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6904, "hfopenllm_v2/BBH": 0.5179, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4348, "hfopenllm_v2/MMLU-PRO": 0.3641 } }, { "id": "TheDrunkenSnail/Mother-of-Rhodia-12B", "name": "Mother-of-Rhodia-12B", "developer": "TheDrunkenSnail", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6505, "hfopenllm_v2/BBH": 0.4948, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.3551 } }, { "id": "TheDrunkenSnail/Son-of-Rhodia", "name": "Son-of-Rhodia", "developer": "TheDrunkenSnail", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7046, "hfopenllm_v2/BBH": 0.5097, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3608 } }, { "id": "TheHierophant/Underground-Cognitive-V0.3-test", "name": "Underground-Cognitive-V0.3-test", "developer": "TheHierophant", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4808, "hfopenllm_v2/BBH": 0.529, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4351, "hfopenllm_v2/MMLU-PRO": 0.3318 } }, { "id": "theo77186/Qwen2.5-Coder-7B-Instruct-20241106", "name": "Qwen2.5-Coder-7B-Instruct-20241106", "developer": "theo77186", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6101, "hfopenllm_v2/BBH": 0.5008, "hfopenllm_v2/MATH Level 5": 0.3882, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3353 } }, { "id": "theprint/Boptruth-Agatha-7B", "name": "Boptruth-Agatha-7B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3124, "hfopenllm_v2/BBH": 0.4984, "hfopenllm_v2/MATH Level 5": 0.0551, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4277, "hfopenllm_v2/MMLU-PRO": 0.2861 } }, { "id": "theprint/CleverBoi-7B-v2", "name": "CleverBoi-7B-v2", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.217, "hfopenllm_v2/BBH": 0.4532, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4695, "hfopenllm_v2/MMLU-PRO": 0.2709 } }, { "id": "theprint/CleverBoi-7B-v3", "name": "CleverBoi-7B-v3", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2382, "hfopenllm_v2/BBH": 0.4414, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4072, "hfopenllm_v2/MMLU-PRO": 0.2868 } }, { "id": "theprint/CleverBoi-Llama-3.1-8B-Instruct", "name": "CleverBoi-Llama-3.1-8B-Instruct", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1682, "hfopenllm_v2/BBH": 0.456, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4014, "hfopenllm_v2/MMLU-PRO": 0.3075 } }, { "id": "theprint/CleverBoi-Llama-3.1-8B-v2", "name": "CleverBoi-Llama-3.1-8B-v2", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1961, "hfopenllm_v2/BBH": 0.4668, "hfopenllm_v2/MATH Level 5": 0.0529, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3735, "hfopenllm_v2/MMLU-PRO": 0.3188 } }, { "id": "theprint/CleverBoi-Nemo-12B-v2", "name": "CleverBoi-Nemo-12B-v2", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2046, "hfopenllm_v2/BBH": 0.5241, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3228 } }, { "id": "theprint/Code-Llama-Bagel-8B", "name": "Code-Llama-Bagel-8B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.253, "hfopenllm_v2/BBH": 0.4697, "hfopenllm_v2/MATH Level 5": 0.0612, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.368, "hfopenllm_v2/MMLU-PRO": 0.2822 } }, { "id": "theprint/Conversely-Mistral-7B", "name": "Conversely-Mistral-7B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2608, "hfopenllm_v2/BBH": 0.4672, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4189, "hfopenllm_v2/MMLU-PRO": 0.2826 } }, { "id": "theprint/Llama-3.2-3B-VanRossum", "name": "Llama-3.2-3B-VanRossum", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4783, "hfopenllm_v2/BBH": 0.4279, "hfopenllm_v2/MATH Level 5": 0.0974, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3442, "hfopenllm_v2/MMLU-PRO": 0.277 } }, { "id": "theprint/phi-3-mini-4k-python", "name": "phi-3-mini-4k-python", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2409, "hfopenllm_v2/BBH": 0.4938, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.3577 } }, { "id": "theprint/ReWiz-7B", "name": "ReWiz-7B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4048, "hfopenllm_v2/BBH": 0.4564, "hfopenllm_v2/MATH Level 5": 0.0408, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4612, "hfopenllm_v2/MMLU-PRO": 0.267 } }, { "id": "theprint/ReWiz-Llama-3.1-8B-v2", "name": "ReWiz-Llama-3.1-8B-v2", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2379, "hfopenllm_v2/BBH": 0.4632, "hfopenllm_v2/MATH Level 5": 0.0574, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3814, "hfopenllm_v2/MMLU-PRO": 0.331 } }, { "id": "theprint/ReWiz-Llama-3.2-3B", "name": "ReWiz-Llama-3.2-3B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4649, "hfopenllm_v2/BBH": 0.4343, "hfopenllm_v2/MATH Level 5": 0.1095, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3614, "hfopenllm_v2/MMLU-PRO": 0.2887 } }, { "id": "theprint/ReWiz-Nemo-12B-Instruct", "name": "ReWiz-Nemo-12B-Instruct", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1062, "hfopenllm_v2/BBH": 0.5092, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4096, "hfopenllm_v2/MMLU-PRO": 0.3339 } }, { "id": "theprint/ReWiz-Qwen-2.5-14B", "name": "ReWiz-Qwen-2.5-14B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2785, "hfopenllm_v2/BBH": 0.6179, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.38, "hfopenllm_v2/MUSR": 0.4539, "hfopenllm_v2/MMLU-PRO": 0.5092 } }, { "id": "theprint/ReWiz-Worldbuilder-7B", "name": "ReWiz-Worldbuilder-7B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.251, "hfopenllm_v2/BBH": 0.4636, "hfopenllm_v2/MATH Level 5": 0.037, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.4572, "hfopenllm_v2/MMLU-PRO": 0.2971 } }, { "id": "theprint/RuDolph-Hermes-7B", "name": "RuDolph-Hermes-7B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3604, "hfopenllm_v2/BBH": 0.5053, "hfopenllm_v2/MATH Level 5": 0.0514, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4226, "hfopenllm_v2/MMLU-PRO": 0.3073 } }, { "id": "theprint/WorldBuilder-12B", "name": "WorldBuilder-12B", "developer": "theprint", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1374, "hfopenllm_v2/BBH": 0.501, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4066, "hfopenllm_v2/MMLU-PRO": 0.3192 } }, { "id": "TheTsar1209/nemo-carpmuscle-v0.1", "name": "nemo-carpmuscle-v0.1", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2276, "hfopenllm_v2/BBH": 0.5084, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4135, "hfopenllm_v2/MMLU-PRO": 0.3406 } }, { "id": "TheTsar1209/qwen-carpmuscle-r-v0.3", "name": "qwen-carpmuscle-r-v0.3", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4455, "hfopenllm_v2/BBH": 0.6227, "hfopenllm_v2/MATH Level 5": 0.3006, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4278, "hfopenllm_v2/MMLU-PRO": 0.5103 } }, { "id": "TheTsar1209/qwen-carpmuscle-v0.1", "name": "qwen-carpmuscle-v0.1", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5622, "hfopenllm_v2/BBH": 0.6434, "hfopenllm_v2/MATH Level 5": 0.2628, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4161, "hfopenllm_v2/MMLU-PRO": 0.52 } }, { "id": "TheTsar1209/qwen-carpmuscle-v0.2", "name": "qwen-carpmuscle-v0.2", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5257, "hfopenllm_v2/BBH": 0.6387, "hfopenllm_v2/MATH Level 5": 0.2832, "hfopenllm_v2/GPQA": 0.3557, "hfopenllm_v2/MUSR": 0.4346, "hfopenllm_v2/MMLU-PRO": 0.5147 } }, { "id": "TheTsar1209/qwen-carpmuscle-v0.3", "name": "qwen-carpmuscle-v0.3", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4476, "hfopenllm_v2/BBH": 0.6152, "hfopenllm_v2/MATH Level 5": 0.3134, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4132, "hfopenllm_v2/MMLU-PRO": 0.5062 } }, { "id": "TheTsar1209/qwen-carpmuscle-v0.4", "name": "qwen-carpmuscle-v0.4", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7202, "hfopenllm_v2/BBH": 0.6454, "hfopenllm_v2/MATH Level 5": 0.2772, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4516, "hfopenllm_v2/MMLU-PRO": 0.5144 } }, { "id": "TheTsar1209/qwen-carpmuscle-v0.4.1", "name": "qwen-carpmuscle-v0.4.1", "developer": "TheTsar1209", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.736, "hfopenllm_v2/BBH": 0.6507, "hfopenllm_v2/MATH Level 5": 0.2779, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4489, "hfopenllm_v2/MMLU-PRO": 0.5191 } }, { "id": "thinkcoder/llama3-8b-instruct-lora-8-sft", "name": "llama3-8b-instruct-lora-8-sft", "developer": "thinkcoder", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.648, "hfopenllm_v2/BBH": 0.4865, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3235, "hfopenllm_v2/MMLU-PRO": 0.3476 } }, { "id": "thirdeyeai/elevate360m", "name": "elevate360m", "developer": "thirdeyeai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0445, "hfopenllm_v2/BBH": 0.2963, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3462, "hfopenllm_v2/MMLU-PRO": 0.1077 } }, { "id": "thomas-yanxin/XinYuan-Qwen2-1_5B", "name": "XinYuan-Qwen2-1_5B", "developer": "thomas-yanxin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2986, "hfopenllm_v2/BBH": 0.3635, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3634, "hfopenllm_v2/MMLU-PRO": 0.2357 } }, { "id": "thomas-yanxin/XinYuan-Qwen2-7B", "name": "XinYuan-Qwen2-7B", "developer": "thomas-yanxin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4438, "hfopenllm_v2/BBH": 0.4937, "hfopenllm_v2/MATH Level 5": 0.1458, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4058, "hfopenllm_v2/MMLU-PRO": 0.3925 } }, { "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917", "name": "XinYuan-Qwen2-7B-0917", "developer": "thomas-yanxin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3719, "hfopenllm_v2/BBH": 0.5169, "hfopenllm_v2/MATH Level 5": 0.1979, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4401, "hfopenllm_v2/MMLU-PRO": 0.4245 } }, { "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917", "name": "XinYuan-Qwen2.5-7B-0917", "developer": "thomas-yanxin", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3577, "hfopenllm_v2/BBH": 0.5184, "hfopenllm_v2/MATH Level 5": 0.1934, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3676, "hfopenllm_v2/MMLU-PRO": 0.3882 } }, { "id": "THUDM/glm-4-9b", "name": "glm-4-9b", "developer": "THUDM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1426, "hfopenllm_v2/BBH": 0.5528, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3163, "hfopenllm_v2/MUSR": 0.4386, "hfopenllm_v2/MMLU-PRO": 0.4145 } }, { "id": "THUDM/glm-4-9b-chat", "name": "glm-4-9b-chat", "developer": "THUDM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.4736, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.3167 } }, { "id": "THUDM/glm-4-9b-chat-1m", "name": "glm-4-9b-chat-1m", "developer": "THUDM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0, "hfopenllm_v2/BBH": 0.418, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3795, "hfopenllm_v2/MMLU-PRO": 0.3163 } }, { "id": "THUDM/glm-4-9b-chat-1m-hf", "name": "glm-4-9b-chat-1m-hf", "developer": "THUDM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5341, "hfopenllm_v2/BBH": 0.3901, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3689, "hfopenllm_v2/MMLU-PRO": 0.1814 } }, { "id": "THUDM/glm-4-9b-chat-hf", "name": "glm-4-9b-chat-hf", "developer": "THUDM", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6513, "hfopenllm_v2/BBH": 0.4432, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3593, "hfopenllm_v2/MMLU-PRO": 0.2774 } }, { "id": "tianyil1/MistralForCausalLM_Cal_DPO", "name": "MistralForCausalLM_Cal_DPO", "developer": "tianyil1", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5328, "hfopenllm_v2/BBH": 0.4381, "hfopenllm_v2/MATH Level 5": 0.0287, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.2763 } }, { "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule", "name": "AceCoder-Qwen2.5-7B-Ins-Rule", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7424, "hfopenllm_v2/BBH": 0.5404, "hfopenllm_v2/MATH Level 5": 0.4992, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.398, "hfopenllm_v2/MMLU-PRO": 0.4322 } }, { "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule", "name": "AceCoder-Qwen2.5-Coder-7B-Base-Rule", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4408, "hfopenllm_v2/BBH": 0.4902, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3449, "hfopenllm_v2/MMLU-PRO": 0.3745 } }, { "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule", "name": "AceCoder-Qwen2.5-Coder-7B-Ins-Rule", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6222, "hfopenllm_v2/BBH": 0.5089, "hfopenllm_v2/MATH Level 5": 0.3603, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4046, "hfopenllm_v2/MMLU-PRO": 0.3428 } }, { "id": "TIGER-Lab/AceCodeRM-7B", "name": "AceCodeRM-7B", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5855, "hfopenllm_v2/BBH": 0.4773, "hfopenllm_v2/MATH Level 5": 0.3467, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4192, "hfopenllm_v2/MMLU-PRO": 0.3361 } }, { "id": "TIGER-Lab/MAmmoTH2-7B-Plus", "name": "MAmmoTH2-7B-Plus", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5575, "hfopenllm_v2/BBH": 0.4235, "hfopenllm_v2/MATH Level 5": 0.1858, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.4124, "hfopenllm_v2/MMLU-PRO": 0.3017 } }, { "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT", "name": "Qwen2.5-Math-7B-CFT", "developer": "TIGER-Lab", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2777, "hfopenllm_v2/BBH": 0.4637, "hfopenllm_v2/MATH Level 5": 0.5574, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3887, "hfopenllm_v2/MMLU-PRO": 0.2945 } }, { "id": "tii-uae/falcon3-10b-instruct-fc", "name": "Falcon3-10B-Instruct (FC)", "developer": "tii-uae", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 82.0, "bfcl/bfcl.overall.overall_accuracy": 27.01, "bfcl/bfcl.overall.total_cost_usd": 52.59, "bfcl/bfcl.overall.latency_mean_s": 69.27, "bfcl/bfcl.overall.latency_std_s": 92.22, "bfcl/bfcl.overall.latency_p95_s": 190.96, "bfcl/bfcl.non_live.ast_accuracy": 85.0, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 87.5, "bfcl/bfcl.live.live_accuracy": 75.43, "bfcl/bfcl.live.live_simple_ast_accuracy": 77.13, "bfcl/bfcl.live.live_multiple_ast_accuracy": 76.16, "bfcl/bfcl.live.live_parallel_ast_accuracy": 50.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 41.67, "bfcl/bfcl.multi_turn.accuracy": 6.5, "bfcl/bfcl.multi_turn.base_accuracy": 6.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 9.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 5.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 5.0, "bfcl/bfcl.web_search.accuracy": 1.5, "bfcl/bfcl.web_search.base_accuracy": 2.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 27.53, "bfcl/bfcl.memory.kv_accuracy": 12.26, "bfcl/bfcl.memory.vector_accuracy": 19.35, "bfcl/bfcl.memory.recursive_summarization_accuracy": 50.97, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 32.09 } }, { "id": "tii-uae/falcon3-1b-instruct-fc", "name": "Falcon3-1B-Instruct (FC)", "developer": "tii-uae", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 106.0, "bfcl/bfcl.overall.overall_accuracy": 11.08, "bfcl/bfcl.overall.total_cost_usd": 1.72, "bfcl/bfcl.overall.latency_mean_s": 5.23, "bfcl/bfcl.overall.latency_std_s": 14.34, "bfcl/bfcl.overall.latency_p95_s": 11.48, "bfcl/bfcl.non_live.ast_accuracy": 9.02, "bfcl/bfcl.non_live.simple_ast_accuracy": 2.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 6.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 18.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 9.5, "bfcl/bfcl.live.live_accuracy": 2.89, "bfcl/bfcl.live.live_simple_ast_accuracy": 4.26, "bfcl/bfcl.live.live_multiple_ast_accuracy": 2.37, "bfcl/bfcl.live.live_parallel_ast_accuracy": 0.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 12.5, "bfcl/bfcl.multi_turn.accuracy": 0.0, "bfcl/bfcl.multi_turn.base_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 5.81, "bfcl/bfcl.memory.kv_accuracy": 5.16, "bfcl/bfcl.memory.vector_accuracy": 7.74, "bfcl/bfcl.memory.recursive_summarization_accuracy": 4.52, "bfcl/bfcl.relevance.relevance_detection_accuracy": 0.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 87.3 } }, { "id": "tii-uae/falcon3-3b-instruct-fc", "name": "Falcon3-3B-Instruct (FC)", "developer": "tii-uae", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 104.0, "bfcl/bfcl.overall.overall_accuracy": 16.25, "bfcl/bfcl.overall.total_cost_usd": 36.7, "bfcl/bfcl.overall.latency_mean_s": 38.52, "bfcl/bfcl.overall.latency_std_s": 107.47, "bfcl/bfcl.overall.latency_p95_s": 103.62, "bfcl/bfcl.non_live.ast_accuracy": 54.62, "bfcl/bfcl.non_live.simple_ast_accuracy": 56.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 69.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 67.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 25.5, "bfcl/bfcl.live.live_accuracy": 54.48, "bfcl/bfcl.live.live_simple_ast_accuracy": 57.36, "bfcl/bfcl.live.live_multiple_ast_accuracy": 54.7, "bfcl/bfcl.live.live_parallel_ast_accuracy": 25.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 33.33, "bfcl/bfcl.multi_turn.accuracy": 1.0, "bfcl/bfcl.multi_turn.base_accuracy": 1.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 1.5, "bfcl/bfcl.web_search.accuracy": 1.0, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 7.74, "bfcl/bfcl.memory.kv_accuracy": 6.45, "bfcl/bfcl.memory.vector_accuracy": 8.39, "bfcl/bfcl.memory.recursive_summarization_accuracy": 8.39, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 32.92 } }, { "id": "tii-uae/falcon3-7b-instruct-fc", "name": "Falcon3-7B-Instruct (FC)", "developer": "tii-uae", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 91.0, "bfcl/bfcl.overall.overall_accuracy": 24.03, "bfcl/bfcl.overall.total_cost_usd": 73.61, "bfcl/bfcl.overall.latency_mean_s": 93.11, "bfcl/bfcl.overall.latency_std_s": 117.8, "bfcl/bfcl.overall.latency_p95_s": 315.7, "bfcl/bfcl.non_live.ast_accuracy": 82.69, "bfcl/bfcl.non_live.simple_ast_accuracy": 65.75, "bfcl/bfcl.non_live.multiple_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 87.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.0, "bfcl/bfcl.live.live_accuracy": 68.32, "bfcl/bfcl.live.live_simple_ast_accuracy": 74.81, "bfcl/bfcl.live.live_multiple_ast_accuracy": 66.76, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 5.0, "bfcl/bfcl.multi_turn.base_accuracy": 7.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 4.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 5.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 4.0, "bfcl/bfcl.web_search.accuracy": 0.5, "bfcl/bfcl.web_search.base_accuracy": 1.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 20.65, "bfcl/bfcl.memory.kv_accuracy": 10.32, "bfcl/bfcl.memory.vector_accuracy": 12.9, "bfcl/bfcl.memory.recursive_summarization_accuracy": 38.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 100.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 31.99 } }, { "id": "tiiuae/falcon-11B", "name": "falcon-11B", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3261, "hfopenllm_v2/BBH": 0.4392, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3986, "hfopenllm_v2/MMLU-PRO": 0.2389 } }, { "id": "tiiuae/falcon-40b", "name": "Falcon 40B", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.729, "helm_classic/MMLU": 0.509, "helm_classic/BoolQ": 0.819, "helm_classic/NarrativeQA": 0.673, "helm_classic/NaturalQuestions (open-book)": 0.675, "helm_classic/QuAC": 0.307, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.353, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.959, "helm_classic/CivilComments": 0.552, "helm_classic/RAFT": 0.661, "helm_lite/Mean win rate": 0.217, "helm_lite/NarrativeQA": 0.671, "helm_lite/NaturalQuestions (closed-book)": 0.392, "helm_lite/OpenbookQA": 0.662, "helm_lite/MMLU": 0.507, "helm_lite/MATH": 0.128, "helm_lite/GSM8K": 0.267, "helm_lite/LegalBench": 0.442, "helm_lite/MedQA": 0.419, "helm_lite/WMT 2014": 0.162, "hfopenllm_v2/IFEval": 0.2496, "hfopenllm_v2/BBH": 0.4019, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3631, "hfopenllm_v2/MMLU-PRO": 0.2505 } }, { "id": "tiiuae/falcon-40b-instruct", "name": "falcon-40b-instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2454, "hfopenllm_v2/BBH": 0.4054, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3762, "hfopenllm_v2/MMLU-PRO": 0.2261 } }, { "id": "tiiuae/falcon-7b", "name": "Falcon 7B", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.378, "helm_classic/MMLU": 0.286, "helm_classic/BoolQ": 0.753, "helm_classic/NarrativeQA": 0.621, "helm_classic/NaturalQuestions (open-book)": 0.579, "helm_classic/QuAC": 0.332, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.234, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.836, "helm_classic/CivilComments": 0.514, "helm_classic/RAFT": 0.602, "helm_lite/Mean win rate": 0.064, "helm_lite/NarrativeQA": 0.621, "helm_lite/NaturalQuestions (closed-book)": 0.285, "helm_lite/OpenbookQA": 0.26, "helm_lite/MMLU": 0.288, "helm_lite/MATH": 0.044, "helm_lite/GSM8K": 0.055, "helm_lite/LegalBench": 0.346, "helm_lite/MedQA": 0.254, "helm_lite/WMT 2014": 0.094, "hfopenllm_v2/IFEval": 0.1821, "hfopenllm_v2/BBH": 0.3285, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3778, "hfopenllm_v2/MMLU-PRO": 0.1125 } }, { "id": "tiiuae/falcon-7b-instruct", "name": "falcon-7b-instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1969, "hfopenllm_v2/BBH": 0.3203, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3634, "hfopenllm_v2/MMLU-PRO": 0.1155 } }, { "id": "tiiuae/Falcon-Instruct-40B", "name": "Falcon-Instruct 40B", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.727, "helm_classic/MMLU": 0.497, "helm_classic/BoolQ": 0.829, "helm_classic/NarrativeQA": 0.625, "helm_classic/NaturalQuestions (open-book)": 0.666, "helm_classic/QuAC": 0.371, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.384, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.959, "helm_classic/CivilComments": 0.603, "helm_classic/RAFT": 0.586 } }, { "id": "tiiuae/Falcon-Instruct-7B", "name": "Falcon-Instruct 7B", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.244, "helm_classic/MMLU": 0.275, "helm_classic/BoolQ": 0.72, "helm_classic/NarrativeQA": 0.476, "helm_classic/NaturalQuestions (open-book)": 0.449, "helm_classic/QuAC": 0.311, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.213, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.852, "helm_classic/CivilComments": 0.511, "helm_classic/RAFT": 0.523 } }, { "id": "tiiuae/falcon-mamba-7b", "name": "falcon-mamba-7b", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3336, "hfopenllm_v2/BBH": 0.4285, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.421, "hfopenllm_v2/MMLU-PRO": 0.2302 } }, { "id": "tiiuae/Falcon3-10B-Base", "name": "Falcon3-10B-Base", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3648, "hfopenllm_v2/BBH": 0.595, "hfopenllm_v2/MATH Level 5": 0.2492, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4398, "hfopenllm_v2/MMLU-PRO": 0.424 } }, { "id": "tiiuae/Falcon3-10B-Instruct", "name": "Falcon3-10B-Instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7817, "hfopenllm_v2/BBH": 0.617, "hfopenllm_v2/MATH Level 5": 0.2764, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.4429 } }, { "id": "tiiuae/Falcon3-1B-Base", "name": "Falcon3-1B-Base", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2428, "hfopenllm_v2/BBH": 0.3571, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4147, "hfopenllm_v2/MMLU-PRO": 0.1608 } }, { "id": "tiiuae/Falcon3-1B-Instruct", "name": "Falcon3-1B-Instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5557, "hfopenllm_v2/BBH": 0.3745, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.4189, "hfopenllm_v2/MMLU-PRO": 0.1838 } }, { "id": "tiiuae/Falcon3-3B-Base", "name": "Falcon3-3B-Base", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2765, "hfopenllm_v2/BBH": 0.4421, "hfopenllm_v2/MATH Level 5": 0.1178, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.375, "hfopenllm_v2/MMLU-PRO": 0.2879 } }, { "id": "tiiuae/Falcon3-3B-Instruct", "name": "Falcon3-3B-Instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6977, "hfopenllm_v2/BBH": 0.4754, "hfopenllm_v2/MATH Level 5": 0.25, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.3005 } }, { "id": "tiiuae/Falcon3-7B-Base", "name": "Falcon3-7B-Base", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3416, "hfopenllm_v2/BBH": 0.5099, "hfopenllm_v2/MATH Level 5": 0.1941, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4702, "hfopenllm_v2/MMLU-PRO": 0.391 } }, { "id": "tiiuae/Falcon3-7B-Instruct", "name": "Falcon3-7B-Instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7612, "hfopenllm_v2/BBH": 0.5632, "hfopenllm_v2/MATH Level 5": 0.4086, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4827, "hfopenllm_v2/MMLU-PRO": 0.4087 } }, { "id": "tiiuae/Falcon3-Mamba-7B-Base", "name": "Falcon3-Mamba-7B-Base", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2891, "hfopenllm_v2/BBH": 0.4699, "hfopenllm_v2/MATH Level 5": 0.1941, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3431, "hfopenllm_v2/MMLU-PRO": 0.3038 } }, { "id": "tiiuae/Falcon3-Mamba-7B-Instruct", "name": "Falcon3-Mamba-7B-Instruct", "developer": "tiiuae", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7165, "hfopenllm_v2/BBH": 0.4679, "hfopenllm_v2/MATH Level 5": 0.3006, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3869, "hfopenllm_v2/MMLU-PRO": 0.3369 } }, { "id": "Tijmen2/cosmosage-v3", "name": "cosmosage-v3", "developer": "Tijmen2", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4482, "hfopenllm_v2/BBH": 0.4551, "hfopenllm_v2/MATH Level 5": 0.0506, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4199, "hfopenllm_v2/MMLU-PRO": 0.2486 } }, { "id": "tinycompany/BiBo-v0.3", "name": "BiBo-v0.3", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5184, "hfopenllm_v2/BBH": 0.4642, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.395, "hfopenllm_v2/MMLU-PRO": 0.2995 } }, { "id": "tinycompany/BiBo-v0.7", "name": "BiBo-v0.7", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3738, "hfopenllm_v2/BBH": 0.4311, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4044, "hfopenllm_v2/MMLU-PRO": 0.265 } }, { "id": "tinycompany/ShawtyIsBad-bgem3", "name": "ShawtyIsBad-bgem3", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2608, "hfopenllm_v2/BBH": 0.3853, "hfopenllm_v2/MATH Level 5": 0.0483, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3695, "hfopenllm_v2/MMLU-PRO": 0.2583 } }, { "id": "tinycompany/ShawtyIsBad-e5-large", "name": "ShawtyIsBad-e5-large", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2468, "hfopenllm_v2/BBH": 0.3873, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.372, "hfopenllm_v2/MMLU-PRO": 0.2569 } }, { "id": "tinycompany/ShawtyIsBad-ib", "name": "ShawtyIsBad-ib", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2565, "hfopenllm_v2/BBH": 0.388, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3641, "hfopenllm_v2/MMLU-PRO": 0.2581 } }, { "id": "tinycompany/ShawtyIsBad-nomic-moe", "name": "ShawtyIsBad-nomic-moe", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2608, "hfopenllm_v2/BBH": 0.3878, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3747, "hfopenllm_v2/MMLU-PRO": 0.2572 } }, { "id": "tinycompany/ShawtyIsBad-nomic1.5", "name": "ShawtyIsBad-nomic1.5", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2544, "hfopenllm_v2/BBH": 0.3874, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.3628, "hfopenllm_v2/MMLU-PRO": 0.2567 } }, { "id": "tinycompany/SigmaBoi-base", "name": "SigmaBoi-base", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2447, "hfopenllm_v2/BBH": 0.4314, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4343, "hfopenllm_v2/MMLU-PRO": 0.2817 } }, { "id": "tinycompany/SigmaBoi-bge-m3", "name": "SigmaBoi-bge-m3", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.245, "hfopenllm_v2/BBH": 0.4351, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4383, "hfopenllm_v2/MMLU-PRO": 0.2819 } }, { "id": "tinycompany/SigmaBoi-bgem3", "name": "SigmaBoi-bgem3", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.245, "hfopenllm_v2/BBH": 0.4351, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4383, "hfopenllm_v2/MMLU-PRO": 0.2819 } }, { "id": "tinycompany/SigmaBoi-ib", "name": "SigmaBoi-ib", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2477, "hfopenllm_v2/BBH": 0.4344, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.429, "hfopenllm_v2/MMLU-PRO": 0.2824 } }, { "id": "tinycompany/SigmaBoi-nomic-moe", "name": "SigmaBoi-nomic-moe", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2474, "hfopenllm_v2/BBH": 0.4334, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4316, "hfopenllm_v2/MMLU-PRO": 0.2837 } }, { "id": "tinycompany/SigmaBoi-nomic1.5", "name": "SigmaBoi-nomic1.5", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2447, "hfopenllm_v2/BBH": 0.4371, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4316, "hfopenllm_v2/MMLU-PRO": 0.2841 } }, { "id": "tinycompany/SigmaBoi-nomic1.5-fp32", "name": "SigmaBoi-nomic1.5-fp32", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2462, "hfopenllm_v2/BBH": 0.4371, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4316, "hfopenllm_v2/MMLU-PRO": 0.2841 } }, { "id": "tinycompany/Tamed-Shawty", "name": "Tamed-Shawty", "developer": "tinycompany", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3831, "hfopenllm_v2/BBH": 0.3837, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2626, "hfopenllm_v2/MUSR": 0.3501, "hfopenllm_v2/MMLU-PRO": 0.2601 } }, { "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1", "name": "TinyLlama-1.1B-Chat-v0.1", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1479, "hfopenllm_v2/BBH": 0.3084, "hfopenllm_v2/MATH Level 5": 0.006, "hfopenllm_v2/GPQA": 0.229, "hfopenllm_v2/MUSR": 0.3592, "hfopenllm_v2/MMLU-PRO": 0.1098 } }, { "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5", "name": "TinyLlama-1.1B-Chat-v0.5", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1634, "hfopenllm_v2/BBH": 0.3105, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2483, "hfopenllm_v2/MUSR": 0.3661, "hfopenllm_v2/MMLU-PRO": 0.1096 } }, { "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", "name": "TinyLlama-1.1B-Chat-v0.6", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1574, "hfopenllm_v2/BBH": 0.3067, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3422, "hfopenllm_v2/MMLU-PRO": 0.1149 } }, { "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "name": "TinyLlama-1.1B-Chat-v1.0", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0596, "hfopenllm_v2/BBH": 0.3104, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3515, "hfopenllm_v2/MMLU-PRO": 0.1101 } }, { "id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", "name": "TinyLlama-1.1B-intermediate-step-1431k-3T", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2277, "hfopenllm_v2/BBH": 0.3071, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.338, "hfopenllm_v2/MMLU-PRO": 0.112 } }, { "id": "TinyLlama/TinyLlama_v1.1", "name": "TinyLlama_v1.1", "developer": "TinyLlama", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2001, "hfopenllm_v2/BBH": 0.3024, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.37, "hfopenllm_v2/MMLU-PRO": 0.1049 } }, { "id": "tklohj/WindyFloLLM", "name": "WindyFloLLM", "developer": "tklohj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2669, "hfopenllm_v2/BBH": 0.4637, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4253, "hfopenllm_v2/MMLU-PRO": 0.2581 } }, { "id": "ToastyPigeon/Sto-vo-kor-12B", "name": "Sto-vo-kor-12B", "developer": "ToastyPigeon", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5501, "hfopenllm_v2/BBH": 0.5065, "hfopenllm_v2/MATH Level 5": 0.1088, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3938, "hfopenllm_v2/MMLU-PRO": 0.3398 } }, { "id": "together/RedPajama-INCITE-Base-7B", "name": "RedPajama-INCITE-Base 7B", "developer": "together", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.378, "helm_classic/MMLU": 0.302, "helm_classic/BoolQ": 0.713, "helm_classic/NarrativeQA": 0.617, "helm_classic/NaturalQuestions (open-book)": 0.586, "helm_classic/QuAC": 0.336, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.205, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.752, "helm_classic/CivilComments": 0.547, "helm_classic/RAFT": 0.648 } }, { "id": "together/RedPajama-INCITE-Base-v1-3B", "name": "RedPajama-INCITE-Base-v1 3B", "developer": "together", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.311, "helm_classic/MMLU": 0.263, "helm_classic/BoolQ": 0.685, "helm_classic/NarrativeQA": 0.555, "helm_classic/NaturalQuestions (open-book)": 0.52, "helm_classic/QuAC": 0.309, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.277, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.907, "helm_classic/CivilComments": 0.549, "helm_classic/RAFT": 0.502 } }, { "id": "together/RedPajama-INCITE-Instruct-7B", "name": "RedPajama-INCITE-Instruct 7B", "developer": "together", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.524, "helm_classic/MMLU": 0.363, "helm_classic/BoolQ": 0.705, "helm_classic/NarrativeQA": 0.638, "helm_classic/NaturalQuestions (open-book)": 0.659, "helm_classic/QuAC": 0.26, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.243, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.927, "helm_classic/CivilComments": 0.664, "helm_classic/RAFT": 0.695 } }, { "id": "together/RedPajama-INCITE-Instruct-v1-3B", "name": "RedPajama-INCITE-Instruct-v1 3B", "developer": "together", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.366, "helm_classic/MMLU": 0.257, "helm_classic/BoolQ": 0.677, "helm_classic/NarrativeQA": 0.638, "helm_classic/NaturalQuestions (open-book)": 0.637, "helm_classic/QuAC": 0.259, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.208, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": -1.0, "helm_classic/XSUM": -1.0, "helm_classic/IMDB": 0.894, "helm_classic/CivilComments": 0.549, "helm_classic/RAFT": 0.661 } }, { "id": "togethercomputer/GPT-JT-6B-v1", "name": "GPT-JT-6B-v1", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2061, "hfopenllm_v2/BBH": 0.3303, "hfopenllm_v2/MATH Level 5": 0.0106, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3737, "hfopenllm_v2/MMLU-PRO": 0.1626 } }, { "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", "name": "GPT-NeoXT-Chat-Base-20B", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.183, "hfopenllm_v2/BBH": 0.3321, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3461, "hfopenllm_v2/MMLU-PRO": 0.1145 } }, { "id": "togethercomputer/LLaMA-2-7B-32K", "name": "LLaMA-2-7B-32K", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1865, "hfopenllm_v2/BBH": 0.34, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.1768 } }, { "id": "togethercomputer/Llama-2-7B-32K-Instruct", "name": "Llama-2-7B-32K-Instruct", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.213, "hfopenllm_v2/BBH": 0.3443, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.4056, "hfopenllm_v2/MMLU-PRO": 0.1781 } }, { "id": "togethercomputer/RedPajama-INCITE-7B-Base", "name": "RedPajama-INCITE-7B-Base", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2082, "hfopenllm_v2/BBH": 0.3195, "hfopenllm_v2/MATH Level 5": 0.0159, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.362, "hfopenllm_v2/MMLU-PRO": 0.1197 } }, { "id": "togethercomputer/RedPajama-INCITE-7B-Chat", "name": "RedPajama-INCITE-7B-Chat", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1558, "hfopenllm_v2/BBH": 0.3175, "hfopenllm_v2/MATH Level 5": 0.0068, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3448, "hfopenllm_v2/MMLU-PRO": 0.1121 } }, { "id": "togethercomputer/RedPajama-INCITE-7B-Instruct", "name": "RedPajama-INCITE-7B-Instruct", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2055, "hfopenllm_v2/BBH": 0.3377, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2508, "hfopenllm_v2/MUSR": 0.3685, "hfopenllm_v2/MMLU-PRO": 0.1272 } }, { "id": "togethercomputer/RedPajama-INCITE-Base-3B-v1", "name": "RedPajama-INCITE-Base-3B-v1", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2294, "hfopenllm_v2/BBH": 0.306, "hfopenllm_v2/MATH Level 5": 0.0144, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.1111 } }, { "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", "name": "RedPajama-INCITE-Chat-3B-v1", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1652, "hfopenllm_v2/BBH": 0.3217, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2441, "hfopenllm_v2/MUSR": 0.3684, "hfopenllm_v2/MMLU-PRO": 0.1127 } }, { "id": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", "name": "RedPajama-INCITE-Instruct-3B-v1", "developer": "togethercomputer", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2124, "hfopenllm_v2/BBH": 0.3146, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.3886, "hfopenllm_v2/MMLU-PRO": 0.111 } }, { "id": "tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", "name": "Llama-3-Swallow-8B-Instruct-v0.1", "developer": "tokyotech-llm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5508, "hfopenllm_v2/BBH": 0.5009, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4357, "hfopenllm_v2/MMLU-PRO": 0.3088 } }, { "id": "tomasmcm/sky-t1-coder-32b-flash", "name": "sky-t1-coder-32b-flash", "developer": "tomasmcm", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.778, "hfopenllm_v2/BBH": 0.6822, "hfopenllm_v2/MATH Level 5": 0.5423, "hfopenllm_v2/GPQA": 0.3683, "hfopenllm_v2/MUSR": 0.4233, "hfopenllm_v2/MMLU-PRO": 0.5782 } }, { "id": "Trappu/Magnum-Picaro-0.7-v2-12b", "name": "Magnum-Picaro-0.7-v2-12b", "developer": "Trappu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3003, "hfopenllm_v2/BBH": 0.5507, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4727, "hfopenllm_v2/MMLU-PRO": 0.358 } }, { "id": "Trappu/Nemo-Picaro-12B", "name": "Nemo-Picaro-12B", "developer": "Trappu", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2577, "hfopenllm_v2/BBH": 0.549, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4726, "hfopenllm_v2/MMLU-PRO": 0.3605 } }, { "id": "Tremontaine/L3-12B-Lunaris-v1", "name": "L3-12B-Lunaris-v1", "developer": "Tremontaine", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6909, "hfopenllm_v2/BBH": 0.523, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.3674, "hfopenllm_v2/MMLU-PRO": 0.3775 } }, { "id": "Triangle104/Annunaki-12b", "name": "Annunaki-12b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3872, "hfopenllm_v2/BBH": 0.5499, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.3213, "hfopenllm_v2/MUSR": 0.4409, "hfopenllm_v2/MMLU-PRO": 0.3721 } }, { "id": "Triangle104/BigTalker-Lite-8B", "name": "BigTalker-Lite-8B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3689, "hfopenllm_v2/BBH": 0.5308, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4208, "hfopenllm_v2/MMLU-PRO": 0.3431 } }, { "id": "Triangle104/Chatty-Harry_V2.0", "name": "Chatty-Harry_V2.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3326, "hfopenllm_v2/BBH": 0.5319, "hfopenllm_v2/MATH Level 5": 0.139, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4078, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "Triangle104/Chatty-Harry_V3.0", "name": "Chatty-Harry_V3.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3675, "hfopenllm_v2/BBH": 0.5526, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4408, "hfopenllm_v2/MMLU-PRO": 0.3702 } }, { "id": "Triangle104/Chronos-Prism_V1.0", "name": "Chronos-Prism_V1.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3259, "hfopenllm_v2/BBH": 0.5554, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4263, "hfopenllm_v2/MMLU-PRO": 0.3673 } }, { "id": "Triangle104/Dark-Chivalry_V1.0", "name": "Dark-Chivalry_V1.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4326, "hfopenllm_v2/BBH": 0.4974, "hfopenllm_v2/MATH Level 5": 0.1314, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4182, "hfopenllm_v2/MMLU-PRO": 0.3444 } }, { "id": "Triangle104/Distilled-DarkPlanet-Allades-8B", "name": "Distilled-DarkPlanet-Allades-8B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.346, "hfopenllm_v2/BBH": 0.4634, "hfopenllm_v2/MATH Level 5": 0.4003, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.2901 } }, { "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES", "name": "Distilled-DarkPlanet-Allades-8B_TIES", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3892, "hfopenllm_v2/BBH": 0.5042, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.3868, "hfopenllm_v2/MMLU-PRO": 0.3401 } }, { "id": "Triangle104/Distilled-Whiskey-8b", "name": "Distilled-Whiskey-8b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3448, "hfopenllm_v2/BBH": 0.5028, "hfopenllm_v2/MATH Level 5": 0.2545, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4172, "hfopenllm_v2/MMLU-PRO": 0.3367 } }, { "id": "Triangle104/Dolphin3-Llama3.2-Smart", "name": "Dolphin3-Llama3.2-Smart", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4137, "hfopenllm_v2/BBH": 0.3975, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3922, "hfopenllm_v2/MMLU-PRO": 0.2195 } }, { "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1", "name": "DS-Distilled-Hermes-Llama-3.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3229, "hfopenllm_v2/BBH": 0.5117, "hfopenllm_v2/MATH Level 5": 0.2931, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4039, "hfopenllm_v2/MMLU-PRO": 0.311 } }, { "id": "Triangle104/DS-Distilled-Hermes-Llama-3.1_TIES", "name": "DS-Distilled-Hermes-Llama-3.1_TIES", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1364, "hfopenllm_v2/BBH": 0.2928, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3621, "hfopenllm_v2/MMLU-PRO": 0.1104 } }, { "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony", "name": "DS-R1-Distill-Q2.5-10B-Harmony", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1751, "hfopenllm_v2/BBH": 0.2643, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2106, "hfopenllm_v2/MUSR": 0.3128, "hfopenllm_v2/MMLU-PRO": 0.1173 } }, { "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1", "name": "DS-R1-Distill-Q2.5-14B-Harmony_V0.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4515, "hfopenllm_v2/BBH": 0.5783, "hfopenllm_v2/MATH Level 5": 0.5551, "hfopenllm_v2/GPQA": 0.3935, "hfopenllm_v2/MUSR": 0.5567, "hfopenllm_v2/MMLU-PRO": 0.4601 } }, { "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP", "name": "DS-R1-Distill-Q2.5-7B-RP", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3445, "hfopenllm_v2/BBH": 0.4383, "hfopenllm_v2/MATH Level 5": 0.4683, "hfopenllm_v2/GPQA": 0.3138, "hfopenllm_v2/MUSR": 0.403, "hfopenllm_v2/MMLU-PRO": 0.2891 } }, { "id": "Triangle104/DS-R1-Llama-8B-Harmony", "name": "DS-R1-Llama-8B-Harmony", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3566, "hfopenllm_v2/BBH": 0.4154, "hfopenllm_v2/MATH Level 5": 0.4282, "hfopenllm_v2/GPQA": 0.2919, "hfopenllm_v2/MUSR": 0.3762, "hfopenllm_v2/MMLU-PRO": 0.2744 } }, { "id": "Triangle104/DSR1-Distill-Llama-Lit-8B", "name": "DSR1-Distill-Llama-Lit-8B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1885, "hfopenllm_v2/BBH": 0.4284, "hfopenllm_v2/MATH Level 5": 0.352, "hfopenllm_v2/GPQA": 0.3029, "hfopenllm_v2/MUSR": 0.3535, "hfopenllm_v2/MMLU-PRO": 0.2798 } }, { "id": "Triangle104/DSR1-Distill-Qwen-7B-RP", "name": "DSR1-Distill-Qwen-7B-RP", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3609, "hfopenllm_v2/BBH": 0.4326, "hfopenllm_v2/MATH Level 5": 0.4804, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.3028 } }, { "id": "Triangle104/Gemmadevi-Stock-10B", "name": "Gemmadevi-Stock-10B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1582, "hfopenllm_v2/BBH": 0.6066, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4621, "hfopenllm_v2/MMLU-PRO": 0.4262 } }, { "id": "Triangle104/Hermes-Llama-3.2-CoT", "name": "Hermes-Llama-3.2-CoT", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4178, "hfopenllm_v2/BBH": 0.4616, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3698, "hfopenllm_v2/MMLU-PRO": 0.2947 } }, { "id": "Triangle104/Hermes-Llama-3.2-CoT-Summary", "name": "Hermes-Llama-3.2-CoT-Summary", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.483, "hfopenllm_v2/BBH": 0.42, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3575, "hfopenllm_v2/MMLU-PRO": 0.2901 } }, { "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B", "name": "Hermes3-L3.1-DirtyHarry-8B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3242, "hfopenllm_v2/BBH": 0.5066, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.3339 } }, { "id": "Triangle104/Herodotos-14B", "name": "Herodotos-14B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4667, "hfopenllm_v2/BBH": 0.6435, "hfopenllm_v2/MATH Level 5": 0.5045, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4795, "hfopenllm_v2/MMLU-PRO": 0.529 } }, { "id": "Triangle104/Herodotos-14B_V0.1", "name": "Herodotos-14B_V0.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1879, "hfopenllm_v2/BBH": 0.3017, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.224, "hfopenllm_v2/MUSR": 0.3684, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "Triangle104/L3.1-8B-Dusky-Ink", "name": "L3.1-8B-Dusky-Ink", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.453, "hfopenllm_v2/BBH": 0.5098, "hfopenllm_v2/MATH Level 5": 0.1231, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4224, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1", "name": "L3.1-8B-Dusky-Ink_v0.r1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1985, "hfopenllm_v2/BBH": 0.4337, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3988, "hfopenllm_v2/MMLU-PRO": 0.3206 } }, { "id": "Triangle104/Llama3.1-Allades-Lit-8b", "name": "Llama3.1-Allades-Lit-8b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2461, "hfopenllm_v2/BBH": 0.4183, "hfopenllm_v2/MATH Level 5": 0.0023, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.3708, "hfopenllm_v2/MMLU-PRO": 0.2724 } }, { "id": "Triangle104/Llama3.1-cc-Lit-8b", "name": "Llama3.1-cc-Lit-8b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2993, "hfopenllm_v2/BBH": 0.3848, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3854, "hfopenllm_v2/MMLU-PRO": 0.3004 } }, { "id": "Triangle104/LThreePointOne-8B-HermesBlackroot", "name": "LThreePointOne-8B-HermesBlackroot", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1792, "hfopenllm_v2/BBH": 0.4998, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.3586, "hfopenllm_v2/MMLU-PRO": 0.3285 } }, { "id": "Triangle104/LThreePointOne-8B-HermesInk", "name": "LThreePointOne-8B-HermesInk", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4031, "hfopenllm_v2/BBH": 0.5223, "hfopenllm_v2/MATH Level 5": 0.1722, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4129, "hfopenllm_v2/MMLU-PRO": 0.3467 } }, { "id": "Triangle104/Minerva-1.5b", "name": "Minerva-1.5b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2694, "hfopenllm_v2/BBH": 0.4026, "hfopenllm_v2/MATH Level 5": 0.1027, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.3655, "hfopenllm_v2/MMLU-PRO": 0.2698 } }, { "id": "Triangle104/Minerva-1.5b_V0.2", "name": "Minerva-1.5b_V0.2", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3083, "hfopenllm_v2/BBH": 0.3989, "hfopenllm_v2/MATH Level 5": 0.114, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.396, "hfopenllm_v2/MMLU-PRO": 0.2911 } }, { "id": "Triangle104/Minerva-10b", "name": "Minerva-10b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1879, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3627, "hfopenllm_v2/MMLU-PRO": 0.2318 } }, { "id": "Triangle104/Minerva-14b", "name": "Minerva-14b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3468, "hfopenllm_v2/BBH": 0.6301, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4766, "hfopenllm_v2/MMLU-PRO": 0.5194 } }, { "id": "Triangle104/Minerva-14b-V0.1", "name": "Minerva-14b-V0.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0861, "hfopenllm_v2/BBH": 0.609, "hfopenllm_v2/MATH Level 5": 0.3051, "hfopenllm_v2/GPQA": 0.3658, "hfopenllm_v2/MUSR": 0.47, "hfopenllm_v2/MMLU-PRO": 0.5118 } }, { "id": "Triangle104/Minerva-7b", "name": "Minerva-7b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3724, "hfopenllm_v2/BBH": 0.5498, "hfopenllm_v2/MATH Level 5": 0.284, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4143, "hfopenllm_v2/MMLU-PRO": 0.4444 } }, { "id": "Triangle104/Minerva-8b", "name": "Minerva-8b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1721, "hfopenllm_v2/BBH": 0.4669, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4273, "hfopenllm_v2/MMLU-PRO": 0.3089 } }, { "id": "Triangle104/Mistral-Redemption-Arc", "name": "Mistral-Redemption-Arc", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4029, "hfopenllm_v2/BBH": 0.6255, "hfopenllm_v2/MATH Level 5": 0.4101, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.451 } }, { "id": "Triangle104/Mistral-Small-24b-Harmony", "name": "Mistral-Small-24b-Harmony", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1687, "hfopenllm_v2/BBH": 0.6434, "hfopenllm_v2/MATH Level 5": 0.1911, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4276, "hfopenllm_v2/MMLU-PRO": 0.5431 } }, { "id": "Triangle104/Pans_Gutenbergum_V0.1", "name": "Pans_Gutenbergum_V0.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3097, "hfopenllm_v2/BBH": 0.5541, "hfopenllm_v2/MATH Level 5": 0.1057, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.3697 } }, { "id": "Triangle104/Pans_Gutenbergum_V0.2", "name": "Pans_Gutenbergum_V0.2", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3215, "hfopenllm_v2/BBH": 0.5526, "hfopenllm_v2/MATH Level 5": 0.0687, "hfopenllm_v2/GPQA": 0.3121, "hfopenllm_v2/MUSR": 0.4673, "hfopenllm_v2/MMLU-PRO": 0.3585 } }, { "id": "Triangle104/Pantheon_ChatWaifu_V0.2", "name": "Pantheon_ChatWaifu_V0.2", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2683, "hfopenllm_v2/BBH": 0.5532, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4755, "hfopenllm_v2/MMLU-PRO": 0.3442 } }, { "id": "Triangle104/Phi-4-AbliteratedRP", "name": "Phi-4-AbliteratedRP", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4923, "hfopenllm_v2/BBH": 0.6709, "hfopenllm_v2/MATH Level 5": 0.3074, "hfopenllm_v2/GPQA": 0.3951, "hfopenllm_v2/MUSR": 0.5098, "hfopenllm_v2/MMLU-PRO": 0.5308 } }, { "id": "Triangle104/Phi4-RP-o1", "name": "Phi4-RP-o1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.022, "hfopenllm_v2/BBH": 0.6653, "hfopenllm_v2/MATH Level 5": 0.3776, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.5111 } }, { "id": "Triangle104/Phi4-RP-o1-Ablit", "name": "Phi4-RP-o1-Ablit", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0239, "hfopenllm_v2/BBH": 0.663, "hfopenllm_v2/MATH Level 5": 0.3882, "hfopenllm_v2/GPQA": 0.3633, "hfopenllm_v2/MUSR": 0.4754, "hfopenllm_v2/MMLU-PRO": 0.5105 } }, { "id": "Triangle104/Porpoise-R1-Llama3.2-3b", "name": "Porpoise-R1-Llama3.2-3b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4352, "hfopenllm_v2/BBH": 0.3824, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3576, "hfopenllm_v2/MMLU-PRO": 0.2117 } }, { "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony", "name": "Q2.5-14B-Instruct-1M-Harmony", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5986, "hfopenllm_v2/BBH": 0.6339, "hfopenllm_v2/MATH Level 5": 0.3769, "hfopenllm_v2/GPQA": 0.375, "hfopenllm_v2/MUSR": 0.4795, "hfopenllm_v2/MMLU-PRO": 0.5075 } }, { "id": "Triangle104/Q2.5-AthensCOT", "name": "Q2.5-AthensCOT", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4573, "hfopenllm_v2/BBH": 0.5542, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4578, "hfopenllm_v2/MMLU-PRO": 0.4379 } }, { "id": "Triangle104/Q2.5-CodeR1-3B", "name": "Q2.5-CodeR1-3B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3588, "hfopenllm_v2/BBH": 0.4661, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4315, "hfopenllm_v2/MMLU-PRO": 0.2979 } }, { "id": "Triangle104/Q2.5-EVACOT-7b", "name": "Q2.5-EVACOT-7b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5784, "hfopenllm_v2/BBH": 0.5506, "hfopenllm_v2/MATH Level 5": 0.2825, "hfopenllm_v2/GPQA": 0.318, "hfopenllm_v2/MUSR": 0.4499, "hfopenllm_v2/MMLU-PRO": 0.4331 } }, { "id": "Triangle104/Q2.5-EvaHumane-RP", "name": "Q2.5-EvaHumane-RP", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3676, "hfopenllm_v2/BBH": 0.5328, "hfopenllm_v2/MATH Level 5": 0.2923, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4276, "hfopenllm_v2/MMLU-PRO": 0.4412 } }, { "id": "Triangle104/Q2.5-Humane-RP", "name": "Q2.5-Humane-RP", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4412, "hfopenllm_v2/BBH": 0.5649, "hfopenllm_v2/MATH Level 5": 0.3391, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4528, "hfopenllm_v2/MMLU-PRO": 0.4492 } }, { "id": "Triangle104/Q2.5-Instruct-1M_Harmony", "name": "Q2.5-Instruct-1M_Harmony", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6038, "hfopenllm_v2/BBH": 0.5373, "hfopenllm_v2/MATH Level 5": 0.3323, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4688, "hfopenllm_v2/MMLU-PRO": 0.4366 } }, { "id": "Triangle104/Q2.5-R1-3B", "name": "Q2.5-R1-3B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4214, "hfopenllm_v2/BBH": 0.4812, "hfopenllm_v2/MATH Level 5": 0.2674, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.432, "hfopenllm_v2/MMLU-PRO": 0.3813 } }, { "id": "Triangle104/Q2.5-R1-7B", "name": "Q2.5-R1-7B", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1346, "hfopenllm_v2/BBH": 0.3007, "hfopenllm_v2/MATH Level 5": 0.0166, "hfopenllm_v2/GPQA": 0.2525, "hfopenllm_v2/MUSR": 0.3607, "hfopenllm_v2/MMLU-PRO": 0.118 } }, { "id": "Triangle104/Robo-Gutenberg_V1.0", "name": "Robo-Gutenberg_V1.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6008, "hfopenllm_v2/BBH": 0.6537, "hfopenllm_v2/MATH Level 5": 0.4562, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.5391 } }, { "id": "Triangle104/Rocinante-Prism_V2.0", "name": "Rocinante-Prism_V2.0", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2616, "hfopenllm_v2/BBH": 0.5361, "hfopenllm_v2/MATH Level 5": 0.111, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.445, "hfopenllm_v2/MMLU-PRO": 0.364 } }, { "id": "Triangle104/Rocinante-Prism_V2.1", "name": "Rocinante-Prism_V2.1", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2558, "hfopenllm_v2/BBH": 0.5333, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.449, "hfopenllm_v2/MMLU-PRO": 0.3651 } }, { "id": "Triangle104/RomboHermes3-R1-Llama3.2-3b", "name": "RomboHermes3-R1-Llama3.2-3b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3007, "hfopenllm_v2/BBH": 0.4264, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3657, "hfopenllm_v2/MMLU-PRO": 0.2957 } }, { "id": "Triangle104/Rombos-Novasky-7B_V1c", "name": "Rombos-Novasky-7B_V1c", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.408, "hfopenllm_v2/BBH": 0.4349, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4465, "hfopenllm_v2/MMLU-PRO": 0.2738 } }, { "id": "Triangle104/Set-70b", "name": "Set-70b", "developer": "Triangle104", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7643, "hfopenllm_v2/BBH": 0.7014, "hfopenllm_v2/MATH Level 5": 0.364, "hfopenllm_v2/GPQA": 0.4463, "hfopenllm_v2/MUSR": 0.4696, "hfopenllm_v2/MMLU-PRO": 0.5442 } }, { "id": "trthminh1112/autotrain-llama32-1b-finetune", "name": "autotrain-llama32-1b-finetune", "developer": "trthminh1112", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1769, "hfopenllm_v2/BBH": 0.2996, "hfopenllm_v2/MATH Level 5": 0.0151, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3513, "hfopenllm_v2/MMLU-PRO": 0.1099 } }, { "id": "Tsunami-th/Tsunami-0.5-7B-Instruct", "name": "Tsunami-0.5-7B-Instruct", "developer": "Tsunami-th", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.74, "hfopenllm_v2/BBH": 0.5524, "hfopenllm_v2/MATH Level 5": 0.5045, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4257, "hfopenllm_v2/MMLU-PRO": 0.4413 } }, { "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct", "name": "Tsunami-0.5x-7B-Instruct", "developer": "Tsunami-th", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7099, "hfopenllm_v2/BBH": 0.5593, "hfopenllm_v2/MATH Level 5": 0.4207, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4667, "hfopenllm_v2/MMLU-PRO": 0.4458 } }, { "id": "Tsunami-th/Tsunami-1.0-14B-Instruct", "name": "Tsunami-1.0-14B-Instruct", "developer": "Tsunami-th", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7829, "hfopenllm_v2/BBH": 0.6439, "hfopenllm_v2/MATH Level 5": 0.4585, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4459, "hfopenllm_v2/MMLU-PRO": 0.5249 } }, { "id": "Tsunami-th/Tsunami-1.0-7B-Instruct", "name": "Tsunami-1.0-7B-Instruct", "developer": "Tsunami-th", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7309, "hfopenllm_v2/BBH": 0.5491, "hfopenllm_v2/MATH Level 5": 0.4335, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4493, "hfopenllm_v2/MMLU-PRO": 0.4424 } }, { "id": "TTTXXX01/Mistral-7B-Base-SimPO2-5e-7", "name": "Mistral-7B-Base-SimPO2-5e-7", "developer": "TTTXXX01", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4392, "hfopenllm_v2/BBH": 0.432, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3604, "hfopenllm_v2/MMLU-PRO": 0.2766 } }, { "id": "tugstugi/Qwen2.5-7B-Instruct-QwQ-v0.1", "name": "Qwen2.5-7B-Instruct-QwQ-v0.1", "developer": "tugstugi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6017, "hfopenllm_v2/BBH": 0.5101, "hfopenllm_v2/MATH Level 5": 0.3814, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3794, "hfopenllm_v2/MMLU-PRO": 0.4081 } }, { "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1", "name": "Gemma-2-9B-It-SPPO-Iter1", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3082, "hfopenllm_v2/BBH": 0.5969, "hfopenllm_v2/MATH Level 5": 0.0899, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4099, "hfopenllm_v2/MMLU-PRO": 0.3907 } }, { "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2", "name": "Gemma-2-9B-It-SPPO-Iter2", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.31, "hfopenllm_v2/BBH": 0.599, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4139, "hfopenllm_v2/MMLU-PRO": 0.387 } }, { "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3", "name": "Gemma-2-9B-It-SPPO-Iter3", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3167, "hfopenllm_v2/BBH": 0.6007, "hfopenllm_v2/MATH Level 5": 0.071, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.4166, "hfopenllm_v2/MMLU-PRO": 0.3826 } }, { "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1", "name": "Llama-3-Instruct-8B-SPPO-Iter1", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7299, "hfopenllm_v2/BBH": 0.5058, "hfopenllm_v2/MATH Level 5": 0.1148, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3568, "hfopenllm_v2/MMLU-PRO": 0.3711 } }, { "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2", "name": "Llama-3-Instruct-8B-SPPO-Iter2", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6989, "hfopenllm_v2/BBH": 0.5089, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3594, "hfopenllm_v2/MMLU-PRO": 0.3692 } }, { "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3", "name": "Llama-3-Instruct-8B-SPPO-Iter3", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6703, "hfopenllm_v2/BBH": 0.5076, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.3658 } }, { "id": "UCLA-AGI/Mistral7B-PairRM-SPPO", "name": "Mistral7B-PairRM-SPPO", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4355, "hfopenllm_v2/BBH": 0.4439, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3965, "hfopenllm_v2/MMLU-PRO": 0.2621 } }, { "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter1", "name": "Mistral7B-PairRM-SPPO-Iter1", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5047, "hfopenllm_v2/BBH": 0.4468, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3992, "hfopenllm_v2/MMLU-PRO": 0.2695 } }, { "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter2", "name": "Mistral7B-PairRM-SPPO-Iter2", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4446, "hfopenllm_v2/BBH": 0.4466, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4085, "hfopenllm_v2/MMLU-PRO": 0.2677 } }, { "id": "UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3", "name": "Mistral7B-PairRM-SPPO-Iter3", "developer": "UCLA-AGI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4351, "hfopenllm_v2/BBH": 0.4397, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.2658 } }, { "id": "uiuc-oumi/coalm-70b", "name": "CoALM-70B", "developer": "uiuc-oumi", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 74.0, "bfcl/bfcl.overall.overall_accuracy": 27.99, "bfcl/bfcl.overall.total_cost_usd": 19.89, "bfcl/bfcl.overall.latency_mean_s": 16.22, "bfcl/bfcl.overall.latency_std_s": 59.91, "bfcl/bfcl.overall.latency_p95_s": 36.0, "bfcl/bfcl.non_live.ast_accuracy": 83.44, "bfcl/bfcl.non_live.simple_ast_accuracy": 70.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 83.0, "bfcl/bfcl.live.live_accuracy": 67.28, "bfcl/bfcl.live.live_simple_ast_accuracy": 70.54, "bfcl/bfcl.live.live_multiple_ast_accuracy": 66.57, "bfcl/bfcl.live.live_parallel_ast_accuracy": 68.75, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 62.5, "bfcl/bfcl.multi_turn.accuracy": 10.62, "bfcl/bfcl.multi_turn.base_accuracy": 11.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 14.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 9.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 8.5, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 5.81, "bfcl/bfcl.memory.kv_accuracy": 9.03, "bfcl/bfcl.memory.vector_accuracy": 5.16, "bfcl/bfcl.memory.recursive_summarization_accuracy": 3.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 93.75, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 85.65, "bfcl/bfcl.format_sensitivity.max_delta": 72.0, "bfcl/bfcl.format_sensitivity.stddev": 27.76 } }, { "id": "uiuc-oumi/coalm-8b", "name": "CoALM-8B", "developer": "uiuc-oumi", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 84.0, "bfcl/bfcl.overall.overall_accuracy": 26.81, "bfcl/bfcl.overall.total_cost_usd": 25.33, "bfcl/bfcl.overall.latency_mean_s": 20.36, "bfcl/bfcl.overall.latency_std_s": 73.74, "bfcl/bfcl.overall.latency_p95_s": 138.04, "bfcl/bfcl.non_live.ast_accuracy": 84.87, "bfcl/bfcl.non_live.simple_ast_accuracy": 69.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 88.5, "bfcl/bfcl.live.live_accuracy": 66.77, "bfcl/bfcl.live.live_simple_ast_accuracy": 70.54, "bfcl/bfcl.live.live_multiple_ast_accuracy": 66.19, "bfcl/bfcl.live.live_parallel_ast_accuracy": 62.5, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 54.17, "bfcl/bfcl.multi_turn.accuracy": 8.0, "bfcl/bfcl.multi_turn.base_accuracy": 10.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 7.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 8.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 7.0, "bfcl/bfcl.web_search.accuracy": 0.0, "bfcl/bfcl.web_search.base_accuracy": 0.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 0.0, "bfcl/bfcl.memory.accuracy": 2.8, "bfcl/bfcl.memory.kv_accuracy": 3.23, "bfcl/bfcl.memory.vector_accuracy": 3.87, "bfcl/bfcl.memory.recursive_summarization_accuracy": 1.29, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 86.9, "bfcl/bfcl.format_sensitivity.max_delta": 79.0, "bfcl/bfcl.format_sensitivity.stddev": 34.18 } }, { "id": "UKzExecution/LlamaExecutor-8B-3.0.5", "name": "LlamaExecutor-8B-3.0.5", "developer": "UKzExecution", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7403, "hfopenllm_v2/BBH": 0.5006, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3625 } }, { "id": "Unbabel/TowerInstruct-Mistral-7B-v0.2", "name": "TowerInstruct-Mistral-7B-v0.2", "developer": "Unbabel", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2843, "hfopenllm_v2/BBH": 0.3882, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2475, "hfopenllm_v2/MUSR": 0.4522, "hfopenllm_v2/MMLU-PRO": 0.1968 } }, { "id": "Undi95/MG-FinalMix-72B", "name": "MG-FinalMix-72B", "developer": "Undi95", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8014, "hfopenllm_v2/BBH": 0.6973, "hfopenllm_v2/MATH Level 5": 0.3973, "hfopenllm_v2/GPQA": 0.3851, "hfopenllm_v2/MUSR": 0.4823, "hfopenllm_v2/MMLU-PRO": 0.5427 } }, { "id": "Undi95/Phi4-abliterated", "name": "Phi4-abliterated", "developer": "Undi95", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6618, "hfopenllm_v2/BBH": 0.6809, "hfopenllm_v2/MATH Level 5": 0.3701, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.5281 } }, { "id": "universalml/NepaliGPT-2.0", "name": "NepaliGPT-2.0", "developer": "universalml", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0365, "hfopenllm_v2/BBH": 0.466, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4657, "hfopenllm_v2/MMLU-PRO": 0.33 } }, { "id": "unknown/aya-expanse-32b", "name": "aya-expanse-32b", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.7353, "global-mmlu-lite/Culturally Sensitive": 0.6891, "global-mmlu-lite/Culturally Agnostic": 0.7815, "global-mmlu-lite/Arabic": 0.7425, "global-mmlu-lite/English": 0.7544, "global-mmlu-lite/Bengali": 0.7343, "global-mmlu-lite/German": 0.7425, "global-mmlu-lite/French": 0.7325, "global-mmlu-lite/Hindi": 0.7375, "global-mmlu-lite/Indonesian": 0.7594, "global-mmlu-lite/Italian": 0.7305, "global-mmlu-lite/Japanese": 0.7419, "global-mmlu-lite/Korean": 0.7525, "global-mmlu-lite/Portuguese": 0.7544, "global-mmlu-lite/Spanish": 0.7362, "global-mmlu-lite/Swahili": 0.7071, "global-mmlu-lite/Yoruba": 0.6942, "global-mmlu-lite/Chinese": 0.743, "global-mmlu-lite/Burmese": 0.7025 } }, { "id": "unknown/granite-4.0-h-small", "name": "granite-4.0-h-small", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.7503, "global-mmlu-lite/Culturally Sensitive": 0.7182, "global-mmlu-lite/Culturally Agnostic": 0.7826, "global-mmlu-lite/Arabic": 0.7613, "global-mmlu-lite/English": 0.77, "global-mmlu-lite/Bengali": 0.7613, "global-mmlu-lite/German": 0.755, "global-mmlu-lite/French": 0.7594, "global-mmlu-lite/Hindi": 0.7575, "global-mmlu-lite/Indonesian": 0.7614, "global-mmlu-lite/Italian": 0.7525, "global-mmlu-lite/Japanese": 0.7406, "global-mmlu-lite/Korean": 0.7525, "global-mmlu-lite/Portuguese": 0.757, "global-mmlu-lite/Spanish": 0.7638, "global-mmlu-lite/Swahili": 0.7318, "global-mmlu-lite/Yoruba": 0.6921, "global-mmlu-lite/Chinese": 0.7475, "global-mmlu-lite/Burmese": 0.7419 } }, { "id": "unknown/o4-mini-2025-04-16", "name": "o4-mini-2025-04-16", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8705, "global-mmlu-lite/Culturally Sensitive": 0.8503, "global-mmlu-lite/Culturally Agnostic": 0.8906, "global-mmlu-lite/Arabic": 0.865, "global-mmlu-lite/English": 0.8675, "global-mmlu-lite/Bengali": 0.8875, "global-mmlu-lite/German": 0.8775, "global-mmlu-lite/French": 0.87, "global-mmlu-lite/Hindi": 0.87, "global-mmlu-lite/Indonesian": 0.8675, "global-mmlu-lite/Italian": 0.855, "global-mmlu-lite/Japanese": 0.885, "global-mmlu-lite/Korean": 0.88, "global-mmlu-lite/Portuguese": 0.88, "global-mmlu-lite/Spanish": 0.855, "global-mmlu-lite/Swahili": 0.8525, "global-mmlu-lite/Yoruba": 0.8525, "global-mmlu-lite/Chinese": 0.89, "global-mmlu-lite/Burmese": 0.8725 } }, { "id": "unsloth/Llama-3.2-1B-Instruct", "name": "Llama-3.2-1B-Instruct", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.581, "hfopenllm_v2/BBH": 0.3485, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.3196, "hfopenllm_v2/MMLU-PRO": 0.1742 } }, { "id": "unsloth/Llama-3.2-1B-Instruct-no-system-message", "name": "Llama-3.2-1B-Instruct-no-system-message", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.565, "hfopenllm_v2/BBH": 0.3544, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3341, "hfopenllm_v2/MMLU-PRO": 0.1669 } }, { "id": "unsloth/Phi-3-mini-4k-instruct", "name": "Phi-3-mini-4k-instruct", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.544, "hfopenllm_v2/BBH": 0.55, "hfopenllm_v2/MATH Level 5": 0.1639, "hfopenllm_v2/GPQA": 0.323, "hfopenllm_v2/MUSR": 0.4284, "hfopenllm_v2/MMLU-PRO": 0.4031 } }, { "id": "unsloth/phi-4", "name": "phi-4", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6882, "hfopenllm_v2/BBH": 0.6886, "hfopenllm_v2/MATH Level 5": 0.5, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.5378 } }, { "id": "unsloth/phi-4-bnb-4bit", "name": "phi-4-bnb-4bit", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.673, "hfopenllm_v2/BBH": 0.677, "hfopenllm_v2/MATH Level 5": 0.4607, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.5256 } }, { "id": "unsloth/phi-4-unsloth-bnb-4bit", "name": "phi-4-unsloth-bnb-4bit", "developer": "unsloth", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6794, "hfopenllm_v2/BBH": 0.6791, "hfopenllm_v2/MATH Level 5": 0.4562, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4034, "hfopenllm_v2/MMLU-PRO": 0.5286 } }, { "id": "upstage/SOLAR-10.7B-Instruct-v1.0", "name": "SOLAR-10.7B-Instruct-v1.0", "developer": "upstage", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4737, "hfopenllm_v2/BBH": 0.5162, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.3899, "hfopenllm_v2/MMLU-PRO": 0.3138, "reward-bench/Score": 0.7391, "reward-bench/Chat": 0.8156, "reward-bench/Chat Hard": 0.6864, "reward-bench/Safety": 0.8514, "reward-bench/Reasoning": 0.7252, "reward-bench/Prior Sets (0.5 weight)": 0.4949 } }, { "id": "upstage/SOLAR-10.7B-v1.0", "name": "SOLAR-10.7B-v1.0", "developer": "upstage", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2421, "hfopenllm_v2/BBH": 0.5094, "hfopenllm_v2/MATH Level 5": 0.0264, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.4372, "hfopenllm_v2/MMLU-PRO": 0.34 } }, { "id": "upstage/solar-pro-241126", "name": "Solar Pro", "developer": "upstage", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.602, "helm_lite/NarrativeQA": 0.753, "helm_lite/NaturalQuestions (closed-book)": 0.297, "helm_lite/OpenbookQA": 0.922, "helm_lite/MMLU": 0.679, "helm_lite/MATH": 0.567, "helm_lite/GSM8K": 0.871, "helm_lite/LegalBench": 0.67, "helm_lite/MedQA": 0.698, "helm_lite/WMT 2014": 0.169, "helm_mmlu/MMLU All Subjects": 0.776, "helm_mmlu/Abstract Algebra": 0.46, "helm_mmlu/Anatomy": 0.719, "helm_mmlu/College Physics": 0.559, "helm_mmlu/Computer Security": 0.82, "helm_mmlu/Econometrics": 0.605, "helm_mmlu/Global Facts": 0.5, "helm_mmlu/Jurisprudence": 0.898, "helm_mmlu/Philosophy": 0.817, "helm_mmlu/Professional Psychology": 0.85, "helm_mmlu/Us Foreign Policy": 0.97, "helm_mmlu/Astronomy": 0.868, "helm_mmlu/Business Ethics": 0.8, "helm_mmlu/Clinical Knowledge": 0.808, "helm_mmlu/Conceptual Physics": 0.826, "helm_mmlu/Electrical Engineering": 0.697, "helm_mmlu/Elementary Mathematics": 0.611, "helm_mmlu/Formal Logic": 0.579, "helm_mmlu/High School World History": 0.907, "helm_mmlu/Human Sexuality": 0.847, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.865, "helm_mmlu/Machine Learning": 0.616, "helm_mmlu/Management": 0.864, "helm_mmlu/Marketing": 0.953, "helm_mmlu/Medical Genetics": 0.91, "helm_mmlu/Miscellaneous": 0.888, "helm_mmlu/Moral Scenarios": 0.811, "helm_mmlu/Nutrition": 0.859, "helm_mmlu/Prehistory": 0.867, "helm_mmlu/Public Relations": 0.764, "helm_mmlu/Security Studies": 0.82, "helm_mmlu/Sociology": 0.886, "helm_mmlu/Virology": 0.572, "helm_mmlu/World Religions": 0.883, "helm_mmlu/Mean win rate": 0.462 } }, { "id": "upstage/solar-pro-preview-instruct", "name": "solar-pro-preview-instruct", "developer": "upstage", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8416, "hfopenllm_v2/BBH": 0.6817, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3708, "hfopenllm_v2/MUSR": 0.4417, "hfopenllm_v2/MMLU-PRO": 0.5273 } }, { "id": "utkmst/chimera-beta-test2-lora-merged", "name": "chimera-beta-test2-lora-merged", "developer": "utkmst", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6054, "hfopenllm_v2/BBH": 0.4796, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.4118, "hfopenllm_v2/MMLU-PRO": 0.2992 } }, { "id": "utter-project/EuroLLM-9B", "name": "EuroLLM 9B", "developer": "unknown", "evaluator_relationship": null, "benchmark_scores": { "la_leaderboard/la_leaderboard": 25.87 } }, { "id": "uukuguy/speechless-code-mistral-7b-v1.0", "name": "speechless-code-mistral-7b-v1.0", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3665, "hfopenllm_v2/BBH": 0.4572, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4502, "hfopenllm_v2/MMLU-PRO": 0.3146 } }, { "id": "uukuguy/speechless-codellama-34b-v2.0", "name": "speechless-codellama-34b-v2.0", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4604, "hfopenllm_v2/BBH": 0.4813, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3787, "hfopenllm_v2/MMLU-PRO": 0.2542 } }, { "id": "uukuguy/speechless-coder-ds-6.7b", "name": "speechless-coder-ds-6.7b", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2505, "hfopenllm_v2/BBH": 0.4036, "hfopenllm_v2/MATH Level 5": 0.0211, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3819, "hfopenllm_v2/MMLU-PRO": 0.1719 } }, { "id": "uukuguy/speechless-instruct-mistral-7b-v0.2", "name": "speechless-instruct-mistral-7b-v0.2", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3261, "hfopenllm_v2/BBH": 0.4607, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4902, "hfopenllm_v2/MMLU-PRO": 0.2902 } }, { "id": "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", "name": "speechless-llama2-hermes-orca-platypus-wizardlm-13b", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4562, "hfopenllm_v2/BBH": 0.4846, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.4655, "hfopenllm_v2/MMLU-PRO": 0.2559 } }, { "id": "uukuguy/speechless-mistral-dolphin-orca-platypus-samantha-7b", "name": "speechless-mistral-dolphin-orca-platypus-samantha-7b", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.37, "hfopenllm_v2/BBH": 0.4983, "hfopenllm_v2/MATH Level 5": 0.0295, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.4361, "hfopenllm_v2/MMLU-PRO": 0.299 } }, { "id": "uukuguy/speechless-zephyr-code-functionary-7b", "name": "speechless-zephyr-code-functionary-7b", "developer": "uukuguy", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2696, "hfopenllm_v2/BBH": 0.4664, "hfopenllm_v2/MATH Level 5": 0.0423, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.3094 } }, { "id": "v000000/L3-8B-Stheno-v3.2-abliterated", "name": "L3-8B-Stheno-v3.2-abliterated", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6718, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.0695, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.362, "hfopenllm_v2/MMLU-PRO": 0.3604 } }, { "id": "v000000/L3.1-Niitorm-8B-DPO-t0.0001", "name": "L3.1-Niitorm-8B-DPO-t0.0001", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7689, "hfopenllm_v2/BBH": 0.5134, "hfopenllm_v2/MATH Level 5": 0.1624, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.388, "hfopenllm_v2/MMLU-PRO": 0.3866 } }, { "id": "v000000/L3.1-Storniitova-8B", "name": "L3.1-Storniitova-8B", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7817, "hfopenllm_v2/BBH": 0.5151, "hfopenllm_v2/MATH Level 5": 0.1465, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.4029, "hfopenllm_v2/MMLU-PRO": 0.3776 } }, { "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta", "name": "Qwen2.5-14B-Gutenberg-1e-Delta", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8045, "hfopenllm_v2/BBH": 0.6398, "hfopenllm_v2/MATH Level 5": 0.5264, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.493 } }, { "id": "v000000/Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", "name": "Qwen2.5-14B-Gutenberg-Instruct-Slerpeno", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8197, "hfopenllm_v2/BBH": 0.639, "hfopenllm_v2/MATH Level 5": 0.5325, "hfopenllm_v2/GPQA": 0.3314, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.4924 } }, { "id": "v000000/Qwen2.5-Lumen-14B", "name": "Qwen2.5-Lumen-14B", "developer": "v000000", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8064, "hfopenllm_v2/BBH": 0.6391, "hfopenllm_v2/MATH Level 5": 0.5363, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4114, "hfopenllm_v2/MMLU-PRO": 0.4903 } }, { "id": "V3N0M/Jenna-Tiny-2.0", "name": "Jenna-Tiny-2.0", "developer": "V3N0M", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2309, "hfopenllm_v2/BBH": 0.3148, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.1147 } }, { "id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct", "name": "Llama-3-SauerkrautLM-70b-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8045, "hfopenllm_v2/BBH": 0.6663, "hfopenllm_v2/MATH Level 5": 0.2281, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4339, "hfopenllm_v2/MMLU-PRO": 0.5392 } }, { "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct", "name": "Llama-3-SauerkrautLM-8b-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7445, "hfopenllm_v2/BBH": 0.4943, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.3857 } }, { "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct", "name": "Llama-3.1-SauerkrautLM-70b-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8656, "hfopenllm_v2/BBH": 0.7006, "hfopenllm_v2/MATH Level 5": 0.3693, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.4711, "hfopenllm_v2/MMLU-PRO": 0.5335 } }, { "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", "name": "Llama-3.1-SauerkrautLM-8b-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8017, "hfopenllm_v2/BBH": 0.5115, "hfopenllm_v2/MATH Level 5": 0.1941, "hfopenllm_v2/GPQA": 0.2903, "hfopenllm_v2/MUSR": 0.4148, "hfopenllm_v2/MMLU-PRO": 0.389 } }, { "id": "VAGOsolutions/SauerkrautLM-1.5b", "name": "SauerkrautLM-1.5b", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2404, "hfopenllm_v2/BBH": 0.3704, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.2151 } }, { "id": "VAGOsolutions/SauerkrautLM-7b-HerO", "name": "SauerkrautLM-7b-HerO", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5346, "hfopenllm_v2/BBH": 0.4904, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3924, "hfopenllm_v2/MMLU-PRO": 0.3046 } }, { "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat", "name": "SauerkrautLM-7b-LaserChat", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5988, "hfopenllm_v2/BBH": 0.4543, "hfopenllm_v2/MATH Level 5": 0.0778, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4148, "hfopenllm_v2/MMLU-PRO": 0.3305 } }, { "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it", "name": "SauerkrautLM-gemma-2-2b-it", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1321, "hfopenllm_v2/BBH": 0.4241, "hfopenllm_v2/MATH Level 5": 0.0219, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3995, "hfopenllm_v2/MMLU-PRO": 0.2693 } }, { "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it", "name": "SauerkrautLM-gemma-2-9b-it", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3024, "hfopenllm_v2/BBH": 0.6073, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4318, "hfopenllm_v2/MMLU-PRO": 0.4091 } }, { "id": "VAGOsolutions/SauerkrautLM-Gemma-2b", "name": "SauerkrautLM-Gemma-2b", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2475, "hfopenllm_v2/BBH": 0.3416, "hfopenllm_v2/MATH Level 5": 0.0279, "hfopenllm_v2/GPQA": 0.2567, "hfopenllm_v2/MUSR": 0.3676, "hfopenllm_v2/MMLU-PRO": 0.1469 } }, { "id": "VAGOsolutions/SauerkrautLM-Gemma-7b", "name": "SauerkrautLM-Gemma-7b", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3407, "hfopenllm_v2/BBH": 0.4188, "hfopenllm_v2/MATH Level 5": 0.0672, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3594, "hfopenllm_v2/MMLU-PRO": 0.2961 } }, { "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct", "name": "SauerkrautLM-Mixtral-8x7B-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5602, "hfopenllm_v2/BBH": 0.5277, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.4204, "hfopenllm_v2/MMLU-PRO": 0.365 } }, { "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct", "name": "SauerkrautLM-Nemo-12b-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6113, "hfopenllm_v2/BBH": 0.5214, "hfopenllm_v2/MATH Level 5": 0.1224, "hfopenllm_v2/GPQA": 0.3096, "hfopenllm_v2/MUSR": 0.4469, "hfopenllm_v2/MMLU-PRO": 0.3385 } }, { "id": "VAGOsolutions/SauerkrautLM-Phi-3-medium", "name": "SauerkrautLM-Phi-3-medium", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4409, "hfopenllm_v2/BBH": 0.6433, "hfopenllm_v2/MATH Level 5": 0.1601, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4845, "hfopenllm_v2/MMLU-PRO": 0.4665 } }, { "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct", "name": "SauerkrautLM-SOLAR-Instruct", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4917, "hfopenllm_v2/BBH": 0.5169, "hfopenllm_v2/MATH Level 5": 0.0634, "hfopenllm_v2/GPQA": 0.3054, "hfopenllm_v2/MUSR": 0.3965, "hfopenllm_v2/MMLU-PRO": 0.3183 } }, { "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO", "name": "SauerkrautLM-v2-14b-DPO", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7412, "hfopenllm_v2/BBH": 0.656, "hfopenllm_v2/MATH Level 5": 0.3165, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4375, "hfopenllm_v2/MMLU-PRO": 0.5117 } }, { "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT", "name": "SauerkrautLM-v2-14b-SFT", "developer": "VAGOsolutions", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6949, "hfopenllm_v2/BBH": 0.621, "hfopenllm_v2/MATH Level 5": 0.3285, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4179, "hfopenllm_v2/MMLU-PRO": 0.5205 } }, { "id": "ValiantLabs/Llama3-70B-Fireplace", "name": "Llama3-70B-Fireplace", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7774, "hfopenllm_v2/BBH": 0.6489, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4449, "hfopenllm_v2/MMLU-PRO": 0.4893 } }, { "id": "ValiantLabs/Llama3-70B-ShiningValiant2", "name": "Llama3-70B-ShiningValiant2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6122, "hfopenllm_v2/BBH": 0.6338, "hfopenllm_v2/MATH Level 5": 0.2077, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4326, "hfopenllm_v2/MMLU-PRO": 0.4898 } }, { "id": "ValiantLabs/Llama3.1-70B-ShiningValiant2", "name": "Llama3.1-70B-ShiningValiant2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5355, "hfopenllm_v2/BBH": 0.6738, "hfopenllm_v2/MATH Level 5": 0.2915, "hfopenllm_v2/GPQA": 0.3926, "hfopenllm_v2/MUSR": 0.4681, "hfopenllm_v2/MMLU-PRO": 0.5173 } }, { "id": "ValiantLabs/Llama3.1-8B-Cobalt", "name": "Llama3.1-8B-Cobalt", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3496, "hfopenllm_v2/BBH": 0.4947, "hfopenllm_v2/MATH Level 5": 0.1269, "hfopenllm_v2/GPQA": 0.3037, "hfopenllm_v2/MUSR": 0.3959, "hfopenllm_v2/MMLU-PRO": 0.3644 } }, { "id": "ValiantLabs/Llama3.1-8B-Enigma", "name": "Llama3.1-8B-Enigma", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2681, "hfopenllm_v2/BBH": 0.4478, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.4196, "hfopenllm_v2/MMLU-PRO": 0.3409 } }, { "id": "ValiantLabs/Llama3.1-8B-Esper2", "name": "Llama3.1-8B-Esper2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2567, "hfopenllm_v2/BBH": 0.447, "hfopenllm_v2/MATH Level 5": 0.0589, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3561, "hfopenllm_v2/MMLU-PRO": 0.2904 } }, { "id": "ValiantLabs/Llama3.1-8B-Fireplace2", "name": "Llama3.1-8B-Fireplace2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5328, "hfopenllm_v2/BBH": 0.4613, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3367, "hfopenllm_v2/MMLU-PRO": 0.2424 } }, { "id": "ValiantLabs/Llama3.1-8B-ShiningValiant2", "name": "Llama3.1-8B-ShiningValiant2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2678, "hfopenllm_v2/BBH": 0.4429, "hfopenllm_v2/MATH Level 5": 0.0521, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3959, "hfopenllm_v2/MMLU-PRO": 0.2927 } }, { "id": "ValiantLabs/Llama3.2-3B-Enigma", "name": "Llama3.2-3B-Enigma", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2786, "hfopenllm_v2/BBH": 0.3723, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3921, "hfopenllm_v2/MMLU-PRO": 0.2428 } }, { "id": "ValiantLabs/Llama3.2-3B-Esper2", "name": "Llama3.2-3B-Esper2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.275, "hfopenllm_v2/BBH": 0.3808, "hfopenllm_v2/MATH Level 5": 0.0363, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.355, "hfopenllm_v2/MMLU-PRO": 0.2257 } }, { "id": "ValiantLabs/Llama3.2-3B-ShiningValiant2", "name": "Llama3.2-3B-ShiningValiant2", "developer": "ValiantLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2625, "hfopenllm_v2/BBH": 0.4226, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3866, "hfopenllm_v2/MMLU-PRO": 0.2829 } }, { "id": "vhab10/llama-3-8b-merged-linear", "name": "llama-3-8b-merged-linear", "developer": "vhab10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5917, "hfopenllm_v2/BBH": 0.4937, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.3704 } }, { "id": "vhab10/Llama-3.1-8B-Base-Instruct-SLERP", "name": "Llama-3.1-8B-Base-Instruct-SLERP", "developer": "vhab10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2907, "hfopenllm_v2/BBH": 0.5057, "hfopenllm_v2/MATH Level 5": 0.1201, "hfopenllm_v2/GPQA": 0.2961, "hfopenllm_v2/MUSR": 0.4011, "hfopenllm_v2/MMLU-PRO": 0.3621 } }, { "id": "vhab10/Llama-3.2-Instruct-3B-TIES", "name": "Llama-3.2-Instruct-3B-TIES", "developer": "vhab10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4727, "hfopenllm_v2/BBH": 0.4332, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3497, "hfopenllm_v2/MMLU-PRO": 0.2916 } }, { "id": "vicgalle/CarbonBeagle-11B", "name": "CarbonBeagle-11B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5415, "hfopenllm_v2/BBH": 0.5294, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.402, "hfopenllm_v2/MMLU-PRO": 0.3276 } }, { "id": "vicgalle/CarbonBeagle-11B-truthy", "name": "CarbonBeagle-11B-truthy", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5212, "hfopenllm_v2/BBH": 0.5348, "hfopenllm_v2/MATH Level 5": 0.0491, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.3357 } }, { "id": "vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B", "name": "Configurable-Hermes-2-Pro-Llama-3-8B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5763, "hfopenllm_v2/BBH": 0.5055, "hfopenllm_v2/MATH Level 5": 0.0763, "hfopenllm_v2/GPQA": 0.297, "hfopenllm_v2/MUSR": 0.4184, "hfopenllm_v2/MMLU-PRO": 0.3098 } }, { "id": "vicgalle/Configurable-Llama-3.1-8B-Instruct", "name": "Configurable-Llama-3.1-8B-Instruct", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8312, "hfopenllm_v2/BBH": 0.5045, "hfopenllm_v2/MATH Level 5": 0.173, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.3592 } }, { "id": "vicgalle/Configurable-Yi-1.5-9B-Chat", "name": "Configurable-Yi-1.5-9B-Chat", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4323, "hfopenllm_v2/BBH": 0.5452, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4271, "hfopenllm_v2/MMLU-PRO": 0.4015 } }, { "id": "vicgalle/ConfigurableBeagle-11B", "name": "ConfigurableBeagle-11B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5834, "hfopenllm_v2/BBH": 0.5287, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.3953, "hfopenllm_v2/MMLU-PRO": 0.3374 } }, { "id": "vicgalle/ConfigurableHermes-7B", "name": "ConfigurableHermes-7B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5411, "hfopenllm_v2/BBH": 0.4573, "hfopenllm_v2/MATH Level 5": 0.0476, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4057, "hfopenllm_v2/MMLU-PRO": 0.3025 } }, { "id": "vicgalle/ConfigurableSOLAR-10.7B", "name": "ConfigurableSOLAR-10.7B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.51, "hfopenllm_v2/BBH": 0.4867, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3805, "hfopenllm_v2/MMLU-PRO": 0.3173 } }, { "id": "vicgalle/Humanish-RP-Llama-3.1-8B", "name": "Humanish-RP-Llama-3.1-8B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6669, "hfopenllm_v2/BBH": 0.51, "hfopenllm_v2/MATH Level 5": 0.1518, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3952, "hfopenllm_v2/MMLU-PRO": 0.3477 } }, { "id": "vicgalle/Merge-Mistral-Prometheus-7B", "name": "Merge-Mistral-Prometheus-7B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4848, "hfopenllm_v2/BBH": 0.4201, "hfopenllm_v2/MATH Level 5": 0.0181, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.41, "hfopenllm_v2/MMLU-PRO": 0.2717 } }, { "id": "vicgalle/Merge-Mixtral-Prometheus-8x7B", "name": "Merge-Mixtral-Prometheus-8x7B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5744, "hfopenllm_v2/BBH": 0.5351, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.3087, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.3684 } }, { "id": "vicgalle/Roleplay-Llama-3-8B", "name": "Roleplay-Llama-3-8B", "developer": "vicgalle", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.732, "hfopenllm_v2/BBH": 0.5012, "hfopenllm_v2/MATH Level 5": 0.0914, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3529, "hfopenllm_v2/MMLU-PRO": 0.3708 } }, { "id": "viettelsecurity-ai/security-llama3.2-3b", "name": "security-llama3.2-3b", "developer": "viettelsecurity-ai", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5909, "hfopenllm_v2/BBH": 0.4401, "hfopenllm_v2/MATH Level 5": 0.1261, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3379, "hfopenllm_v2/MMLU-PRO": 0.2837 } }, { "id": "vihangd/smart-dan-sft-v0.1", "name": "smart-dan-sft-v0.1", "developer": "vihangd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1576, "hfopenllm_v2/BBH": 0.3062, "hfopenllm_v2/MATH Level 5": 0.0098, "hfopenllm_v2/GPQA": 0.255, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.1142 } }, { "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24", "name": "Vikhr-Llama3.1-8B-Instruct-R-21-09-24", "developer": "Vikhrmodels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6431, "hfopenllm_v2/BBH": 0.5272, "hfopenllm_v2/MATH Level 5": 0.2175, "hfopenllm_v2/GPQA": 0.245, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.3547 } }, { "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24", "name": "Vikhr-Nemo-12B-Instruct-R-21-09-24", "developer": "Vikhrmodels", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5999, "hfopenllm_v2/BBH": 0.5212, "hfopenllm_v2/MATH Level 5": 0.1715, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4073, "hfopenllm_v2/MMLU-PRO": 0.3398 } }, { "id": "VIRNECT/llama-3-Korean-8B", "name": "llama-3-Korean-8B", "developer": "VIRNECT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5058, "hfopenllm_v2/BBH": 0.4908, "hfopenllm_v2/MATH Level 5": 0.0929, "hfopenllm_v2/GPQA": 0.271, "hfopenllm_v2/MUSR": 0.3662, "hfopenllm_v2/MMLU-PRO": 0.3539 } }, { "id": "VIRNECT/llama-3-Korean-8B-r-v-0.1", "name": "llama-3-Korean-8B-r-v-0.1", "developer": "VIRNECT", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4916, "hfopenllm_v2/BBH": 0.4806, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.2424, "hfopenllm_v2/MUSR": 0.3675, "hfopenllm_v2/MMLU-PRO": 0.326 } }, { "id": "voidful/smol-360m-ft", "name": "smol-360m-ft", "developer": "voidful", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2013, "hfopenllm_v2/BBH": 0.3012, "hfopenllm_v2/MATH Level 5": 0.0083, "hfopenllm_v2/GPQA": 0.2458, "hfopenllm_v2/MUSR": 0.3714, "hfopenllm_v2/MMLU-PRO": 0.1087 } }, { "id": "vonjack/MobileLLM-125M-HF", "name": "MobileLLM-125M-HF", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2107, "hfopenllm_v2/BBH": 0.3027, "hfopenllm_v2/MATH Level 5": 0.0091, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3782, "hfopenllm_v2/MMLU-PRO": 0.1164 } }, { "id": "vonjack/Phi-3-mini-4k-instruct-LLaMAfied", "name": "Phi-3-mini-4k-instruct-LLaMAfied", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5787, "hfopenllm_v2/BBH": 0.5741, "hfopenllm_v2/MATH Level 5": 0.1382, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.3924, "hfopenllm_v2/MMLU-PRO": 0.3885 } }, { "id": "vonjack/Phi-3.5-mini-instruct-hermes-fc-json", "name": "Phi-3.5-mini-instruct-hermes-fc-json", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1416, "hfopenllm_v2/BBH": 0.2975, "hfopenllm_v2/MATH Level 5": 0.0076, "hfopenllm_v2/GPQA": 0.2542, "hfopenllm_v2/MUSR": 0.4041, "hfopenllm_v2/MMLU-PRO": 0.1139 } }, { "id": "vonjack/Qwen2.5-Coder-0.5B-Merged", "name": "Qwen2.5-Coder-0.5B-Merged", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.31, "hfopenllm_v2/BBH": 0.3076, "hfopenllm_v2/MATH Level 5": 0.0378, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3303, "hfopenllm_v2/MMLU-PRO": 0.1202 } }, { "id": "vonjack/SmolLM2-1.7B-Merged", "name": "SmolLM2-1.7B-Merged", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3698, "hfopenllm_v2/BBH": 0.3587, "hfopenllm_v2/MATH Level 5": 0.0627, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3408, "hfopenllm_v2/MMLU-PRO": 0.2048 } }, { "id": "vonjack/SmolLM2-135M-Merged", "name": "SmolLM2-135M-Merged", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2483, "hfopenllm_v2/BBH": 0.31, "hfopenllm_v2/MATH Level 5": 0.0113, "hfopenllm_v2/GPQA": 0.2383, "hfopenllm_v2/MUSR": 0.3662, "hfopenllm_v2/MMLU-PRO": 0.1112 } }, { "id": "vonjack/SmolLM2-360M-Merged", "name": "SmolLM2-360M-Merged", "developer": "vonjack", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3206, "hfopenllm_v2/BBH": 0.3155, "hfopenllm_v2/MATH Level 5": 0.0174, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3527, "hfopenllm_v2/MMLU-PRO": 0.1098 } }, { "id": "w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored", "name": "SOLAR-10.7B-Instruct-v1.0-uncensored", "developer": "w4r10ck", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3884, "hfopenllm_v2/BBH": 0.5302, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.4639, "hfopenllm_v2/MMLU-PRO": 0.3344 } }, { "id": "wanlige/li-14b-v0.4", "name": "li-14b-v0.4", "developer": "wanlige", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8133, "hfopenllm_v2/BBH": 0.6544, "hfopenllm_v2/MATH Level 5": 0.5574, "hfopenllm_v2/GPQA": 0.3389, "hfopenllm_v2/MUSR": 0.446, "hfopenllm_v2/MMLU-PRO": 0.5167 } }, { "id": "wanlige/li-14b-v0.4-slerp", "name": "li-14b-v0.4-slerp", "developer": "wanlige", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4606, "hfopenllm_v2/BBH": 0.6587, "hfopenllm_v2/MATH Level 5": 0.4192, "hfopenllm_v2/GPQA": 0.4002, "hfopenllm_v2/MUSR": 0.4768, "hfopenllm_v2/MMLU-PRO": 0.5372 } }, { "id": "wanlige/li-14b-v0.4-slerp0.1", "name": "li-14b-v0.4-slerp0.1", "developer": "wanlige", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7923, "hfopenllm_v2/BBH": 0.6572, "hfopenllm_v2/MATH Level 5": 0.5332, "hfopenllm_v2/GPQA": 0.3591, "hfopenllm_v2/MUSR": 0.4207, "hfopenllm_v2/MMLU-PRO": 0.5294 } }, { "id": "wannaphong/KhanomTanLLM-Instruct", "name": "KhanomTanLLM-Instruct", "developer": "wannaphong", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1621, "hfopenllm_v2/BBH": 0.3093, "hfopenllm_v2/MATH Level 5": 0.0136, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3701, "hfopenllm_v2/MMLU-PRO": 0.1119 } }, { "id": "waqasali1707/Beast-Soul-new", "name": "Beast-Soul-new", "developer": "waqasali1707", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.503, "hfopenllm_v2/BBH": 0.5225, "hfopenllm_v2/MATH Level 5": 0.0702, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4486, "hfopenllm_v2/MMLU-PRO": 0.3108 } }, { "id": "wave-on-discord/qwent-7b", "name": "qwent-7b", "developer": "wave-on-discord", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2015, "hfopenllm_v2/BBH": 0.4228, "hfopenllm_v2/MATH Level 5": 0.0038, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3817, "hfopenllm_v2/MMLU-PRO": 0.1603 } }, { "id": "weathermanj/Menda-3B-500", "name": "Menda-3B-500", "developer": "weathermanj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6353, "hfopenllm_v2/BBH": 0.4766, "hfopenllm_v2/MATH Level 5": 0.3724, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3968, "hfopenllm_v2/MMLU-PRO": 0.3475 } }, { "id": "weathermanj/Menda-3b-750", "name": "Menda-3b-750", "developer": "weathermanj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6335, "hfopenllm_v2/BBH": 0.4737, "hfopenllm_v2/MATH Level 5": 0.3716, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3942, "hfopenllm_v2/MMLU-PRO": 0.3506 } }, { "id": "weathermanj/Menda-3b-Optim-100", "name": "Menda-3b-Optim-100", "developer": "weathermanj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6398, "hfopenllm_v2/BBH": 0.4735, "hfopenllm_v2/MATH Level 5": 0.3716, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3993, "hfopenllm_v2/MMLU-PRO": 0.3461 } }, { "id": "weathermanj/Menda-3b-Optim-200", "name": "Menda-3b-Optim-200", "developer": "weathermanj", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6375, "hfopenllm_v2/BBH": 0.4746, "hfopenllm_v2/MATH Level 5": 0.3731, "hfopenllm_v2/GPQA": 0.2827, "hfopenllm_v2/MUSR": 0.4033, "hfopenllm_v2/MMLU-PRO": 0.3484 } }, { "id": "wenbopan/Faro-Yi-9B-DPO", "name": "wenbopan/Faro-Yi-9B-DPO", "developer": "wenbopan", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6461, "reward-bench/Chat": 0.9218, "reward-bench/Chat Hard": 0.5307, "reward-bench/Safety": 0.5514, "reward-bench/Reasoning": 0.5839, "reward-bench/Prior Sets (0.5 weight)": 0.6395 } }, { "id": "weqweasdas/hh_rlhf_rm_open_llama_3b", "name": "weqweasdas/hh_rlhf_rm_open_llama_3b", "developer": "weqweasdas", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.2498, "reward-bench/Chat": 0.8184, "reward-bench/Chat Hard": 0.3728, "reward-bench/Safety": 0.24, "reward-bench/Reasoning": 0.3281, "reward-bench/Prior Sets (0.5 weight)": 0.6564, "reward-bench/Factuality": 0.3642, "reward-bench/Precise IF": 0.275, "reward-bench/Math": 0.3497, "reward-bench/Focus": 0.2384, "reward-bench/Ties": 0.0315 } }, { "id": "weqweasdas/RM-Gemma-2B", "name": "weqweasdas/RM-Gemma-2B", "developer": "weqweasdas", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6549, "reward-bench/Factuality": 0.3705, "reward-bench/Precise IF": 0.2812, "reward-bench/Math": 0.4317, "reward-bench/Safety": 0.4986, "reward-bench/Focus": 0.2343, "reward-bench/Ties": 0.1851, "reward-bench/Chat": 0.9441, "reward-bench/Chat Hard": 0.4079, "reward-bench/Reasoning": 0.7637, "reward-bench/Prior Sets (0.5 weight)": 0.6652 } }, { "id": "weqweasdas/RM-Gemma-7B", "name": "weqweasdas/RM-Gemma-7B", "developer": "weqweasdas", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6967, "reward-bench/Factuality": 0.4926, "reward-bench/Precise IF": 0.3937, "reward-bench/Math": 0.6066, "reward-bench/Safety": 0.5784, "reward-bench/Focus": 0.497, "reward-bench/Ties": 0.4232, "reward-bench/Chat": 0.9693, "reward-bench/Chat Hard": 0.4978, "reward-bench/Reasoning": 0.7362, "reward-bench/Prior Sets (0.5 weight)": 0.7069 } }, { "id": "weqweasdas/RM-Gemma-7B-4096", "name": "weqweasdas/RM-Gemma-7B-4096", "developer": "weqweasdas", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.6922, "reward-bench/Chat": 0.9497, "reward-bench/Chat Hard": 0.5022, "reward-bench/Safety": 0.5608, "reward-bench/Reasoning": 0.7511, "reward-bench/Prior Sets (0.5 weight)": 0.7024 } }, { "id": "weqweasdas/RM-Mistral-7B", "name": "weqweasdas/RM-Mistral-7B", "developer": "weqweasdas", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.7982, "reward-bench/Factuality": 0.5937, "reward-bench/Precise IF": 0.3438, "reward-bench/Math": 0.5956, "reward-bench/Safety": 0.8703, "reward-bench/Focus": 0.7293, "reward-bench/Ties": 0.6226, "reward-bench/Chat": 0.9665, "reward-bench/Chat Hard": 0.6053, "reward-bench/Reasoning": 0.7736, "reward-bench/Prior Sets (0.5 weight)": 0.753 } }, { "id": "Weyaxi/Bagel-Hermes-2x34B", "name": "Bagel-Hermes-2x34B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5432, "hfopenllm_v2/BBH": 0.4917, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4517, "hfopenllm_v2/MMLU-PRO": 0.4589 } }, { "id": "Weyaxi/Bagel-Hermes-34B-Slerp", "name": "Bagel-Hermes-34B-Slerp", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4603, "hfopenllm_v2/BBH": 0.5922, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.4622, "hfopenllm_v2/MMLU-PRO": 0.4703 } }, { "id": "Weyaxi/Einstein-v4-7B", "name": "Einstein-v4-7B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4708, "hfopenllm_v2/BBH": 0.3849, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4682, "hfopenllm_v2/MMLU-PRO": 0.2259 } }, { "id": "Weyaxi/Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", "name": "Einstein-v6.1-developed-by-Weyaxi-Llama3-8B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3927, "hfopenllm_v2/BBH": 0.5044, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.4332, "hfopenllm_v2/MMLU-PRO": 0.3093 } }, { "id": "Weyaxi/Einstein-v6.1-Llama3-8B", "name": "Einstein-v6.1-Llama3-8B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4568, "hfopenllm_v2/BBH": 0.5008, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2819, "hfopenllm_v2/MUSR": 0.4213, "hfopenllm_v2/MMLU-PRO": 0.3131 } }, { "id": "Weyaxi/Einstein-v7-Qwen2-7B", "name": "Einstein-v7-Qwen2-7B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.41, "hfopenllm_v2/BBH": 0.5161, "hfopenllm_v2/MATH Level 5": 0.1994, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.44, "hfopenllm_v2/MMLU-PRO": 0.4096 } }, { "id": "Weyaxi/Einstein-v8-Llama3.2-1B", "name": "Einstein-v8-Llama3.2-1B", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1862, "hfopenllm_v2/BBH": 0.3018, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3618, "hfopenllm_v2/MMLU-PRO": 0.1161 } }, { "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct", "name": "SauerkrautLM-UNA-SOLAR-Instruct", "developer": "Weyaxi", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4573, "hfopenllm_v2/BBH": 0.5166, "hfopenllm_v2/MATH Level 5": 0.0461, "hfopenllm_v2/GPQA": 0.3112, "hfopenllm_v2/MUSR": 0.3979, "hfopenllm_v2/MMLU-PRO": 0.3153 } }, { "id": "win10/ArliAI-RPMax-v1.3-merge-13.3B", "name": "ArliAI-RPMax-v1.3-merge-13.3B", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3038, "hfopenllm_v2/BBH": 0.4581, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.4325, "hfopenllm_v2/MMLU-PRO": 0.32 } }, { "id": "win10/Breeze-13B-32k-Instruct-v1_0", "name": "Breeze-13B-32k-Instruct-v1_0", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3584, "hfopenllm_v2/BBH": 0.4611, "hfopenllm_v2/MATH Level 5": 0.0128, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.2568 } }, { "id": "win10/EVA-Norns-Qwen2.5-v0.1", "name": "EVA-Norns-Qwen2.5-v0.1", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.622, "hfopenllm_v2/BBH": 0.5072, "hfopenllm_v2/MATH Level 5": 0.2613, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.4045, "hfopenllm_v2/MMLU-PRO": 0.3425 } }, { "id": "win10/Llama-3.2-3B-Instruct-24-9-29", "name": "Llama-3.2-3B-Instruct-24-9-29", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7332, "hfopenllm_v2/BBH": 0.4614, "hfopenllm_v2/MATH Level 5": 0.1707, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.3228 } }, { "id": "win10/llama3-13.45b-Instruct", "name": "llama3-13.45b-Instruct", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4144, "hfopenllm_v2/BBH": 0.4865, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3848, "hfopenllm_v2/MMLU-PRO": 0.3345 } }, { "id": "win10/miscii-14b-1M-0128", "name": "miscii-14b-1M-0128", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4181, "hfopenllm_v2/BBH": 0.5742, "hfopenllm_v2/MATH Level 5": 0.4773, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.5431, "hfopenllm_v2/MMLU-PRO": 0.4491 } }, { "id": "win10/Norns-Qwen2.5-12B", "name": "Norns-Qwen2.5-12B", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4897, "hfopenllm_v2/BBH": 0.4619, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3555, "hfopenllm_v2/MMLU-PRO": 0.266 } }, { "id": "win10/Norns-Qwen2.5-7B", "name": "Norns-Qwen2.5-7B", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6122, "hfopenllm_v2/BBH": 0.5073, "hfopenllm_v2/MATH Level 5": 0.2628, "hfopenllm_v2/GPQA": 0.2844, "hfopenllm_v2/MUSR": 0.4085, "hfopenllm_v2/MMLU-PRO": 0.3413 } }, { "id": "win10/Qwen2.5-2B-Instruct", "name": "Qwen2.5-2B-Instruct", "developer": "win10", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2273, "hfopenllm_v2/BBH": 0.3706, "hfopenllm_v2/MATH Level 5": 0.0227, "hfopenllm_v2/GPQA": 0.2676, "hfopenllm_v2/MUSR": 0.4378, "hfopenllm_v2/MMLU-PRO": 0.1934 } }, { "id": "winglian/llama-3-8b-256k-PoSE", "name": "llama-3-8b-256k-PoSE", "developer": "winglian", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2909, "hfopenllm_v2/BBH": 0.3157, "hfopenllm_v2/MATH Level 5": 0.0196, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3316, "hfopenllm_v2/MMLU-PRO": 0.1116 } }, { "id": "winglian/Llama-3-8b-64k-PoSE", "name": "Llama-3-8b-64k-PoSE", "developer": "winglian", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2857, "hfopenllm_v2/BBH": 0.3702, "hfopenllm_v2/MATH Level 5": 0.0415, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3396, "hfopenllm_v2/MMLU-PRO": 0.2467 } }, { "id": "WizardLMTeam/WizardLM-13B-V1.0", "name": "WizardLM-13B-V1.0", "developer": "WizardLMTeam", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.185, "hfopenllm_v2/BBH": 0.2913, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3497, "hfopenllm_v2/MMLU-PRO": 0.1166 } }, { "id": "WizardLMTeam/WizardLM-13B-V1.2", "name": "WizardLM-13B-V1.2", "developer": "WizardLMTeam", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3392, "hfopenllm_v2/BBH": 0.4462, "hfopenllm_v2/MATH Level 5": 0.0189, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.4378, "hfopenllm_v2/MMLU-PRO": 0.2519 } }, { "id": "WizardLMTeam/WizardLM-70B-V1.0", "name": "WizardLM-70B-V1.0", "developer": "WizardLMTeam", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4951, "hfopenllm_v2/BBH": 0.559, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.4391, "hfopenllm_v2/MMLU-PRO": 0.3447 } }, { "id": "Wladastic/Mini-Think-Base-1B", "name": "Mini-Think-Base-1B", "developer": "Wladastic", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5588, "hfopenllm_v2/BBH": 0.3574, "hfopenllm_v2/MATH Level 5": 0.0733, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3275, "hfopenllm_v2/MMLU-PRO": 0.1772 } }, { "id": "writer/InstructPalmyra-30B", "name": "InstructPalmyra 30B", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.568, "helm_classic/MMLU": 0.403, "helm_classic/BoolQ": 0.751, "helm_classic/NarrativeQA": 0.496, "helm_classic/NaturalQuestions (open-book)": 0.682, "helm_classic/QuAC": 0.433, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.185, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.152, "helm_classic/XSUM": 0.104, "helm_classic/IMDB": 0.94, "helm_classic/CivilComments": 0.555, "helm_classic/RAFT": 0.652 } }, { "id": "writer/palmyra-fin", "name": "Palmyra Fin", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.577, "helm_capabilities/MMLU-Pro": 0.591, "helm_capabilities/GPQA": 0.422, "helm_capabilities/IFEval": 0.793, "helm_capabilities/WildBench": 0.783, "helm_capabilities/Omni-MATH": 0.295 } }, { "id": "writer/palmyra-med", "name": "Palmyra Med", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.476, "helm_capabilities/MMLU-Pro": 0.411, "helm_capabilities/GPQA": 0.368, "helm_capabilities/IFEval": 0.767, "helm_capabilities/WildBench": 0.676, "helm_capabilities/Omni-MATH": 0.156 } }, { "id": "writer/palmyra-x-004", "name": "Palmyra-X-004", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.609, "helm_capabilities/MMLU-Pro": 0.657, "helm_capabilities/GPQA": 0.395, "helm_capabilities/IFEval": 0.872, "helm_capabilities/WildBench": 0.802, "helm_capabilities/Omni-MATH": 0.32, "helm_lite/Mean win rate": 0.808, "helm_lite/NarrativeQA": 0.773, "helm_lite/NaturalQuestions (closed-book)": 0.457, "helm_lite/OpenbookQA": 0.926, "helm_lite/MMLU": 0.739, "helm_lite/MATH": 0.767, "helm_lite/GSM8K": 0.905, "helm_lite/LegalBench": 0.73, "helm_lite/MedQA": 0.775, "helm_lite/WMT 2014": 0.203, "helm_mmlu/MMLU All Subjects": 0.813, "helm_mmlu/Abstract Algebra": 0.75, "helm_mmlu/Anatomy": 0.822, "helm_mmlu/College Physics": 0.647, "helm_mmlu/Computer Security": 0.82, "helm_mmlu/Econometrics": 0.684, "helm_mmlu/Global Facts": 0.62, "helm_mmlu/Jurisprudence": 0.843, "helm_mmlu/Philosophy": 0.83, "helm_mmlu/Professional Psychology": 0.845, "helm_mmlu/Us Foreign Policy": 0.92, "helm_mmlu/Astronomy": 0.928, "helm_mmlu/Business Ethics": 0.76, "helm_mmlu/Clinical Knowledge": 0.879, "helm_mmlu/Conceptual Physics": 0.885, "helm_mmlu/Electrical Engineering": 0.793, "helm_mmlu/Elementary Mathematics": 0.841, "helm_mmlu/Formal Logic": 0.579, "helm_mmlu/High School World History": 0.911, "helm_mmlu/Human Sexuality": 0.924, "helm_mmlu/International Law": 0.901, "helm_mmlu/Logical Fallacies": 0.877, "helm_mmlu/Machine Learning": 0.679, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.932, "helm_mmlu/Medical Genetics": 0.87, "helm_mmlu/Miscellaneous": 0.934, "helm_mmlu/Moral Scenarios": 0.825, "helm_mmlu/Nutrition": 0.869, "helm_mmlu/Prehistory": 0.917, "helm_mmlu/Public Relations": 0.791, "helm_mmlu/Security Studies": 0.849, "helm_mmlu/Sociology": 0.915, "helm_mmlu/Virology": 0.584, "helm_mmlu/World Religions": 0.842, "helm_mmlu/Mean win rate": 0.629 } }, { "id": "writer/palmyra-x-004-fc", "name": "palmyra-x-004 (FC)", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 76.0, "bfcl/bfcl.overall.overall_accuracy": 27.87, "bfcl/bfcl.overall.total_cost_usd": 178.15, "bfcl/bfcl.overall.latency_mean_s": 3.71, "bfcl/bfcl.overall.latency_std_s": 7.62, "bfcl/bfcl.overall.latency_p95_s": 8.04, "bfcl/bfcl.non_live.ast_accuracy": 87.46, "bfcl/bfcl.non_live.simple_ast_accuracy": 71.33, "bfcl/bfcl.non_live.multiple_ast_accuracy": 96.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 77.87, "bfcl/bfcl.live.live_simple_ast_accuracy": 79.46, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.97, "bfcl/bfcl.live.live_parallel_ast_accuracy": 56.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 0.38, "bfcl/bfcl.multi_turn.base_accuracy": 0.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 0.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 0.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 0.5, "bfcl/bfcl.web_search.accuracy": 2.5, "bfcl/bfcl.web_search.base_accuracy": 4.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 1.0, "bfcl/bfcl.memory.accuracy": 13.12, "bfcl/bfcl.memory.kv_accuracy": 6.45, "bfcl/bfcl.memory.vector_accuracy": 14.19, "bfcl/bfcl.memory.recursive_summarization_accuracy": 18.71, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 80.99 } }, { "id": "writer/palmyra-x-v2", "name": "Palmyra X V2 33B", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.589, "helm_lite/NarrativeQA": 0.752, "helm_lite/NaturalQuestions (closed-book)": 0.428, "helm_lite/OpenbookQA": 0.878, "helm_lite/MMLU": 0.621, "helm_lite/MATH": 0.58, "helm_lite/GSM8K": 0.735, "helm_lite/LegalBench": 0.644, "helm_lite/MedQA": 0.598, "helm_lite/WMT 2014": 0.239 } }, { "id": "writer/palmyra-x-v3", "name": "Palmyra X V3 72B", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_lite/Mean win rate": 0.679, "helm_lite/NarrativeQA": 0.706, "helm_lite/NaturalQuestions (closed-book)": 0.407, "helm_lite/OpenbookQA": 0.938, "helm_lite/MMLU": 0.702, "helm_lite/MATH": 0.723, "helm_lite/GSM8K": 0.831, "helm_lite/LegalBench": 0.709, "helm_lite/MedQA": 0.684, "helm_lite/WMT 2014": 0.262, "helm_mmlu/MMLU All Subjects": 0.786, "helm_mmlu/Abstract Algebra": 0.53, "helm_mmlu/Anatomy": 0.733, "helm_mmlu/College Physics": 0.549, "helm_mmlu/Computer Security": 0.78, "helm_mmlu/Econometrics": 0.649, "helm_mmlu/Global Facts": 0.53, "helm_mmlu/Jurisprudence": 0.88, "helm_mmlu/Philosophy": 0.836, "helm_mmlu/Professional Psychology": 0.858, "helm_mmlu/Us Foreign Policy": 0.96, "helm_mmlu/Astronomy": 0.862, "helm_mmlu/Business Ethics": 0.83, "helm_mmlu/Clinical Knowledge": 0.804, "helm_mmlu/Conceptual Physics": 0.809, "helm_mmlu/Electrical Engineering": 0.772, "helm_mmlu/Elementary Mathematics": 0.661, "helm_mmlu/Formal Logic": 0.659, "helm_mmlu/High School World History": 0.911, "helm_mmlu/Human Sexuality": 0.924, "helm_mmlu/International Law": 0.909, "helm_mmlu/Logical Fallacies": 0.877, "helm_mmlu/Machine Learning": 0.625, "helm_mmlu/Management": 0.903, "helm_mmlu/Marketing": 0.94, "helm_mmlu/Medical Genetics": 0.83, "helm_mmlu/Miscellaneous": 0.894, "helm_mmlu/Moral Scenarios": 0.562, "helm_mmlu/Nutrition": 0.856, "helm_mmlu/Prehistory": 0.87, "helm_mmlu/Public Relations": 0.773, "helm_mmlu/Security Studies": 0.833, "helm_mmlu/Sociology": 0.91, "helm_mmlu/Virology": 0.572, "helm_mmlu/World Religions": 0.877, "helm_mmlu/Mean win rate": 0.325 } }, { "id": "writer/palmyra-x5", "name": "Palmyra X5", "developer": "writer", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.696, "helm_capabilities/MMLU-Pro": 0.804, "helm_capabilities/GPQA": 0.661, "helm_capabilities/IFEval": 0.823, "helm_capabilities/WildBench": 0.78, "helm_capabilities/Omni-MATH": 0.414 } }, { "id": "wzhouad/gemma-2-9b-it-WPO-HB", "name": "gemma-2-9b-it-WPO-HB", "developer": "wzhouad", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5437, "hfopenllm_v2/BBH": 0.5629, "hfopenllm_v2/MATH Level 5": 0.1533, "hfopenllm_v2/GPQA": 0.3498, "hfopenllm_v2/MUSR": 0.3675, "hfopenllm_v2/MMLU-PRO": 0.336 } }, { "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B", "name": "Deepseek-Lumen-R1-Qwen2.5-14B", "developer": "x0000001", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4436, "hfopenllm_v2/BBH": 0.4569, "hfopenllm_v2/MATH Level 5": 0.2779, "hfopenllm_v2/GPQA": 0.2852, "hfopenllm_v2/MUSR": 0.474, "hfopenllm_v2/MMLU-PRO": 0.4379 } }, { "id": "xai/Grok 4", "name": "Grok 4", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Overall Pass@1": 0.152, "apex-agents/Overall Pass@8": 0.329, "apex-agents/Overall Mean Score": 0.303, "apex-agents/Investment Banking Pass@1": 0.17, "apex-agents/Management Consulting Pass@1": 0.12, "apex-agents/Corporate Law Pass@1": 0.165, "apex-agents/Corporate Lawyer Mean Score": 0.41, "apex-v1/Overall Score": 0.635 } }, { "id": "xai/grok-3-beta", "name": "Grok 3 Beta", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.727, "helm_capabilities/MMLU-Pro": 0.788, "helm_capabilities/GPQA": 0.65, "helm_capabilities/IFEval": 0.884, "helm_capabilities/WildBench": 0.849, "helm_capabilities/Omni-MATH": 0.464 } }, { "id": "xai/grok-3-mini", "name": "grok-3-mini", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.673, "global-mmlu-lite/Culturally Sensitive": 0.6717, "global-mmlu-lite/Culturally Agnostic": 0.6743, "global-mmlu-lite/Arabic": 0.755, "global-mmlu-lite/English": 0.5075, "global-mmlu-lite/Bengali": 0.7355, "global-mmlu-lite/German": 0.6591, "global-mmlu-lite/French": 0.485, "global-mmlu-lite/Hindi": 0.56, "global-mmlu-lite/Indonesian": 0.725, "global-mmlu-lite/Italian": 0.696, "global-mmlu-lite/Japanese": 0.6575, "global-mmlu-lite/Korean": 0.7325, "global-mmlu-lite/Portuguese": 0.6275, "global-mmlu-lite/Spanish": 0.61, "global-mmlu-lite/Swahili": 0.7625, "global-mmlu-lite/Yoruba": 0.8296, "global-mmlu-lite/Chinese": 0.5564, "global-mmlu-lite/Burmese": 0.8693 } }, { "id": "xai/grok-3-mini-beta", "name": "Grok 3 mini Beta", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.679, "helm_capabilities/MMLU-Pro": 0.799, "helm_capabilities/GPQA": 0.675, "helm_capabilities/IFEval": 0.951, "helm_capabilities/WildBench": 0.651, "helm_capabilities/Omni-MATH": 0.318 } }, { "id": "xai/grok-4", "name": "Grok 4", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 23.1 } }, { "id": "xai/grok-4-0709", "name": "grok-4-0709", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "global-mmlu-lite/Global MMLU Lite": 0.8881, "global-mmlu-lite/Culturally Sensitive": 0.8862, "global-mmlu-lite/Culturally Agnostic": 0.89, "global-mmlu-lite/Arabic": 0.885, "global-mmlu-lite/English": 0.905, "global-mmlu-lite/Bengali": 0.8925, "global-mmlu-lite/German": 0.8725, "global-mmlu-lite/French": 0.875, "global-mmlu-lite/Hindi": 0.8675, "global-mmlu-lite/Indonesian": 0.89, "global-mmlu-lite/Italian": 0.9025, "global-mmlu-lite/Japanese": 0.87, "global-mmlu-lite/Korean": 0.895, "global-mmlu-lite/Portuguese": 0.8725, "global-mmlu-lite/Spanish": 0.9075, "global-mmlu-lite/Swahili": 0.91, "global-mmlu-lite/Yoruba": 0.905, "global-mmlu-lite/Chinese": 0.8525, "global-mmlu-lite/Burmese": 0.9075, "helm_capabilities/Mean score": 0.785, "helm_capabilities/MMLU-Pro": 0.851, "helm_capabilities/GPQA": 0.726, "helm_capabilities/IFEval": 0.949, "helm_capabilities/WildBench": 0.797, "helm_capabilities/Omni-MATH": 0.603 } }, { "id": "xai/grok-4-0709-fc", "name": "Grok-4-0709 (FC)", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 10.0, "bfcl/bfcl.overall.overall_accuracy": 61.38, "bfcl/bfcl.overall.total_cost_usd": 355.17, "bfcl/bfcl.overall.latency_mean_s": 15.49, "bfcl/bfcl.overall.latency_std_s": 26.22, "bfcl/bfcl.overall.latency_p95_s": 44.28, "bfcl/bfcl.non_live.ast_accuracy": 85.38, "bfcl/bfcl.non_live.simple_ast_accuracy": 73.5, "bfcl/bfcl.non_live.multiple_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 88.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 87.0, "bfcl/bfcl.live.live_accuracy": 75.57, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.17, "bfcl/bfcl.live.live_multiple_ast_accuracy": 73.88, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 79.17, "bfcl/bfcl.multi_turn.accuracy": 33.88, "bfcl/bfcl.multi_turn.base_accuracy": 44.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 19.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 28.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 44.0, "bfcl/bfcl.web_search.accuracy": 82.0, "bfcl/bfcl.web_search.base_accuracy": 80.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 84.0, "bfcl/bfcl.memory.accuracy": 55.91, "bfcl/bfcl.memory.kv_accuracy": 57.42, "bfcl/bfcl.memory.vector_accuracy": 58.71, "bfcl/bfcl.memory.recursive_summarization_accuracy": 51.61, "bfcl/bfcl.relevance.relevance_detection_accuracy": 87.5, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 75.4 } }, { "id": "xai/grok-4-0709-prompt", "name": "Grok-4-0709 (Prompt)", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 9.0, "bfcl/bfcl.overall.overall_accuracy": 62.97, "bfcl/bfcl.overall.total_cost_usd": 348.19, "bfcl/bfcl.overall.latency_mean_s": 30.38, "bfcl/bfcl.overall.latency_std_s": 36.19, "bfcl/bfcl.overall.latency_p95_s": 101.54, "bfcl/bfcl.non_live.ast_accuracy": 82.75, "bfcl/bfcl.non_live.simple_ast_accuracy": 67.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.5, "bfcl/bfcl.non_live.parallel_ast_accuracy": 89.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 81.5, "bfcl/bfcl.live.live_accuracy": 72.54, "bfcl/bfcl.live.live_simple_ast_accuracy": 81.78, "bfcl/bfcl.live.live_multiple_ast_accuracy": 70.18, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 47.0, "bfcl/bfcl.multi_turn.base_accuracy": 55.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 46.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 36.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 50.5, "bfcl/bfcl.web_search.accuracy": 74.0, "bfcl/bfcl.web_search.base_accuracy": 74.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 74.0, "bfcl/bfcl.memory.accuracy": 50.54, "bfcl/bfcl.memory.kv_accuracy": 43.87, "bfcl/bfcl.memory.vector_accuracy": 59.35, "bfcl/bfcl.memory.recursive_summarization_accuracy": 48.39, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 84.3, "bfcl/bfcl.format_sensitivity.max_delta": 13.0, "bfcl/bfcl.format_sensitivity.stddev": 2.88 } }, { "id": "xai/grok-4-1-fast-non-reasoning-fc", "name": "Grok-4-1-fast-non-reasoning (FC)", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 12.0, "bfcl/bfcl.overall.overall_accuracy": 58.29, "bfcl/bfcl.overall.total_cost_usd": 16.27, "bfcl/bfcl.overall.latency_mean_s": 2.29, "bfcl/bfcl.overall.latency_std_s": 7.31, "bfcl/bfcl.overall.latency_p95_s": 5.34, "bfcl/bfcl.non_live.ast_accuracy": 88.13, "bfcl/bfcl.non_live.simple_ast_accuracy": 76.0, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 90.5, "bfcl/bfcl.live.live_accuracy": 77.94, "bfcl/bfcl.live.live_simple_ast_accuracy": 82.95, "bfcl/bfcl.live.live_multiple_ast_accuracy": 76.92, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 46.75, "bfcl/bfcl.multi_turn.base_accuracy": 58.0, "bfcl/bfcl.multi_turn.miss_function_accuracy": 39.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 37.5, "bfcl/bfcl.multi_turn.long_context_accuracy": 52.0, "bfcl/bfcl.web_search.accuracy": 75.0, "bfcl/bfcl.web_search.base_accuracy": 74.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 76.0, "bfcl/bfcl.memory.accuracy": 26.24, "bfcl/bfcl.memory.kv_accuracy": 20.65, "bfcl/bfcl.memory.vector_accuracy": 20.0, "bfcl/bfcl.memory.recursive_summarization_accuracy": 38.06, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 74.09 } }, { "id": "xai/grok-4-1-fast-reasoning-fc", "name": "Grok-4-1-fast-reasoning (FC)", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 5.0, "bfcl/bfcl.overall.overall_accuracy": 69.57, "bfcl/bfcl.overall.total_cost_usd": 17.26, "bfcl/bfcl.overall.latency_mean_s": 6.74, "bfcl/bfcl.overall.latency_std_s": 12.78, "bfcl/bfcl.overall.latency_p95_s": 17.57, "bfcl/bfcl.non_live.ast_accuracy": 88.27, "bfcl/bfcl.non_live.simple_ast_accuracy": 77.58, "bfcl/bfcl.non_live.multiple_ast_accuracy": 93.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 92.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 90.0, "bfcl/bfcl.live.live_accuracy": 78.46, "bfcl/bfcl.live.live_simple_ast_accuracy": 84.11, "bfcl/bfcl.live.live_multiple_ast_accuracy": 77.3, "bfcl/bfcl.live.live_parallel_ast_accuracy": 75.0, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 70.83, "bfcl/bfcl.multi_turn.accuracy": 58.87, "bfcl/bfcl.multi_turn.base_accuracy": 70.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 59.5, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 43.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 62.5, "bfcl/bfcl.web_search.accuracy": 82.5, "bfcl/bfcl.web_search.base_accuracy": 82.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 83.0, "bfcl/bfcl.memory.accuracy": 53.98, "bfcl/bfcl.memory.kv_accuracy": 41.29, "bfcl/bfcl.memory.vector_accuracy": 57.42, "bfcl/bfcl.memory.recursive_summarization_accuracy": 63.23, "bfcl/bfcl.relevance.relevance_detection_accuracy": 81.25, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 79.43 } }, { "id": "xai/grok-code-fast-1", "name": "Grok Code Fast 1", "developer": "xAI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 14.2 } }, { "id": "Xclbr7/Arcanum-12b", "name": "Arcanum-12b", "developer": "Xclbr7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2907, "hfopenllm_v2/BBH": 0.5265, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.3205, "hfopenllm_v2/MUSR": 0.417, "hfopenllm_v2/MMLU-PRO": 0.3586 } }, { "id": "Xclbr7/caliburn-12b", "name": "caliburn-12b", "developer": "Xclbr7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3576, "hfopenllm_v2/BBH": 0.5519, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.3364, "hfopenllm_v2/MUSR": 0.4292, "hfopenllm_v2/MMLU-PRO": 0.3675 } }, { "id": "Xclbr7/caliburn-v2-12b", "name": "caliburn-v2-12b", "developer": "Xclbr7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2967, "hfopenllm_v2/BBH": 0.5141, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.437, "hfopenllm_v2/MMLU-PRO": 0.3784 } }, { "id": "Xclbr7/Hyena-12b", "name": "Hyena-12b", "developer": "Xclbr7", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3404, "hfopenllm_v2/BBH": 0.5457, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.2978, "hfopenllm_v2/MUSR": 0.3984, "hfopenllm_v2/MMLU-PRO": 0.3439 } }, { "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER", "name": "Llama3.2-1B-THREADRIPPER", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5576, "hfopenllm_v2/BBH": 0.3544, "hfopenllm_v2/MATH Level 5": 0.074, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.313, "hfopenllm_v2/MMLU-PRO": 0.1763 } }, { "id": "Xiaojian9992024/Llama3.2-1B-THREADRIPPER-v0.2", "name": "Llama3.2-1B-THREADRIPPER-v0.2", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5318, "hfopenllm_v2/BBH": 0.3528, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.2659, "hfopenllm_v2/MUSR": 0.3316, "hfopenllm_v2/MMLU-PRO": 0.1745 } }, { "id": "Xiaojian9992024/Phi-4-Megatron-Empathetic", "name": "Phi-4-Megatron-Empathetic", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.0173, "hfopenllm_v2/BBH": 0.6673, "hfopenllm_v2/MATH Level 5": 0.2696, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.5071, "hfopenllm_v2/MMLU-PRO": 0.5082 } }, { "id": "Xiaojian9992024/Phi-4-mini-UNOFFICAL", "name": "Phi-4-mini-UNOFFICAL", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1273, "hfopenllm_v2/BBH": 0.2944, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2408, "hfopenllm_v2/MUSR": 0.3368, "hfopenllm_v2/MMLU-PRO": 0.1144 } }, { "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer", "name": "Qwen2.5-7B-MS-Destroyer", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7296, "hfopenllm_v2/BBH": 0.547, "hfopenllm_v2/MATH Level 5": 0.4592, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.427, "hfopenllm_v2/MMLU-PRO": 0.4412 } }, { "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview", "name": "Qwen2.5-Dyanka-7B-Preview", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.764, "hfopenllm_v2/BBH": 0.5543, "hfopenllm_v2/MATH Level 5": 0.4879, "hfopenllm_v2/GPQA": 0.3171, "hfopenllm_v2/MUSR": 0.4481, "hfopenllm_v2/MMLU-PRO": 0.4376 } }, { "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2", "name": "Qwen2.5-Dyanka-7B-Preview-v0.2", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6702, "hfopenllm_v2/BBH": 0.5374, "hfopenllm_v2/MATH Level 5": 0.4721, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.4467, "hfopenllm_v2/MMLU-PRO": 0.4371 } }, { "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored", "name": "Qwen2.5-THREADRIPPER-Medium-Censored", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8112, "hfopenllm_v2/BBH": 0.6431, "hfopenllm_v2/MATH Level 5": 0.534, "hfopenllm_v2/GPQA": 0.3347, "hfopenllm_v2/MUSR": 0.414, "hfopenllm_v2/MMLU-PRO": 0.4929 } }, { "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small", "name": "Qwen2.5-THREADRIPPER-Small", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7689, "hfopenllm_v2/BBH": 0.549, "hfopenllm_v2/MATH Level 5": 0.4736, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4349, "hfopenllm_v2/MMLU-PRO": 0.4357 } }, { "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", "name": "Qwen2.5-THREADRIPPER-Small-AnniversaryEdition", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7404, "hfopenllm_v2/BBH": 0.5465, "hfopenllm_v2/MATH Level 5": 0.5076, "hfopenllm_v2/GPQA": 0.2685, "hfopenllm_v2/MUSR": 0.3807, "hfopenllm_v2/MMLU-PRO": 0.4393 } }, { "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp", "name": "Qwen2.5-Ultra-1.5B-25.02-Exp", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4073, "hfopenllm_v2/BBH": 0.4066, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3383, "hfopenllm_v2/MMLU-PRO": 0.2641 } }, { "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B", "name": "Reflection-L3.2-JametMiniMix-3B", "developer": "Xiaojian9992024", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4619, "hfopenllm_v2/BBH": 0.439, "hfopenllm_v2/MATH Level 5": 0.1193, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3667, "hfopenllm_v2/MMLU-PRO": 0.2988 } }, { "id": "xinchen9/llama3-b8-ft-dis", "name": "llama3-b8-ft-dis", "developer": "xinchen9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1546, "hfopenllm_v2/BBH": 0.4626, "hfopenllm_v2/MATH Level 5": 0.0393, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.3654, "hfopenllm_v2/MMLU-PRO": 0.3244 } }, { "id": "xinchen9/Llama3.1_8B_Instruct_CoT", "name": "Llama3.1_8B_Instruct_CoT", "developer": "xinchen9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2974, "hfopenllm_v2/BBH": 0.4398, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4371, "hfopenllm_v2/MMLU-PRO": 0.2879 } }, { "id": "xinchen9/Llama3.1_CoT", "name": "Llama3.1_CoT", "developer": "xinchen9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2246, "hfopenllm_v2/BBH": 0.4341, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.4305, "hfopenllm_v2/MMLU-PRO": 0.2739 } }, { "id": "xinchen9/Llama3.1_CoT_V1", "name": "Llama3.1_CoT_V1", "developer": "xinchen9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2453, "hfopenllm_v2/BBH": 0.4376, "hfopenllm_v2/MATH Level 5": 0.0332, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.4572, "hfopenllm_v2/MMLU-PRO": 0.2805 } }, { "id": "xinchen9/Mistral-7B-CoT", "name": "Mistral-7B-CoT", "developer": "xinchen9", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2783, "hfopenllm_v2/BBH": 0.3873, "hfopenllm_v2/MATH Level 5": 0.0249, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.3994, "hfopenllm_v2/MMLU-PRO": 0.2284 } }, { "id": "Xkev/Llama-3.2V-11B-cot", "name": "Llama-3.2V-11B-cot", "developer": "Xkev", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4158, "hfopenllm_v2/BBH": 0.4959, "hfopenllm_v2/MATH Level 5": 0.1556, "hfopenllm_v2/GPQA": 0.2953, "hfopenllm_v2/MUSR": 0.4159, "hfopenllm_v2/MMLU-PRO": 0.3587 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter2_bt_2b-table", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6375, "hfopenllm_v2/BBH": 0.4912, "hfopenllm_v2/MATH Level 5": 0.0921, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.382, "hfopenllm_v2/MMLU-PRO": 0.3686 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter2_bt_8b-table", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7275, "hfopenllm_v2/BBH": 0.5057, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3819, "hfopenllm_v2/MMLU-PRO": 0.3697 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter2_gp_2b-table", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6569, "hfopenllm_v2/BBH": 0.4952, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3594, "hfopenllm_v2/MMLU-PRO": 0.3702 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter2_gp_8b-table", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6621, "hfopenllm_v2/BBH": 0.5004, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3805, "hfopenllm_v2/MMLU-PRO": 0.36 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_bt_2b-table-0.001", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6042, "hfopenllm_v2/BBH": 0.4936, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3793, "hfopenllm_v2/MMLU-PRO": 0.3708 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_bt_8b-table-0.002", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7132, "hfopenllm_v2/BBH": 0.4996, "hfopenllm_v2/MATH Level 5": 0.0853, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3872, "hfopenllm_v2/MMLU-PRO": 0.3664 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_gp_2b-table-0.001", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5947, "hfopenllm_v2/BBH": 0.4899, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3581, "hfopenllm_v2/MMLU-PRO": 0.3704 } }, { "id": "xkp24/Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter2_gp_8b-table-0.002", "developer": "xkp24", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6453, "hfopenllm_v2/BBH": 0.4951, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3939, "hfopenllm_v2/MMLU-PRO": 0.353 } }, { "id": "xMaulana/FinMatcha-3B-Instruct", "name": "FinMatcha-3B-Instruct", "developer": "xMaulana", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7548, "hfopenllm_v2/BBH": 0.4536, "hfopenllm_v2/MATH Level 5": 0.1435, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.3182 } }, { "id": "xukp20/llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", "name": "llama-3-8b-instruct-sppo-iter1-gp-2b-tau01-table", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.69, "hfopenllm_v2/BBH": 0.4978, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3673, "hfopenllm_v2/MMLU-PRO": 0.3716 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter3_bt_2b-table", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5756, "hfopenllm_v2/BBH": 0.4901, "hfopenllm_v2/MATH Level 5": 0.0997, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.3659 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter3_bt_8b-table", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7034, "hfopenllm_v2/BBH": 0.5092, "hfopenllm_v2/MATH Level 5": 0.0967, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3739, "hfopenllm_v2/MMLU-PRO": 0.3693 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter3_gp_2b-table", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6024, "hfopenllm_v2/BBH": 0.497, "hfopenllm_v2/MATH Level 5": 0.1042, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3674, "hfopenllm_v2/MMLU-PRO": 0.3658 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter3_gp_8b-table", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.662, "hfopenllm_v2/BBH": 0.5, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3818, "hfopenllm_v2/MMLU-PRO": 0.3615 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_bt_2b-table-0.001", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5336, "hfopenllm_v2/BBH": 0.4915, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.378, "hfopenllm_v2/MMLU-PRO": 0.3625 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_bt_8b-table-0.002", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6852, "hfopenllm_v2/BBH": 0.5075, "hfopenllm_v2/MATH Level 5": 0.0718, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3832, "hfopenllm_v2/MMLU-PRO": 0.3621 } }, { "id": "xukp20/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_gp_2b-table-0.001", "developer": "xukp20", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5482, "hfopenllm_v2/BBH": 0.4887, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3633, "hfopenllm_v2/MMLU-PRO": 0.3671 } }, { "id": "xwen-team/Xwen-7B-Chat", "name": "Xwen-7B-Chat", "developer": "xwen-team", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6864, "hfopenllm_v2/BBH": 0.5068, "hfopenllm_v2/MATH Level 5": 0.4509, "hfopenllm_v2/GPQA": 0.2609, "hfopenllm_v2/MUSR": 0.3914, "hfopenllm_v2/MMLU-PRO": 0.429 } }, { "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B", "name": "L3.1-ClaudeMaid-4x8B", "developer": "xxx777xxxASD", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6696, "hfopenllm_v2/BBH": 0.5071, "hfopenllm_v2/MATH Level 5": 0.1412, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4289, "hfopenllm_v2/MMLU-PRO": 0.358 } }, { "id": "yam-peleg/Hebrew-Gemma-11B-Instruct", "name": "Hebrew-Gemma-11B-Instruct", "developer": "yam-peleg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3021, "hfopenllm_v2/BBH": 0.4036, "hfopenllm_v2/MATH Level 5": 0.0657, "hfopenllm_v2/GPQA": 0.276, "hfopenllm_v2/MUSR": 0.4089, "hfopenllm_v2/MMLU-PRO": 0.2554 } }, { "id": "yam-peleg/Hebrew-Mistral-7B", "name": "Hebrew-Mistral-7B", "developer": "yam-peleg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2328, "hfopenllm_v2/BBH": 0.4334, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2794, "hfopenllm_v2/MUSR": 0.3977, "hfopenllm_v2/MMLU-PRO": 0.278 } }, { "id": "yam-peleg/Hebrew-Mistral-7B-200K", "name": "Hebrew-Mistral-7B-200K", "developer": "yam-peleg", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.177, "hfopenllm_v2/BBH": 0.3411, "hfopenllm_v2/MATH Level 5": 0.031, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.374, "hfopenllm_v2/MMLU-PRO": 0.2529 } }, { "id": "yandex/YaLM-100B", "name": "YaLM 100B", "developer": "yandex", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.075, "helm_classic/MMLU": 0.243, "helm_classic/BoolQ": 0.634, "helm_classic/NarrativeQA": 0.252, "helm_classic/NaturalQuestions (open-book)": 0.227, "helm_classic/QuAC": 0.162, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.202, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.017, "helm_classic/XSUM": 0.021, "helm_classic/IMDB": 0.836, "helm_classic/CivilComments": 0.49, "helm_classic/RAFT": 0.395 } }, { "id": "yanng1242/Marcoro14-7B-slerp", "name": "Marcoro14-7B-slerp", "developer": "yanng1242", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.406, "hfopenllm_v2/BBH": 0.5252, "hfopenllm_v2/MATH Level 5": 0.0748, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4686, "hfopenllm_v2/MMLU-PRO": 0.3168 } }, { "id": "Yash21/TinyYi-7B-Test", "name": "TinyYi-7B-Test", "developer": "Yash21", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1856, "hfopenllm_v2/BBH": 0.291, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2643, "hfopenllm_v2/MUSR": 0.3364, "hfopenllm_v2/MMLU-PRO": 0.1091 } }, { "id": "yasserrmd/Coder-GRPO-3B", "name": "Coder-GRPO-3B", "developer": "yasserrmd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6208, "hfopenllm_v2/BBH": 0.4469, "hfopenllm_v2/MATH Level 5": 0.3202, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.4115, "hfopenllm_v2/MMLU-PRO": 0.3197 } }, { "id": "yasserrmd/Text2SQL-1.5B", "name": "Text2SQL-1.5B", "developer": "yasserrmd", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2857, "hfopenllm_v2/BBH": 0.3858, "hfopenllm_v2/MATH Level 5": 0.068, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3942, "hfopenllm_v2/MMLU-PRO": 0.2363 } }, { "id": "ycros/BagelMIsteryTour-v2-8x7B", "name": "BagelMIsteryTour-v2-8x7B", "developer": "ycros", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5994, "hfopenllm_v2/BBH": 0.5159, "hfopenllm_v2/MATH Level 5": 0.0785, "hfopenllm_v2/GPQA": 0.3045, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3473 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter1_bt_2b-table", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6709, "hfopenllm_v2/BBH": 0.4987, "hfopenllm_v2/MATH Level 5": 0.1118, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3727, "hfopenllm_v2/MMLU-PRO": 0.3716 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter1_bt_8b-table", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7333, "hfopenllm_v2/BBH": 0.508, "hfopenllm_v2/MATH Level 5": 0.1035, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3806, "hfopenllm_v2/MMLU-PRO": 0.3748 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter1_gp_2b-table", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6785, "hfopenllm_v2/BBH": 0.4941, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3647, "hfopenllm_v2/MMLU-PRO": 0.3718 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", "name": "Llama-3-8B-Instruct-SPPO-Iter1_gp_8b-table", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7132, "hfopenllm_v2/BBH": 0.5025, "hfopenllm_v2/MATH Level 5": 0.0989, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3713, "hfopenllm_v2/MMLU-PRO": 0.3683 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_bt_2b-table-0.001", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6496, "hfopenllm_v2/BBH": 0.4979, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.378, "hfopenllm_v2/MMLU-PRO": 0.372 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_bt_8b-table-0.002", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7196, "hfopenllm_v2/BBH": 0.5045, "hfopenllm_v2/MATH Level 5": 0.0876, "hfopenllm_v2/GPQA": 0.2601, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.3734 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_gp_2b-table-0.001", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6504, "hfopenllm_v2/BBH": 0.4958, "hfopenllm_v2/MATH Level 5": 0.0937, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.3703 } }, { "id": "yfzp/Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter1_gp_8b-table-0.002", "developer": "yfzp", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7016, "hfopenllm_v2/BBH": 0.4992, "hfopenllm_v2/MATH Level 5": 0.0869, "hfopenllm_v2/GPQA": 0.2592, "hfopenllm_v2/MUSR": 0.3779, "hfopenllm_v2/MMLU-PRO": 0.3669 } }, { "id": "yifAI/Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", "name": "Llama-3-8B-Instruct-SPPO-score-Iter3_gp_8b-table-0.002", "developer": "yifAI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.649, "hfopenllm_v2/BBH": 0.4915, "hfopenllm_v2/MATH Level 5": 0.0755, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3899, "hfopenllm_v2/MMLU-PRO": 0.352 } }, { "id": "ylalain/ECE-PRYMMAL-YL-1B-SLERP-V8", "name": "ECE-PRYMMAL-YL-1B-SLERP-V8", "developer": "ylalain", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1505, "hfopenllm_v2/BBH": 0.3976, "hfopenllm_v2/MATH Level 5": 0.0045, "hfopenllm_v2/GPQA": 0.2894, "hfopenllm_v2/MUSR": 0.3875, "hfopenllm_v2/MMLU-PRO": 0.2384 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17", "name": "gemma-2-2b-jpn-it-abliterated-17", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5082, "hfopenllm_v2/BBH": 0.4076, "hfopenllm_v2/MATH Level 5": 0.0385, "hfopenllm_v2/GPQA": 0.2718, "hfopenllm_v2/MUSR": 0.3701, "hfopenllm_v2/MMLU-PRO": 0.2455 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24", "name": "gemma-2-2b-jpn-it-abliterated-17-18-24", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5055, "hfopenllm_v2/BBH": 0.3812, "hfopenllm_v2/MATH Level 5": 0.0257, "hfopenllm_v2/GPQA": 0.281, "hfopenllm_v2/MUSR": 0.3502, "hfopenllm_v2/MMLU-PRO": 0.2282 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO", "name": "gemma-2-2b-jpn-it-abliterated-17-ORPO", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4748, "hfopenllm_v2/BBH": 0.3898, "hfopenllm_v2/MATH Level 5": 0.0619, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3768, "hfopenllm_v2/MMLU-PRO": 0.2191 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", "name": "gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3065, "hfopenllm_v2/BBH": 0.4072, "hfopenllm_v2/MATH Level 5": 0.0325, "hfopenllm_v2/GPQA": 0.2693, "hfopenllm_v2/MUSR": 0.3969, "hfopenllm_v2/MMLU-PRO": 0.2249 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18", "name": "gemma-2-2b-jpn-it-abliterated-18", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5175, "hfopenllm_v2/BBH": 0.4132, "hfopenllm_v2/MATH Level 5": 0.0446, "hfopenllm_v2/GPQA": 0.2735, "hfopenllm_v2/MUSR": 0.3742, "hfopenllm_v2/MMLU-PRO": 0.2505 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO", "name": "gemma-2-2b-jpn-it-abliterated-18-ORPO", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4742, "hfopenllm_v2/BBH": 0.4039, "hfopenllm_v2/MATH Level 5": 0.0468, "hfopenllm_v2/GPQA": 0.2617, "hfopenllm_v2/MUSR": 0.3953, "hfopenllm_v2/MMLU-PRO": 0.2185 } }, { "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24", "name": "gemma-2-2b-jpn-it-abliterated-24", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4979, "hfopenllm_v2/BBH": 0.411, "hfopenllm_v2/MATH Level 5": 0.0438, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3915, "hfopenllm_v2/MMLU-PRO": 0.2473 } }, { "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18", "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4631, "hfopenllm_v2/BBH": 0.4053, "hfopenllm_v2/MATH Level 5": 0.0431, "hfopenllm_v2/GPQA": 0.2886, "hfopenllm_v2/MUSR": 0.3754, "hfopenllm_v2/MMLU-PRO": 0.2345 } }, { "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", "name": "gemma-2-2b-ORPO-jpn-it-abliterated-18-merge", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5218, "hfopenllm_v2/BBH": 0.4147, "hfopenllm_v2/MATH Level 5": 0.0544, "hfopenllm_v2/GPQA": 0.2836, "hfopenllm_v2/MUSR": 0.3514, "hfopenllm_v2/MMLU-PRO": 0.2461 } }, { "id": "ymcki/Llama-3.1-8B-GRPO-Instruct", "name": "Llama-3.1-8B-GRPO-Instruct", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7445, "hfopenllm_v2/BBH": 0.5132, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.2945, "hfopenllm_v2/MUSR": 0.3817, "hfopenllm_v2/MMLU-PRO": 0.3738 } }, { "id": "ymcki/Llama-3.1-8B-SFT-GRPO-Instruct", "name": "Llama-3.1-8B-SFT-GRPO-Instruct", "developer": "ymcki", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3354, "hfopenllm_v2/BBH": 0.3126, "hfopenllm_v2/MATH Level 5": 0.04, "hfopenllm_v2/GPQA": 0.2534, "hfopenllm_v2/MUSR": 0.3526, "hfopenllm_v2/MMLU-PRO": 0.1098 } }, { "id": "Youlln/1PARAMMYL-8B-ModelStock", "name": "1PARAMMYL-8B-ModelStock", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5371, "hfopenllm_v2/BBH": 0.5216, "hfopenllm_v2/MATH Level 5": 0.1488, "hfopenllm_v2/GPQA": 0.3238, "hfopenllm_v2/MUSR": 0.4409, "hfopenllm_v2/MMLU-PRO": 0.4 } }, { "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP", "name": "2PRYMMAL-Yi1.5-6B-SLERP", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2826, "hfopenllm_v2/BBH": 0.4665, "hfopenllm_v2/MATH Level 5": 0.1133, "hfopenllm_v2/GPQA": 0.307, "hfopenllm_v2/MUSR": 0.4756, "hfopenllm_v2/MMLU-PRO": 0.317 } }, { "id": "Youlln/3PRYMMAL-PHI3-3B-SLERP", "name": "3PRYMMAL-PHI3-3B-SLERP", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3656, "hfopenllm_v2/BBH": 0.5422, "hfopenllm_v2/MATH Level 5": 0.1715, "hfopenllm_v2/GPQA": 0.3263, "hfopenllm_v2/MUSR": 0.4648, "hfopenllm_v2/MMLU-PRO": 0.4002 } }, { "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP", "name": "4PRYMMAL-GEMMA2-9B-SLERP", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2714, "hfopenllm_v2/BBH": 0.5923, "hfopenllm_v2/MATH Level 5": 0.0906, "hfopenllm_v2/GPQA": 0.3305, "hfopenllm_v2/MUSR": 0.4672, "hfopenllm_v2/MMLU-PRO": 0.421 } }, { "id": "Youlln/ECE-MIRAGE-1-12B", "name": "ECE-MIRAGE-1-12B", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.207, "hfopenllm_v2/BBH": 0.3011, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3219, "hfopenllm_v2/MMLU-PRO": 0.111 } }, { "id": "Youlln/ECE-MIRAGE-1-15B", "name": "ECE-MIRAGE-1-15B", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.207, "hfopenllm_v2/BBH": 0.3011, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2634, "hfopenllm_v2/MUSR": 0.3219, "hfopenllm_v2/MMLU-PRO": 0.111 } }, { "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3", "name": "ECE-PRYMMAL-0.5B-FT-V3", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1642, "hfopenllm_v2/BBH": 0.3093, "hfopenllm_v2/MATH Level 5": 0.003, "hfopenllm_v2/GPQA": 0.2576, "hfopenllm_v2/MUSR": 0.3644, "hfopenllm_v2/MMLU-PRO": 0.1161 } }, { "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR", "name": "ECE-PRYMMAL-0.5B-FT-V3-MUSR", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1533, "hfopenllm_v2/BBH": 0.3041, "hfopenllm_v2/MATH Level 5": 0.0242, "hfopenllm_v2/GPQA": 0.2492, "hfopenllm_v2/MUSR": 0.366, "hfopenllm_v2/MMLU-PRO": 0.1645 } }, { "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR", "name": "ECE-PRYMMAL-0.5B-FT-V4-MUSR", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1138, "hfopenllm_v2/BBH": 0.3038, "hfopenllm_v2/MATH Level 5": 0.0121, "hfopenllm_v2/GPQA": 0.2701, "hfopenllm_v2/MUSR": 0.3529, "hfopenllm_v2/MMLU-PRO": 0.1321 } }, { "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2", "name": "ECE-PRYMMAL-0.5B-SLERP-V2", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1612, "hfopenllm_v2/BBH": 0.2935, "hfopenllm_v2/MATH Level 5": 0.0008, "hfopenllm_v2/GPQA": 0.2743, "hfopenllm_v2/MUSR": 0.3831, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3", "name": "ECE-PRYMMAL-0.5B-SLERP-V3", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.167, "hfopenllm_v2/BBH": 0.2938, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2517, "hfopenllm_v2/MUSR": 0.3541, "hfopenllm_v2/MMLU-PRO": 0.1087 } }, { "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1", "name": "ECE-PRYMMAL-YL-1B-SLERP-V1", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3251, "hfopenllm_v2/BBH": 0.4209, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4266, "hfopenllm_v2/MMLU-PRO": 0.2936 } }, { "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2", "name": "ECE-PRYMMAL-YL-1B-SLERP-V2", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3251, "hfopenllm_v2/BBH": 0.4209, "hfopenllm_v2/MATH Level 5": 0.1073, "hfopenllm_v2/GPQA": 0.2911, "hfopenllm_v2/MUSR": 0.4266, "hfopenllm_v2/MMLU-PRO": 0.2936 } }, { "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4", "name": "ECE-PRYMMAL-YL-7B-SLERP-V4", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.251, "hfopenllm_v2/BBH": 0.377, "hfopenllm_v2/MATH Level 5": 0.0536, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3745, "hfopenllm_v2/MMLU-PRO": 0.2132 } }, { "id": "Youlln/ECE-PRYMMAL0.5-FT", "name": "ECE-PRYMMAL0.5-FT", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1851, "hfopenllm_v2/BBH": 0.3132, "hfopenllm_v2/MATH Level 5": 0.0234, "hfopenllm_v2/GPQA": 0.2559, "hfopenllm_v2/MUSR": 0.3301, "hfopenllm_v2/MMLU-PRO": 0.1477 } }, { "id": "Youlln/ECE-PRYMMAL0.5B-Youri", "name": "ECE-PRYMMAL0.5B-Youri", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1446, "hfopenllm_v2/BBH": 0.2817, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2433, "hfopenllm_v2/MUSR": 0.3697, "hfopenllm_v2/MMLU-PRO": 0.1095 } }, { "id": "Youlln/ECE-PRYMMAL1B-FT-V1", "name": "ECE-PRYMMAL1B-FT-V1", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2144, "hfopenllm_v2/BBH": 0.4033, "hfopenllm_v2/MATH Level 5": 0.0642, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.3417, "hfopenllm_v2/MMLU-PRO": 0.2743 } }, { "id": "Youlln/ECE-Qwen0.5B-FT-V2", "name": "ECE-Qwen0.5B-FT-V2", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2526, "hfopenllm_v2/BBH": 0.329, "hfopenllm_v2/MATH Level 5": 0.0204, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.3063, "hfopenllm_v2/MMLU-PRO": 0.1666 } }, { "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP", "name": "ECE.EIFFEIL.ia-0.5B-SLERP", "developer": "Youlln", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2561, "hfopenllm_v2/BBH": 0.3306, "hfopenllm_v2/MATH Level 5": 0.0597, "hfopenllm_v2/GPQA": 0.2651, "hfopenllm_v2/MUSR": 0.3102, "hfopenllm_v2/MMLU-PRO": 0.1903 } }, { "id": "YoungPanda/qwenqwen", "name": "qwenqwen", "developer": "YoungPanda", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1264, "hfopenllm_v2/BBH": 0.3379, "hfopenllm_v2/MATH Level 5": 0.0355, "hfopenllm_v2/GPQA": 0.25, "hfopenllm_v2/MUSR": 0.3434, "hfopenllm_v2/MMLU-PRO": 0.1168 } }, { "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3", "name": "Qwen2.5-14B-1M-YOYO-V3", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8398, "hfopenllm_v2/BBH": 0.6448, "hfopenllm_v2/MATH Level 5": 0.5355, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4141, "hfopenllm_v2/MMLU-PRO": 0.5207 } }, { "id": "YOYO-AI/Qwen2.5-14B-it-restore", "name": "Qwen2.5-14B-it-restore", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8209, "hfopenllm_v2/BBH": 0.6388, "hfopenllm_v2/MATH Level 5": 0.537, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4087, "hfopenllm_v2/MMLU-PRO": 0.49 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505", "name": "Qwen2.5-14B-YOYO-0505", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5883, "hfopenllm_v2/BBH": 0.6539, "hfopenllm_v2/MATH Level 5": 0.4434, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4757, "hfopenllm_v2/MMLU-PRO": 0.5371 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2", "name": "Qwen2.5-14B-YOYO-0510-v2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5947, "hfopenllm_v2/BBH": 0.6553, "hfopenllm_v2/MATH Level 5": 0.4441, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805", "name": "Qwen2.5-14B-YOYO-0805", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5883, "hfopenllm_v2/BBH": 0.6539, "hfopenllm_v2/MATH Level 5": 0.4434, "hfopenllm_v2/GPQA": 0.3733, "hfopenllm_v2/MUSR": 0.4757, "hfopenllm_v2/MMLU-PRO": 0.5371 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005", "name": "Qwen2.5-14B-YOYO-1005", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5972, "hfopenllm_v2/BBH": 0.6542, "hfopenllm_v2/MATH Level 5": 0.4524, "hfopenllm_v2/GPQA": 0.3809, "hfopenllm_v2/MUSR": 0.473, "hfopenllm_v2/MMLU-PRO": 0.5382 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2", "name": "Qwen2.5-14B-YOYO-1005-v2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5953, "hfopenllm_v2/BBH": 0.6551, "hfopenllm_v2/MATH Level 5": 0.4434, "hfopenllm_v2/GPQA": 0.3842, "hfopenllm_v2/MUSR": 0.4731, "hfopenllm_v2/MMLU-PRO": 0.5372 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010", "name": "Qwen2.5-14B-YOYO-1010", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5899, "hfopenllm_v2/BBH": 0.654, "hfopenllm_v2/MATH Level 5": 0.4509, "hfopenllm_v2/GPQA": 0.3834, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.5376 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2", "name": "Qwen2.5-14B-YOYO-1010-v2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5947, "hfopenllm_v2/BBH": 0.6553, "hfopenllm_v2/MATH Level 5": 0.4441, "hfopenllm_v2/GPQA": 0.3817, "hfopenllm_v2/MUSR": 0.4744, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest", "name": "Qwen2.5-14B-YOYO-latest", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5911, "hfopenllm_v2/BBH": 0.6656, "hfopenllm_v2/MATH Level 5": 0.4418, "hfopenllm_v2/GPQA": 0.3826, "hfopenllm_v2/MUSR": 0.4691, "hfopenllm_v2/MMLU-PRO": 0.5371 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2", "name": "Qwen2.5-14B-YOYO-latest-V2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7771, "hfopenllm_v2/BBH": 0.6299, "hfopenllm_v2/MATH Level 5": 0.5159, "hfopenllm_v2/GPQA": 0.354, "hfopenllm_v2/MUSR": 0.4299, "hfopenllm_v2/MMLU-PRO": 0.5224 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE", "name": "Qwen2.5-14B-YOYO-SCE", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5844, "hfopenllm_v2/BBH": 0.6489, "hfopenllm_v2/MATH Level 5": 0.4615, "hfopenllm_v2/GPQA": 0.3742, "hfopenllm_v2/MUSR": 0.4704, "hfopenllm_v2/MMLU-PRO": 0.5381 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4", "name": "Qwen2.5-14B-YOYO-V4", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8398, "hfopenllm_v2/BBH": 0.649, "hfopenllm_v2/MATH Level 5": 0.5347, "hfopenllm_v2/GPQA": 0.3221, "hfopenllm_v2/MUSR": 0.4115, "hfopenllm_v2/MMLU-PRO": 0.517 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1", "name": "Qwen2.5-14B-YOYO-V4-p1", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8203, "hfopenllm_v2/BBH": 0.6516, "hfopenllm_v2/MATH Level 5": 0.5332, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4194, "hfopenllm_v2/MMLU-PRO": 0.502 } }, { "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2", "name": "Qwen2.5-14B-YOYO-V4-p2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8048, "hfopenllm_v2/BBH": 0.6339, "hfopenllm_v2/MATH Level 5": 0.5166, "hfopenllm_v2/GPQA": 0.3272, "hfopenllm_v2/MUSR": 0.4435, "hfopenllm_v2/MMLU-PRO": 0.4968 } }, { "id": "YOYO-AI/Qwen2.5-7B-it-restore", "name": "Qwen2.5-7B-it-restore", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7531, "hfopenllm_v2/BBH": 0.5407, "hfopenllm_v2/MATH Level 5": 0.5, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.4007, "hfopenllm_v2/MMLU-PRO": 0.4288 } }, { "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010", "name": "Qwen2.5-Coder-14B-YOYO-1010", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5336, "hfopenllm_v2/BBH": 0.6187, "hfopenllm_v2/MATH Level 5": 0.3218, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4422, "hfopenllm_v2/MMLU-PRO": 0.4075 } }, { "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B", "name": "ZYH-LLM-Qwen2.5-14B", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5941, "hfopenllm_v2/BBH": 0.6644, "hfopenllm_v2/MATH Level 5": 0.4116, "hfopenllm_v2/GPQA": 0.3859, "hfopenllm_v2/MUSR": 0.4757, "hfopenllm_v2/MMLU-PRO": 0.5351 } }, { "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2", "name": "ZYH-LLM-Qwen2.5-14B-V2", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5071, "hfopenllm_v2/BBH": 0.6452, "hfopenllm_v2/MATH Level 5": 0.3542, "hfopenllm_v2/GPQA": 0.3792, "hfopenllm_v2/MUSR": 0.4689, "hfopenllm_v2/MMLU-PRO": 0.5372 } }, { "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3", "name": "ZYH-LLM-Qwen2.5-14B-V3", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8578, "hfopenllm_v2/BBH": 0.6359, "hfopenllm_v2/MATH Level 5": 0.5272, "hfopenllm_v2/GPQA": 0.3322, "hfopenllm_v2/MUSR": 0.4022, "hfopenllm_v2/MMLU-PRO": 0.4881 } }, { "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4", "name": "ZYH-LLM-Qwen2.5-14B-V4", "developer": "YOYO-AI", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8365, "hfopenllm_v2/BBH": 0.6515, "hfopenllm_v2/MATH Level 5": 0.5393, "hfopenllm_v2/GPQA": 0.3146, "hfopenllm_v2/MUSR": 0.4434, "hfopenllm_v2/MMLU-PRO": 0.5204 } }, { "id": "yuchenxie/ArlowGPT-3B-Multilingual", "name": "ArlowGPT-3B-Multilingual", "developer": "yuchenxie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6395, "hfopenllm_v2/BBH": 0.4301, "hfopenllm_v2/MATH Level 5": 0.1125, "hfopenllm_v2/GPQA": 0.2802, "hfopenllm_v2/MUSR": 0.3727, "hfopenllm_v2/MMLU-PRO": 0.2817 } }, { "id": "yuchenxie/ArlowGPT-8B", "name": "ArlowGPT-8B", "developer": "yuchenxie", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7847, "hfopenllm_v2/BBH": 0.508, "hfopenllm_v2/MATH Level 5": 0.2039, "hfopenllm_v2/GPQA": 0.2936, "hfopenllm_v2/MUSR": 0.3882, "hfopenllm_v2/MMLU-PRO": 0.3787 } }, { "id": "Yuma42/KangalKhan-RawRuby-7B", "name": "KangalKhan-RawRuby-7B", "developer": "Yuma42", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5477, "hfopenllm_v2/BBH": 0.4755, "hfopenllm_v2/MATH Level 5": 0.0665, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.395, "hfopenllm_v2/MMLU-PRO": 0.3023 } }, { "id": "Yuma42/Llama3.1-IgneousIguana-8B", "name": "Llama3.1-IgneousIguana-8B", "developer": "Yuma42", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8133, "hfopenllm_v2/BBH": 0.5191, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.3104, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.3974 } }, { "id": "Yuma42/Llama3.1-SuperHawk-8B", "name": "Llama3.1-SuperHawk-8B", "developer": "Yuma42", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7986, "hfopenllm_v2/BBH": 0.52, "hfopenllm_v2/MATH Level 5": 0.2349, "hfopenllm_v2/GPQA": 0.3129, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.3945 } }, { "id": "yuvraj17/Llama3-8B-abliterated-Spectrum-slerp", "name": "Llama3-8B-abliterated-Spectrum-slerp", "developer": "yuvraj17", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.2885, "hfopenllm_v2/BBH": 0.4978, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.3012, "hfopenllm_v2/MUSR": 0.3998, "hfopenllm_v2/MMLU-PRO": 0.3257 } }, { "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-dare_ties", "name": "Llama3-8B-SuperNova-Spectrum-dare_ties", "developer": "yuvraj17", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4013, "hfopenllm_v2/BBH": 0.4616, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.2752, "hfopenllm_v2/MUSR": 0.4211, "hfopenllm_v2/MMLU-PRO": 0.3574 } }, { "id": "yuvraj17/Llama3-8B-SuperNova-Spectrum-Hermes-DPO", "name": "Llama3-8B-SuperNova-Spectrum-Hermes-DPO", "developer": "yuvraj17", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4691, "hfopenllm_v2/BBH": 0.44, "hfopenllm_v2/MATH Level 5": 0.0566, "hfopenllm_v2/GPQA": 0.302, "hfopenllm_v2/MUSR": 0.4012, "hfopenllm_v2/MMLU-PRO": 0.2635 } }, { "id": "z-ai/glm-4.5", "name": "z-ai/glm-4.5", "developer": "Z.ai", "evaluator_relationship": null, "benchmark_scores": { "livecodebenchpro/Hard Problems": 0.0, "livecodebenchpro/Medium Problems": 0.028169014084507043, "livecodebenchpro/Easy Problems": 0.1267605633802817 } }, { "id": "Z1-Coder/Z1-Coder-7B", "name": "Z1-Coder-7B", "developer": "Z1-Coder", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3215, "hfopenllm_v2/BBH": 0.4842, "hfopenllm_v2/MATH Level 5": 0.3248, "hfopenllm_v2/GPQA": 0.2727, "hfopenllm_v2/MUSR": 0.3622, "hfopenllm_v2/MMLU-PRO": 0.3759 } }, { "id": "zai-org/glm-4.5-air-fp8", "name": "GLM-4.5-Air-FP8", "developer": "zai-org", "evaluator_relationship": null, "benchmark_scores": { "helm_capabilities/Mean score": 0.67, "helm_capabilities/MMLU-Pro": 0.762, "helm_capabilities/GPQA": 0.594, "helm_capabilities/IFEval": 0.812, "helm_capabilities/WildBench": 0.789, "helm_capabilities/Omni-MATH": 0.391 } }, { "id": "zake7749/gemma-2-2b-it-chinese-kyara-dpo", "name": "gemma-2-2b-it-chinese-kyara-dpo", "developer": "zake7749", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5382, "hfopenllm_v2/BBH": 0.4257, "hfopenllm_v2/MATH Level 5": 0.0838, "hfopenllm_v2/GPQA": 0.2668, "hfopenllm_v2/MUSR": 0.4576, "hfopenllm_v2/MMLU-PRO": 0.2573 } }, { "id": "zake7749/gemma-2-9b-it-chinese-kyara", "name": "gemma-2-9b-it-chinese-kyara", "developer": "zake7749", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1764, "hfopenllm_v2/BBH": 0.5954, "hfopenllm_v2/MATH Level 5": 0.105, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4242, "hfopenllm_v2/MMLU-PRO": 0.4179 } }, { "id": "zelk12/gemma-2-S2MTM-9B", "name": "gemma-2-S2MTM-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7823, "hfopenllm_v2/BBH": 0.6061, "hfopenllm_v2/MATH Level 5": 0.2047, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4218, "hfopenllm_v2/MMLU-PRO": 0.4297 } }, { "id": "zelk12/Gemma-2-TM-9B", "name": "Gemma-2-TM-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8045, "hfopenllm_v2/BBH": 0.5987, "hfopenllm_v2/MATH Level 5": 0.2024, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4152, "hfopenllm_v2/MMLU-PRO": 0.4088 } }, { "id": "zelk12/MT-gemma-2-9B", "name": "MT-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7968, "hfopenllm_v2/BBH": 0.6064, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4071, "hfopenllm_v2/MMLU-PRO": 0.4224 } }, { "id": "zelk12/MT-Gen1-gemma-2-9B", "name": "MT-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7886, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.2221, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.4381 } }, { "id": "zelk12/MT-Gen2-gemma-2-9B", "name": "MT-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7907, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.219, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "zelk12/MT-Gen2-GI-gemma-2-9B", "name": "MT-Gen2-GI-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7914, "hfopenllm_v2/BBH": 0.6096, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4283, "hfopenllm_v2/MMLU-PRO": 0.4356 } }, { "id": "zelk12/MT-Gen3-gemma-2-9B", "name": "MT-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.802, "hfopenllm_v2/BBH": 0.6097, "hfopenllm_v2/MATH Level 5": 0.2296, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.4356 } }, { "id": "zelk12/MT-Gen4-gemma-2-9B", "name": "MT-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7883, "hfopenllm_v2/BBH": 0.611, "hfopenllm_v2/MATH Level 5": 0.2236, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "zelk12/MT-Gen5-gemma-2-9B", "name": "MT-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7923, "hfopenllm_v2/BBH": 0.6133, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.4402 } }, { "id": "zelk12/MT-Gen6-gemma-2-9B", "name": "MT-Gen6-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1616, "hfopenllm_v2/BBH": 0.5845, "hfopenllm_v2/MATH Level 5": 0.0823, "hfopenllm_v2/GPQA": 0.3331, "hfopenllm_v2/MUSR": 0.4069, "hfopenllm_v2/MMLU-PRO": 0.4166 } }, { "id": "zelk12/MT-Gen6fix-gemma-2-9B", "name": "MT-Gen6fix-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1576, "hfopenllm_v2/BBH": 0.5917, "hfopenllm_v2/MATH Level 5": 0.0816, "hfopenllm_v2/GPQA": 0.3372, "hfopenllm_v2/MUSR": 0.4084, "hfopenllm_v2/MMLU-PRO": 0.412 } }, { "id": "zelk12/MT-Gen7-gemma-2-9B", "name": "MT-Gen7-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1664, "hfopenllm_v2/BBH": 0.5935, "hfopenllm_v2/MATH Level 5": 0.0891, "hfopenllm_v2/GPQA": 0.3356, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.4122 } }, { "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B", "name": "MT-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7907, "hfopenllm_v2/BBH": 0.6142, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.4396 } }, { "id": "zelk12/MT-Merge-gemma-2-9B", "name": "MT-Merge-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8035, "hfopenllm_v2/BBH": 0.6118, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3482, "hfopenllm_v2/MUSR": 0.4256, "hfopenllm_v2/MMLU-PRO": 0.4362 } }, { "id": "zelk12/MT-Merge1-gemma-2-9B", "name": "MT-Merge1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7901, "hfopenllm_v2/BBH": 0.61, "hfopenllm_v2/MATH Level 5": 0.2289, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.4374 } }, { "id": "zelk12/MT-Merge2-gemma-2-9B", "name": "MT-Merge2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7877, "hfopenllm_v2/BBH": 0.6107, "hfopenllm_v2/MATH Level 5": 0.2349, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", "name": "MT-Merge2-MU-gemma-2-MTg2MT1g2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7956, "hfopenllm_v2/BBH": 0.6084, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4322, "hfopenllm_v2/MMLU-PRO": 0.4373 } }, { "id": "zelk12/MT-Merge3-gemma-2-9B", "name": "MT-Merge3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7859, "hfopenllm_v2/BBH": 0.6102, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4258, "hfopenllm_v2/MMLU-PRO": 0.4373 } }, { "id": "zelk12/MT-Merge4-gemma-2-9B", "name": "MT-Merge4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7807, "hfopenllm_v2/BBH": 0.6118, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4294, "hfopenllm_v2/MMLU-PRO": 0.439 } }, { "id": "zelk12/MT-Merge5-gemma-2-9B", "name": "MT-Merge5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7844, "hfopenllm_v2/BBH": 0.6123, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4281, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "zelk12/MT-Merge6-gemma-2-9B", "name": "MT-Merge6-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1695, "hfopenllm_v2/BBH": 0.5949, "hfopenllm_v2/MATH Level 5": 0.0801, "hfopenllm_v2/GPQA": 0.3289, "hfopenllm_v2/MUSR": 0.4098, "hfopenllm_v2/MMLU-PRO": 0.4115 } }, { "id": "zelk12/MT1-gemma-2-9B", "name": "MT1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7947, "hfopenllm_v2/BBH": 0.6109, "hfopenllm_v2/MATH Level 5": 0.2236, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4322, "hfopenllm_v2/MMLU-PRO": 0.4358 } }, { "id": "zelk12/MT1-Gen1-gemma-2-9B", "name": "MT1-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7974, "hfopenllm_v2/BBH": 0.6118, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.4376 } }, { "id": "zelk12/MT1-Gen2-gemma-2-9B", "name": "MT1-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7984, "hfopenllm_v2/BBH": 0.6096, "hfopenllm_v2/MATH Level 5": 0.2251, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4284, "hfopenllm_v2/MMLU-PRO": 0.4355 } }, { "id": "zelk12/MT1-Gen3-gemma-2-9B", "name": "MT1-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.796, "hfopenllm_v2/BBH": 0.6102, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4243, "hfopenllm_v2/MMLU-PRO": 0.4349 } }, { "id": "zelk12/MT1-Gen4-gemma-2-9B", "name": "MT1-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7941, "hfopenllm_v2/BBH": 0.6058, "hfopenllm_v2/MATH Level 5": 0.216, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4286 } }, { "id": "zelk12/MT1-Gen5-gemma-2-9B", "name": "MT1-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7795, "hfopenllm_v2/BBH": 0.6017, "hfopenllm_v2/MATH Level 5": 0.2077, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4222 } }, { "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B", "name": "MT1-Gen5-IF-gemma-2-S2DMv1-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7929, "hfopenllm_v2/BBH": 0.6, "hfopenllm_v2/MATH Level 5": 0.2032, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4245, "hfopenllm_v2/MMLU-PRO": 0.4218 } }, { "id": "zelk12/MT1-Gen6-gemma-2-9B", "name": "MT1-Gen6-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1634, "hfopenllm_v2/BBH": 0.5944, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4044, "hfopenllm_v2/MMLU-PRO": 0.4133 } }, { "id": "zelk12/MT1-Gen7-gemma-2-9B", "name": "MT1-Gen7-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1634, "hfopenllm_v2/BBH": 0.5938, "hfopenllm_v2/MATH Level 5": 0.0831, "hfopenllm_v2/GPQA": 0.328, "hfopenllm_v2/MUSR": 0.4111, "hfopenllm_v2/MMLU-PRO": 0.4145 } }, { "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B", "name": "MT1-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7929, "hfopenllm_v2/BBH": 0.6123, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4255, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "zelk12/MT2-gemma-2-9B", "name": "MT2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7886, "hfopenllm_v2/BBH": 0.6115, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4217, "hfopenllm_v2/MMLU-PRO": 0.4368 } }, { "id": "zelk12/MT2-Gen1-gemma-2-9B", "name": "MT2-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.6101, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4243, "hfopenllm_v2/MMLU-PRO": 0.4377 } }, { "id": "zelk12/MT2-Gen2-gemma-2-9B", "name": "MT2-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7889, "hfopenllm_v2/BBH": 0.6093, "hfopenllm_v2/MATH Level 5": 0.2183, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.427, "hfopenllm_v2/MMLU-PRO": 0.4388 } }, { "id": "zelk12/MT2-Gen3-gemma-2-9B", "name": "MT2-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.781, "hfopenllm_v2/BBH": 0.6105, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4374 } }, { "id": "zelk12/MT2-Gen4-gemma-2-9B", "name": "MT2-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7896, "hfopenllm_v2/BBH": 0.6097, "hfopenllm_v2/MATH Level 5": 0.2236, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4125, "hfopenllm_v2/MMLU-PRO": 0.4321 } }, { "id": "zelk12/MT2-Gen5-gemma-2-9B", "name": "MT2-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7749, "hfopenllm_v2/BBH": 0.6064, "hfopenllm_v2/MATH Level 5": 0.2107, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.4302 } }, { "id": "zelk12/MT2-Gen6-gemma-2-9B", "name": "MT2-Gen6-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1664, "hfopenllm_v2/BBH": 0.596, "hfopenllm_v2/MATH Level 5": 0.0846, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4137, "hfopenllm_v2/MMLU-PRO": 0.421 } }, { "id": "zelk12/MT2-Gen7-gemma-2-9B", "name": "MT2-Gen7-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1762, "hfopenllm_v2/BBH": 0.6079, "hfopenllm_v2/MATH Level 5": 0.102, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4203, "hfopenllm_v2/MMLU-PRO": 0.4311 } }, { "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B", "name": "MT2-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7901, "hfopenllm_v2/BBH": 0.6108, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.4391 } }, { "id": "zelk12/MT3-gemma-2-9B", "name": "MT3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7786, "hfopenllm_v2/BBH": 0.6131, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.3448, "hfopenllm_v2/MUSR": 0.4243, "hfopenllm_v2/MMLU-PRO": 0.4327 } }, { "id": "zelk12/MT3-Gen1-gemma-2-9B", "name": "MT3-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7838, "hfopenllm_v2/BBH": 0.6107, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3465, "hfopenllm_v2/MUSR": 0.4151, "hfopenllm_v2/MMLU-PRO": 0.4327 } }, { "id": "zelk12/MT3-Gen2-gemma-2-9B", "name": "MT3-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7843, "hfopenllm_v2/BBH": 0.6091, "hfopenllm_v2/MATH Level 5": 0.2236, "hfopenllm_v2/GPQA": 0.3574, "hfopenllm_v2/MUSR": 0.4111, "hfopenllm_v2/MMLU-PRO": 0.4333 } }, { "id": "zelk12/MT3-Gen3-gemma-2-9B", "name": "MT3-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7856, "hfopenllm_v2/BBH": 0.6089, "hfopenllm_v2/MATH Level 5": 0.2153, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4258, "hfopenllm_v2/MMLU-PRO": 0.4303 } }, { "id": "zelk12/MT3-Gen4-gemma-2-9B", "name": "MT3-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7737, "hfopenllm_v2/BBH": 0.6101, "hfopenllm_v2/MATH Level 5": 0.2062, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4476, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "zelk12/MT3-Gen5-gemma-2-9B", "name": "MT3-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.799, "hfopenllm_v2/BBH": 0.6099, "hfopenllm_v2/MATH Level 5": 0.2266, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4317 } }, { "id": "zelk12/MT3-Gen5-gemma-2-9B_v1", "name": "MT3-Gen5-gemma-2-9B_v1", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7996, "hfopenllm_v2/BBH": 0.6113, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.349, "hfopenllm_v2/MUSR": 0.4204, "hfopenllm_v2/MMLU-PRO": 0.4359 } }, { "id": "zelk12/MT3-Gen6-gemma-2-9B", "name": "MT3-Gen6-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1762, "hfopenllm_v2/BBH": 0.602, "hfopenllm_v2/MATH Level 5": 0.0884, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4126, "hfopenllm_v2/MMLU-PRO": 0.4102 } }, { "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B", "name": "MT3-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1762, "hfopenllm_v2/BBH": 0.6123, "hfopenllm_v2/MATH Level 5": 0.1012, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4255, "hfopenllm_v2/MMLU-PRO": 0.4389 } }, { "id": "zelk12/MT4-gemma-2-9B", "name": "MT4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7762, "hfopenllm_v2/BBH": 0.6073, "hfopenllm_v2/MATH Level 5": 0.2085, "hfopenllm_v2/GPQA": 0.3381, "hfopenllm_v2/MUSR": 0.4309, "hfopenllm_v2/MMLU-PRO": 0.4366 } }, { "id": "zelk12/MT4-Gen1-gemma-2-9B", "name": "MT4-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7895, "hfopenllm_v2/BBH": 0.6094, "hfopenllm_v2/MATH Level 5": 0.2198, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4322, "hfopenllm_v2/MMLU-PRO": 0.4389 } }, { "id": "zelk12/MT4-Gen2-gemma-2-9B", "name": "MT4-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8051, "hfopenllm_v2/BBH": 0.6108, "hfopenllm_v2/MATH Level 5": 0.2326, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4257, "hfopenllm_v2/MMLU-PRO": 0.4368 } }, { "id": "zelk12/MT4-Gen3-gemma-2-9B", "name": "MT4-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7841, "hfopenllm_v2/BBH": 0.6087, "hfopenllm_v2/MATH Level 5": 0.219, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4243, "hfopenllm_v2/MMLU-PRO": 0.4381 } }, { "id": "zelk12/MT4-Gen4-gemma-2-9B", "name": "MT4-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7874, "hfopenllm_v2/BBH": 0.6076, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4244, "hfopenllm_v2/MMLU-PRO": 0.4323 } }, { "id": "zelk12/MT4-Gen5-gemma-2-9B", "name": "MT4-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7789, "hfopenllm_v2/BBH": 0.6107, "hfopenllm_v2/MATH Level 5": 0.2266, "hfopenllm_v2/GPQA": 0.3565, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.4384 } }, { "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B", "name": "MT4-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1771, "hfopenllm_v2/BBH": 0.612, "hfopenllm_v2/MATH Level 5": 0.0952, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.4391 } }, { "id": "zelk12/MT5-gemma-2-9B", "name": "MT5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8048, "hfopenllm_v2/BBH": 0.6112, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4204, "hfopenllm_v2/MMLU-PRO": 0.4367 } }, { "id": "zelk12/MT5-Gen1-gemma-2-9B", "name": "MT5-Gen1-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7831, "hfopenllm_v2/BBH": 0.611, "hfopenllm_v2/MATH Level 5": 0.2213, "hfopenllm_v2/GPQA": 0.3473, "hfopenllm_v2/MUSR": 0.4204, "hfopenllm_v2/MMLU-PRO": 0.4368 } }, { "id": "zelk12/MT5-Gen2-gemma-2-9B", "name": "MT5-Gen2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7962, "hfopenllm_v2/BBH": 0.6105, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4163, "hfopenllm_v2/MMLU-PRO": 0.4379 } }, { "id": "zelk12/MT5-Gen3-gemma-2-9B", "name": "MT5-Gen3-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7825, "hfopenllm_v2/BBH": 0.609, "hfopenllm_v2/MATH Level 5": 0.2168, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4375 } }, { "id": "zelk12/MT5-Gen4-gemma-2-9B", "name": "MT5-Gen4-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7835, "hfopenllm_v2/BBH": 0.6131, "hfopenllm_v2/MATH Level 5": 0.2243, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.4397 } }, { "id": "zelk12/MT5-Gen5-gemma-2-9B", "name": "MT5-Gen5-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7947, "hfopenllm_v2/BBH": 0.6112, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3482, "hfopenllm_v2/MUSR": 0.4191, "hfopenllm_v2/MMLU-PRO": 0.4329 } }, { "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B", "name": "MT5-Max-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1762, "hfopenllm_v2/BBH": 0.6127, "hfopenllm_v2/MATH Level 5": 0.0982, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4228, "hfopenllm_v2/MMLU-PRO": 0.439 } }, { "id": "zelk12/MTM-Merge-gemma-2-9B", "name": "MTM-Merge-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7798, "hfopenllm_v2/BBH": 0.6133, "hfopenllm_v2/MATH Level 5": 0.2175, "hfopenllm_v2/GPQA": 0.3549, "hfopenllm_v2/MUSR": 0.4268, "hfopenllm_v2/MMLU-PRO": 0.4388 } }, { "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B", "name": "MTMaMe-Merge_02012025163610-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1786, "hfopenllm_v2/BBH": 0.6117, "hfopenllm_v2/MATH Level 5": 0.0959, "hfopenllm_v2/GPQA": 0.3523, "hfopenllm_v2/MUSR": 0.4241, "hfopenllm_v2/MMLU-PRO": 0.4382 } }, { "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1", "name": "recoilme-gemma-2-Ataraxy-9B-v0.1", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7649, "hfopenllm_v2/BBH": 0.6075, "hfopenllm_v2/MATH Level 5": 0.2281, "hfopenllm_v2/GPQA": 0.3498, "hfopenllm_v2/MUSR": 0.4136, "hfopenllm_v2/MMLU-PRO": 0.4321 } }, { "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", "name": "recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7707, "hfopenllm_v2/BBH": 0.6075, "hfopenllm_v2/MATH Level 5": 0.2145, "hfopenllm_v2/GPQA": 0.3431, "hfopenllm_v2/MUSR": 0.4323, "hfopenllm_v2/MMLU-PRO": 0.44 } }, { "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", "name": "recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.5995, "hfopenllm_v2/MATH Level 5": 0.2017, "hfopenllm_v2/GPQA": 0.3498, "hfopenllm_v2/MUSR": 0.3951, "hfopenllm_v2/MMLU-PRO": 0.4141 } }, { "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2", "name": "recoilme-gemma-2-Ataraxy-9B-v0.2", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.76, "hfopenllm_v2/BBH": 0.6066, "hfopenllm_v2/MATH Level 5": 0.2228, "hfopenllm_v2/GPQA": 0.3482, "hfopenllm_v2/MUSR": 0.411, "hfopenllm_v2/MMLU-PRO": 0.4323 } }, { "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", "name": "recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7615, "hfopenllm_v2/BBH": 0.6099, "hfopenllm_v2/MATH Level 5": 0.21, "hfopenllm_v2/GPQA": 0.3414, "hfopenllm_v2/MUSR": 0.431, "hfopenllm_v2/MMLU-PRO": 0.4315 } }, { "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1", "name": "recoilme-gemma-2-Ifable-9B-v0.1", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7944, "hfopenllm_v2/BBH": 0.6064, "hfopenllm_v2/MATH Level 5": 0.2205, "hfopenllm_v2/GPQA": 0.3515, "hfopenllm_v2/MUSR": 0.4202, "hfopenllm_v2/MMLU-PRO": 0.4323 } }, { "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", "name": "recoilme-gemma-2-psy10k-mental_healt-9B-v0.1", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7445, "hfopenllm_v2/BBH": 0.5978, "hfopenllm_v2/MATH Level 5": 0.1888, "hfopenllm_v2/GPQA": 0.344, "hfopenllm_v2/MUSR": 0.4295, "hfopenllm_v2/MMLU-PRO": 0.4181 } }, { "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B", "name": "Rv0.4DMv1t0.25-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7497, "hfopenllm_v2/BBH": 0.607, "hfopenllm_v2/MATH Level 5": 0.2258, "hfopenllm_v2/GPQA": 0.3456, "hfopenllm_v2/MUSR": 0.4309, "hfopenllm_v2/MMLU-PRO": 0.4401 } }, { "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", "name": "Rv0.4DMv1t0.25Tt0.25-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7646, "hfopenllm_v2/BBH": 0.6098, "hfopenllm_v2/MATH Level 5": 0.2069, "hfopenllm_v2/GPQA": 0.3423, "hfopenllm_v2/MUSR": 0.4283, "hfopenllm_v2/MMLU-PRO": 0.4347 } }, { "id": "zelk12/Rv0.4MT4g2-gemma-2-9B", "name": "Rv0.4MT4g2-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.732, "hfopenllm_v2/BBH": 0.6041, "hfopenllm_v2/MATH Level 5": 0.1949, "hfopenllm_v2/GPQA": 0.3532, "hfopenllm_v2/MUSR": 0.4231, "hfopenllm_v2/MMLU-PRO": 0.4417 } }, { "id": "zelk12/T31122024203920-gemma-2-9B", "name": "T31122024203920-gemma-2-9B", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7676, "hfopenllm_v2/BBH": 0.6096, "hfopenllm_v2/MATH Level 5": 0.2054, "hfopenllm_v2/GPQA": 0.3507, "hfopenllm_v2/MUSR": 0.4322, "hfopenllm_v2/MMLU-PRO": 0.4373 } }, { "id": "zelk12/Test01012025155054", "name": "Test01012025155054", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1555, "hfopenllm_v2/BBH": 0.283, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.367, "hfopenllm_v2/MMLU-PRO": 0.109 } }, { "id": "zelk12/Test01012025155054t0.5_gemma-2", "name": "Test01012025155054t0.5_gemma-2", "developer": "zelk12", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.1555, "hfopenllm_v2/BBH": 0.283, "hfopenllm_v2/MATH Level 5": 0.0, "hfopenllm_v2/GPQA": 0.2416, "hfopenllm_v2/MUSR": 0.367, "hfopenllm_v2/MMLU-PRO": 0.109 } }, { "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B", "name": "L3-Aspire-Heart-Matrix-8B", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4834, "hfopenllm_v2/BBH": 0.5384, "hfopenllm_v2/MATH Level 5": 0.1828, "hfopenllm_v2/GPQA": 0.3247, "hfopenllm_v2/MUSR": 0.4187, "hfopenllm_v2/MMLU-PRO": 0.3785 } }, { "id": "ZeroXClem/Llama-3.1-8B-AthenaSky-MegaMix", "name": "Llama-3.1-8B-AthenaSky-MegaMix", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6301, "hfopenllm_v2/BBH": 0.5163, "hfopenllm_v2/MATH Level 5": 0.2795, "hfopenllm_v2/GPQA": 0.2777, "hfopenllm_v2/MUSR": 0.3538, "hfopenllm_v2/MMLU-PRO": 0.3504 } }, { "id": "ZeroXClem/Llama-3.1-8B-RainbowLight-EtherealMix", "name": "Llama-3.1-8B-RainbowLight-EtherealMix", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4973, "hfopenllm_v2/BBH": 0.5155, "hfopenllm_v2/MATH Level 5": 0.1216, "hfopenllm_v2/GPQA": 0.2869, "hfopenllm_v2/MUSR": 0.3947, "hfopenllm_v2/MMLU-PRO": 0.363 } }, { "id": "ZeroXClem/Llama-3.1-8B-SpecialTitanFusion", "name": "Llama-3.1-8B-SpecialTitanFusion", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7402, "hfopenllm_v2/BBH": 0.5439, "hfopenllm_v2/MATH Level 5": 0.2334, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.3874, "hfopenllm_v2/MMLU-PRO": 0.3621 } }, { "id": "ZeroXClem/Llama-3.1-8B-SuperNova-EtherealHermes", "name": "Llama-3.1-8B-SuperNova-EtherealHermes", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7339, "hfopenllm_v2/BBH": 0.5244, "hfopenllm_v2/MATH Level 5": 0.1745, "hfopenllm_v2/GPQA": 0.2928, "hfopenllm_v2/MUSR": 0.4066, "hfopenllm_v2/MMLU-PRO": 0.3745 } }, { "id": "ZeroXClem/Llama-3.1-8B-SuperTulu-LexiNova", "name": "Llama-3.1-8B-SuperTulu-LexiNova", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4165, "hfopenllm_v2/BBH": 0.5079, "hfopenllm_v2/MATH Level 5": 0.253, "hfopenllm_v2/GPQA": 0.2861, "hfopenllm_v2/MUSR": 0.3971, "hfopenllm_v2/MMLU-PRO": 0.3368 } }, { "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B", "name": "Qwen-2.5-Aether-SlerpFusion-7B", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6262, "hfopenllm_v2/BBH": 0.5462, "hfopenllm_v2/MATH Level 5": 0.2734, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.4178, "hfopenllm_v2/MMLU-PRO": 0.4327 } }, { "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M", "name": "Qwen2.5-7B-CelestialHarmony-1M", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.5944, "hfopenllm_v2/BBH": 0.5431, "hfopenllm_v2/MATH Level 5": 0.3474, "hfopenllm_v2/GPQA": 0.3188, "hfopenllm_v2/MUSR": 0.4595, "hfopenllm_v2/MMLU-PRO": 0.4387 } }, { "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix", "name": "Qwen2.5-7B-HomerAnvita-NerdMix", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7708, "hfopenllm_v2/BBH": 0.5541, "hfopenllm_v2/MATH Level 5": 0.3837, "hfopenllm_v2/GPQA": 0.3196, "hfopenllm_v2/MUSR": 0.4391, "hfopenllm_v2/MMLU-PRO": 0.4432 } }, { "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix", "name": "Qwen2.5-7B-HomerCreative-Mix", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7835, "hfopenllm_v2/BBH": 0.5548, "hfopenllm_v2/MATH Level 5": 0.3565, "hfopenllm_v2/GPQA": 0.2995, "hfopenllm_v2/MUSR": 0.435, "hfopenllm_v2/MMLU-PRO": 0.4447 } }, { "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec", "name": "Qwen2.5-7B-Qandora-CySec", "developer": "ZeroXClem", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6773, "hfopenllm_v2/BBH": 0.549, "hfopenllm_v2/MATH Level 5": 0.2931, "hfopenllm_v2/GPQA": 0.3003, "hfopenllm_v2/MUSR": 0.4286, "hfopenllm_v2/MMLU-PRO": 0.4485 } }, { "id": "zetasepic/Qwen2.5-32B-Instruct-abliterated-v2", "name": "Qwen2.5-32B-Instruct-abliterated-v2", "developer": "zetasepic", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.8334, "hfopenllm_v2/BBH": 0.6934, "hfopenllm_v2/MATH Level 5": 0.5952, "hfopenllm_v2/GPQA": 0.3674, "hfopenllm_v2/MUSR": 0.4354, "hfopenllm_v2/MMLU-PRO": 0.5622 } }, { "id": "zetasepic/Qwen2.5-72B-Instruct-abliterated", "name": "Qwen2.5-72B-Instruct-abliterated", "developer": "zetasepic", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7153, "hfopenllm_v2/BBH": 0.7152, "hfopenllm_v2/MATH Level 5": 0.5242, "hfopenllm_v2/GPQA": 0.4069, "hfopenllm_v2/MUSR": 0.4719, "hfopenllm_v2/MMLU-PRO": 0.5872 } }, { "id": "ZeusLabs/L3-Aethora-15B-V2", "name": "L3-Aethora-15B-V2", "developer": "ZeusLabs", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.7208, "hfopenllm_v2/BBH": 0.5011, "hfopenllm_v2/MATH Level 5": 0.0808, "hfopenllm_v2/GPQA": 0.2878, "hfopenllm_v2/MUSR": 0.3871, "hfopenllm_v2/MMLU-PRO": 0.35 } }, { "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3", "name": "SELM-Llama-3-8B-Instruct-iter-3", "developer": "ZhangShenao", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.6903, "hfopenllm_v2/BBH": 0.5046, "hfopenllm_v2/MATH Level 5": 0.0861, "hfopenllm_v2/GPQA": 0.2584, "hfopenllm_v2/MUSR": 0.3845, "hfopenllm_v2/MMLU-PRO": 0.3783 } }, { "id": "zhengr/MixTAO-7Bx2-MoE-v8.1", "name": "MixTAO-7Bx2-MoE-v8.1", "developer": "zhengr", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.4188, "hfopenllm_v2/BBH": 0.4202, "hfopenllm_v2/MATH Level 5": 0.0604, "hfopenllm_v2/GPQA": 0.2987, "hfopenllm_v2/MUSR": 0.3976, "hfopenllm_v2/MMLU-PRO": 0.2847 } }, { "id": "zhipu-ai/GLM-130B", "name": "GLM 130B", "developer": "zhipu-ai", "evaluator_relationship": null, "benchmark_scores": { "helm_classic/Mean win rate": 0.512, "helm_classic/MMLU": 0.344, "helm_classic/BoolQ": 0.784, "helm_classic/NarrativeQA": 0.706, "helm_classic/NaturalQuestions (open-book)": 0.642, "helm_classic/QuAC": 0.272, "helm_classic/HellaSwag": -1.0, "helm_classic/OpenbookQA": -1.0, "helm_classic/TruthfulQA": 0.218, "helm_classic/MS MARCO (TREC)": -1.0, "helm_classic/CNN/DailyMail": 0.154, "helm_classic/XSUM": 0.132, "helm_classic/IMDB": 0.955, "helm_classic/CivilComments": 0.5, "helm_classic/RAFT": 0.598 } }, { "id": "zhipu-ai/glm-4.6", "name": "GLM 4.6", "developer": "Z.ai", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 24.5 } }, { "id": "zhipu-ai/glm-4.7", "name": "GLM 4.7", "developer": "Z-AI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 33.3 } }, { "id": "zhipu-ai/glm-5", "name": "GLM 5", "developer": "Z-AI", "evaluator_relationship": null, "benchmark_scores": { "terminal-bench-2.0/terminal-bench-2.0": 52.4 } }, { "id": "zhipu/GLM 4.6", "name": "GLM 4.6", "developer": "zhipu", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.196 } }, { "id": "zhipu/GLM 4.7", "name": "GLM 4.7", "developer": "zhipu", "evaluator_relationship": null, "benchmark_scores": { "apex-agents/Corporate Lawyer Mean Score": 0.147 } }, { "id": "zhipu/glm-4-6-fc-thinking", "name": "GLM-4.6 (FC thinking)", "developer": "zhipu", "evaluator_relationship": null, "benchmark_scores": { "bfcl/bfcl.overall.rank": 4.0, "bfcl/bfcl.overall.overall_accuracy": 72.38, "bfcl/bfcl.overall.total_cost_usd": 4.64, "bfcl/bfcl.overall.latency_mean_s": 4.34, "bfcl/bfcl.overall.latency_std_s": 7.22, "bfcl/bfcl.overall.latency_p95_s": 13.5, "bfcl/bfcl.non_live.ast_accuracy": 87.56, "bfcl/bfcl.non_live.simple_ast_accuracy": 74.25, "bfcl/bfcl.non_live.multiple_ast_accuracy": 95.0, "bfcl/bfcl.non_live.parallel_ast_accuracy": 91.5, "bfcl/bfcl.non_live.parallel_multiple_ast_accuracy": 89.5, "bfcl/bfcl.live.live_accuracy": 80.9, "bfcl/bfcl.live.live_simple_ast_accuracy": 89.53, "bfcl/bfcl.live.live_multiple_ast_accuracy": 78.92, "bfcl/bfcl.live.live_parallel_ast_accuracy": 81.25, "bfcl/bfcl.live.live_parallel_multiple_ast_accuracy": 75.0, "bfcl/bfcl.multi_turn.accuracy": 68.0, "bfcl/bfcl.multi_turn.base_accuracy": 74.5, "bfcl/bfcl.multi_turn.miss_function_accuracy": 68.0, "bfcl/bfcl.multi_turn.miss_parameter_accuracy": 63.0, "bfcl/bfcl.multi_turn.long_context_accuracy": 66.5, "bfcl/bfcl.web_search.accuracy": 77.5, "bfcl/bfcl.web_search.base_accuracy": 79.0, "bfcl/bfcl.web_search.no_snippet_accuracy": 76.0, "bfcl/bfcl.memory.accuracy": 55.7, "bfcl/bfcl.memory.kv_accuracy": 43.87, "bfcl/bfcl.memory.vector_accuracy": 56.13, "bfcl/bfcl.memory.recursive_summarization_accuracy": 67.1, "bfcl/bfcl.relevance.relevance_detection_accuracy": 75.0, "bfcl/bfcl.relevance.irrelevance_detection_accuracy": 84.96 } }, { "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg", "name": "zephyr-7b-gemma-dpo-avg", "developer": "ZHLiu627", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.309, "hfopenllm_v2/BBH": 0.4149, "hfopenllm_v2/MATH Level 5": 0.0453, "hfopenllm_v2/GPQA": 0.2785, "hfopenllm_v2/MUSR": 0.4107, "hfopenllm_v2/MMLU-PRO": 0.2851 } }, { "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg", "name": "zephyr-7b-gemma-rpo-avg", "developer": "ZHLiu627", "evaluator_relationship": null, "benchmark_scores": { "hfopenllm_v2/IFEval": 0.3006, "hfopenllm_v2/BBH": 0.4183, "hfopenllm_v2/MATH Level 5": 0.0498, "hfopenllm_v2/GPQA": 0.2768, "hfopenllm_v2/MUSR": 0.4081, "hfopenllm_v2/MMLU-PRO": 0.2831 } }, { "id": "ZiyiYe/Con-J-Qwen2-7B", "name": "ZiyiYe/Con-J-Qwen2-7B", "developer": "ZiyiYe", "evaluator_relationship": null, "benchmark_scores": { "reward-bench/Score": 0.8712, "reward-bench/Chat": 0.919, "reward-bench/Chat Hard": 0.8026, "reward-bench/Safety": 0.8824, "reward-bench/Reasoning": 0.8808 } } ]