Spaces:
Running on CPU Spr
Running on CPU Spr
| { | |
| "developer": "01-ai", | |
| "models": [ | |
| { | |
| "id": "01-ai/Yi-1.5-34B", | |
| "name": "Yi-1.5-34B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2841, | |
| "hfopenllm_v2/BBH": 0.5976, | |
| "hfopenllm_v2/MATH Level 5": 0.1533, | |
| "hfopenllm_v2/GPQA": 0.3658, | |
| "hfopenllm_v2/MUSR": 0.4236, | |
| "hfopenllm_v2/MMLU-PRO": 0.4666 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-34B-32K", | |
| "name": "Yi-1.5-34B-32K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.3119, | |
| "hfopenllm_v2/BBH": 0.6016, | |
| "hfopenllm_v2/MATH Level 5": 0.1541, | |
| "hfopenllm_v2/GPQA": 0.3633, | |
| "hfopenllm_v2/MUSR": 0.4398, | |
| "hfopenllm_v2/MMLU-PRO": 0.4709 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-34B-Chat", | |
| "name": "Yi-1.5-34B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.6067, | |
| "hfopenllm_v2/BBH": 0.6084, | |
| "hfopenllm_v2/MATH Level 5": 0.2772, | |
| "hfopenllm_v2/GPQA": 0.3649, | |
| "hfopenllm_v2/MUSR": 0.4282, | |
| "hfopenllm_v2/MMLU-PRO": 0.452 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-34B-Chat-16K", | |
| "name": "Yi-1.5-34B-Chat-16K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.4564, | |
| "hfopenllm_v2/BBH": 0.61, | |
| "hfopenllm_v2/MATH Level 5": 0.2137, | |
| "hfopenllm_v2/GPQA": 0.3381, | |
| "hfopenllm_v2/MUSR": 0.4398, | |
| "hfopenllm_v2/MMLU-PRO": 0.4545 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-6B", | |
| "name": "Yi-1.5-6B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2617, | |
| "hfopenllm_v2/BBH": 0.4493, | |
| "hfopenllm_v2/MATH Level 5": 0.0665, | |
| "hfopenllm_v2/GPQA": 0.3138, | |
| "hfopenllm_v2/MUSR": 0.4374, | |
| "hfopenllm_v2/MMLU-PRO": 0.3144 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-6B-Chat", | |
| "name": "Yi-1.5-6B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.5145, | |
| "hfopenllm_v2/BBH": 0.4571, | |
| "hfopenllm_v2/MATH Level 5": 0.1624, | |
| "hfopenllm_v2/GPQA": 0.302, | |
| "hfopenllm_v2/MUSR": 0.4392, | |
| "hfopenllm_v2/MMLU-PRO": 0.3193 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-9B", | |
| "name": "Yi-1.5-9B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2936, | |
| "hfopenllm_v2/BBH": 0.5143, | |
| "hfopenllm_v2/MATH Level 5": 0.114, | |
| "hfopenllm_v2/GPQA": 0.3792, | |
| "hfopenllm_v2/MUSR": 0.4328, | |
| "hfopenllm_v2/MMLU-PRO": 0.3916 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-9B-32K", | |
| "name": "Yi-1.5-9B-32K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2303, | |
| "hfopenllm_v2/BBH": 0.4963, | |
| "hfopenllm_v2/MATH Level 5": 0.108, | |
| "hfopenllm_v2/GPQA": 0.3591, | |
| "hfopenllm_v2/MUSR": 0.4186, | |
| "hfopenllm_v2/MMLU-PRO": 0.3765 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-9B-Chat", | |
| "name": "Yi-1.5-9B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.6046, | |
| "hfopenllm_v2/BBH": 0.5559, | |
| "hfopenllm_v2/MATH Level 5": 0.2258, | |
| "hfopenllm_v2/GPQA": 0.3347, | |
| "hfopenllm_v2/MUSR": 0.4259, | |
| "hfopenllm_v2/MMLU-PRO": 0.3975 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-1.5-9B-Chat-16K", | |
| "name": "Yi-1.5-9B-Chat-16K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.4214, | |
| "hfopenllm_v2/BBH": 0.5153, | |
| "hfopenllm_v2/MATH Level 5": 0.1782, | |
| "hfopenllm_v2/GPQA": 0.3087, | |
| "hfopenllm_v2/MUSR": 0.4099, | |
| "hfopenllm_v2/MMLU-PRO": 0.3994 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/yi-34b", | |
| "name": "Yi 34B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.57, | |
| "helm_lite/NarrativeQA": 0.782, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.443, | |
| "helm_lite/OpenbookQA": 0.92, | |
| "helm_lite/MMLU": 0.65, | |
| "helm_lite/MATH": 0.375, | |
| "helm_lite/GSM8K": 0.648, | |
| "helm_lite/LegalBench": 0.618, | |
| "helm_lite/MedQA": 0.656, | |
| "helm_lite/WMT 2014": 0.172, | |
| "helm_mmlu/MMLU All Subjects": 0.762, | |
| "helm_mmlu/Abstract Algebra": 0.4, | |
| "helm_mmlu/Anatomy": 0.748, | |
| "helm_mmlu/College Physics": 0.5, | |
| "helm_mmlu/Computer Security": 0.83, | |
| "helm_mmlu/Econometrics": 0.588, | |
| "helm_mmlu/Global Facts": 0.53, | |
| "helm_mmlu/Jurisprudence": 0.898, | |
| "helm_mmlu/Philosophy": 0.82, | |
| "helm_mmlu/Professional Psychology": 0.835, | |
| "helm_mmlu/Us Foreign Policy": 0.91, | |
| "helm_mmlu/Astronomy": 0.901, | |
| "helm_mmlu/Business Ethics": 0.75, | |
| "helm_mmlu/Clinical Knowledge": 0.8, | |
| "helm_mmlu/Conceptual Physics": 0.77, | |
| "helm_mmlu/Electrical Engineering": 0.779, | |
| "helm_mmlu/Elementary Mathematics": 0.656, | |
| "helm_mmlu/Formal Logic": 0.548, | |
| "helm_mmlu/High School World History": 0.907, | |
| "helm_mmlu/Human Sexuality": 0.87, | |
| "helm_mmlu/International Law": 0.909, | |
| "helm_mmlu/Logical Fallacies": 0.883, | |
| "helm_mmlu/Machine Learning": 0.58, | |
| "helm_mmlu/Management": 0.893, | |
| "helm_mmlu/Marketing": 0.936, | |
| "helm_mmlu/Medical Genetics": 0.87, | |
| "helm_mmlu/Miscellaneous": 0.902, | |
| "helm_mmlu/Moral Scenarios": 0.606, | |
| "helm_mmlu/Nutrition": 0.869, | |
| "helm_mmlu/Prehistory": 0.877, | |
| "helm_mmlu/Public Relations": 0.745, | |
| "helm_mmlu/Security Studies": 0.833, | |
| "helm_mmlu/Sociology": 0.9, | |
| "helm_mmlu/Virology": 0.572, | |
| "helm_mmlu/World Religions": 0.877, | |
| "helm_mmlu/Mean win rate": 0.315, | |
| "hfopenllm_v2/IFEval": 0.3046, | |
| "hfopenllm_v2/BBH": 0.5457, | |
| "hfopenllm_v2/MATH Level 5": 0.0514, | |
| "hfopenllm_v2/GPQA": 0.3666, | |
| "hfopenllm_v2/MUSR": 0.4119, | |
| "hfopenllm_v2/MMLU-PRO": 0.4412 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-34B-200K", | |
| "name": "Yi-34B-200K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.1542, | |
| "hfopenllm_v2/BBH": 0.5442, | |
| "hfopenllm_v2/MATH Level 5": 0.0574, | |
| "hfopenllm_v2/GPQA": 0.3565, | |
| "hfopenllm_v2/MUSR": 0.3817, | |
| "hfopenllm_v2/MMLU-PRO": 0.4535 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-34B-Chat", | |
| "name": "Yi-34B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.4699, | |
| "hfopenllm_v2/BBH": 0.5561, | |
| "hfopenllm_v2/MATH Level 5": 0.0627, | |
| "hfopenllm_v2/GPQA": 0.3381, | |
| "hfopenllm_v2/MUSR": 0.3978, | |
| "hfopenllm_v2/MMLU-PRO": 0.4093 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/yi-6b", | |
| "name": "Yi 6B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.253, | |
| "helm_lite/NarrativeQA": 0.702, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.31, | |
| "helm_lite/OpenbookQA": 0.8, | |
| "helm_lite/MMLU": 0.53, | |
| "helm_lite/MATH": 0.126, | |
| "helm_lite/GSM8K": 0.375, | |
| "helm_lite/LegalBench": 0.519, | |
| "helm_lite/MedQA": 0.497, | |
| "helm_lite/WMT 2014": 0.117, | |
| "helm_mmlu/MMLU All Subjects": 0.64, | |
| "helm_mmlu/Abstract Algebra": 0.3, | |
| "helm_mmlu/Anatomy": 0.6, | |
| "helm_mmlu/College Physics": 0.422, | |
| "helm_mmlu/Computer Security": 0.73, | |
| "helm_mmlu/Econometrics": 0.351, | |
| "helm_mmlu/Global Facts": 0.43, | |
| "helm_mmlu/Jurisprudence": 0.796, | |
| "helm_mmlu/Philosophy": 0.678, | |
| "helm_mmlu/Professional Psychology": 0.668, | |
| "helm_mmlu/Us Foreign Policy": 0.87, | |
| "helm_mmlu/Astronomy": 0.684, | |
| "helm_mmlu/Business Ethics": 0.67, | |
| "helm_mmlu/Clinical Knowledge": 0.66, | |
| "helm_mmlu/Conceptual Physics": 0.621, | |
| "helm_mmlu/Electrical Engineering": 0.662, | |
| "helm_mmlu/Elementary Mathematics": 0.452, | |
| "helm_mmlu/Formal Logic": 0.452, | |
| "helm_mmlu/High School World History": 0.785, | |
| "helm_mmlu/Human Sexuality": 0.763, | |
| "helm_mmlu/International Law": 0.769, | |
| "helm_mmlu/Logical Fallacies": 0.779, | |
| "helm_mmlu/Machine Learning": 0.411, | |
| "helm_mmlu/Management": 0.806, | |
| "helm_mmlu/Marketing": 0.893, | |
| "helm_mmlu/Medical Genetics": 0.77, | |
| "helm_mmlu/Miscellaneous": 0.796, | |
| "helm_mmlu/Moral Scenarios": 0.335, | |
| "helm_mmlu/Nutrition": 0.739, | |
| "helm_mmlu/Prehistory": 0.713, | |
| "helm_mmlu/Public Relations": 0.718, | |
| "helm_mmlu/Security Studies": 0.735, | |
| "helm_mmlu/Sociology": 0.831, | |
| "helm_mmlu/Virology": 0.452, | |
| "helm_mmlu/World Religions": 0.836, | |
| "helm_mmlu/Mean win rate": 0.651, | |
| "hfopenllm_v2/IFEval": 0.2893, | |
| "hfopenllm_v2/BBH": 0.4309, | |
| "hfopenllm_v2/MATH Level 5": 0.0159, | |
| "hfopenllm_v2/GPQA": 0.2693, | |
| "hfopenllm_v2/MUSR": 0.3937, | |
| "hfopenllm_v2/MMLU-PRO": 0.2991 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-6B-200K", | |
| "name": "Yi-6B-200K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.0843, | |
| "hfopenllm_v2/BBH": 0.4289, | |
| "hfopenllm_v2/MATH Level 5": 0.0181, | |
| "hfopenllm_v2/GPQA": 0.2819, | |
| "hfopenllm_v2/MUSR": 0.4587, | |
| "hfopenllm_v2/MMLU-PRO": 0.2844 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-6B-Chat", | |
| "name": "Yi-6B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.3395, | |
| "hfopenllm_v2/BBH": 0.4133, | |
| "hfopenllm_v2/MATH Level 5": 0.0136, | |
| "hfopenllm_v2/GPQA": 0.2945, | |
| "hfopenllm_v2/MUSR": 0.3688, | |
| "hfopenllm_v2/MMLU-PRO": 0.3061 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-9B", | |
| "name": "Yi-9B", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2709, | |
| "hfopenllm_v2/BBH": 0.494, | |
| "hfopenllm_v2/MATH Level 5": 0.0559, | |
| "hfopenllm_v2/GPQA": 0.318, | |
| "hfopenllm_v2/MUSR": 0.4054, | |
| "hfopenllm_v2/MMLU-PRO": 0.3574 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-9B-200K", | |
| "name": "Yi-9B-200K", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.2327, | |
| "hfopenllm_v2/BBH": 0.4793, | |
| "hfopenllm_v2/MATH Level 5": 0.0665, | |
| "hfopenllm_v2/GPQA": 0.3154, | |
| "hfopenllm_v2/MUSR": 0.4294, | |
| "hfopenllm_v2/MMLU-PRO": 0.3622 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/Yi-Coder-9B-Chat", | |
| "name": "Yi-Coder-9B-Chat", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "hfopenllm_v2/IFEval": 0.4817, | |
| "hfopenllm_v2/BBH": 0.4814, | |
| "hfopenllm_v2/MATH Level 5": 0.04, | |
| "hfopenllm_v2/GPQA": 0.2475, | |
| "hfopenllm_v2/MUSR": 0.3992, | |
| "hfopenllm_v2/MMLU-PRO": 0.2425 | |
| } | |
| }, | |
| { | |
| "id": "01-ai/yi-large-preview", | |
| "name": "Yi Large Preview", | |
| "developer": "01-ai", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.471, | |
| "helm_lite/NarrativeQA": 0.373, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.428, | |
| "helm_lite/OpenbookQA": 0.946, | |
| "helm_lite/MMLU": 0.712, | |
| "helm_lite/MATH": 0.712, | |
| "helm_lite/GSM8K": 0.69, | |
| "helm_lite/LegalBench": 0.519, | |
| "helm_lite/MedQA": 0.66, | |
| "helm_lite/WMT 2014": 0.176, | |
| "helm_mmlu/MMLU All Subjects": 0.793, | |
| "helm_mmlu/Abstract Algebra": 0.6, | |
| "helm_mmlu/Anatomy": 0.83, | |
| "helm_mmlu/College Physics": 0.569, | |
| "helm_mmlu/Computer Security": 0.86, | |
| "helm_mmlu/Econometrics": 0.728, | |
| "helm_mmlu/Global Facts": 0.52, | |
| "helm_mmlu/Jurisprudence": 0.852, | |
| "helm_mmlu/Philosophy": 0.842, | |
| "helm_mmlu/Professional Psychology": 0.853, | |
| "helm_mmlu/Us Foreign Policy": 0.85, | |
| "helm_mmlu/Astronomy": 0.914, | |
| "helm_mmlu/Business Ethics": 0.8, | |
| "helm_mmlu/Clinical Knowledge": 0.857, | |
| "helm_mmlu/Conceptual Physics": 0.864, | |
| "helm_mmlu/Electrical Engineering": 0.779, | |
| "helm_mmlu/Elementary Mathematics": 0.685, | |
| "helm_mmlu/Formal Logic": 0.603, | |
| "helm_mmlu/High School World History": 0.928, | |
| "helm_mmlu/Human Sexuality": 0.901, | |
| "helm_mmlu/International Law": 0.917, | |
| "helm_mmlu/Logical Fallacies": 0.865, | |
| "helm_mmlu/Machine Learning": 0.616, | |
| "helm_mmlu/Management": 0.903, | |
| "helm_mmlu/Marketing": 0.927, | |
| "helm_mmlu/Medical Genetics": 0.83, | |
| "helm_mmlu/Miscellaneous": 0.916, | |
| "helm_mmlu/Moral Scenarios": 0.831, | |
| "helm_mmlu/Nutrition": 0.846, | |
| "helm_mmlu/Prehistory": 0.892, | |
| "helm_mmlu/Public Relations": 0.827, | |
| "helm_mmlu/Security Studies": 0.82, | |
| "helm_mmlu/Sociology": 0.881, | |
| "helm_mmlu/Virology": 0.59, | |
| "helm_mmlu/World Religions": 0.871, | |
| "helm_mmlu/Mean win rate": 0.258 | |
| } | |
| } | |
| ] | |
| } |