GitHub Actions
chore: sync EEE pipeline output [2026-03-28 11:49 UTC]
d91b463
raw
history blame
13.9 kB
{
"developer": "01-ai",
"models": [
{
"id": "01-ai/Yi-1.5-34B",
"name": "Yi-1.5-34B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2841,
"hfopenllm_v2/BBH": 0.5976,
"hfopenllm_v2/MATH Level 5": 0.1533,
"hfopenllm_v2/GPQA": 0.3658,
"hfopenllm_v2/MUSR": 0.4236,
"hfopenllm_v2/MMLU-PRO": 0.4666
}
},
{
"id": "01-ai/Yi-1.5-34B-32K",
"name": "Yi-1.5-34B-32K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.3119,
"hfopenllm_v2/BBH": 0.6016,
"hfopenllm_v2/MATH Level 5": 0.1541,
"hfopenllm_v2/GPQA": 0.3633,
"hfopenllm_v2/MUSR": 0.4398,
"hfopenllm_v2/MMLU-PRO": 0.4709
}
},
{
"id": "01-ai/Yi-1.5-34B-Chat",
"name": "Yi-1.5-34B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.6067,
"hfopenllm_v2/BBH": 0.6084,
"hfopenllm_v2/MATH Level 5": 0.2772,
"hfopenllm_v2/GPQA": 0.3649,
"hfopenllm_v2/MUSR": 0.4282,
"hfopenllm_v2/MMLU-PRO": 0.452
}
},
{
"id": "01-ai/Yi-1.5-34B-Chat-16K",
"name": "Yi-1.5-34B-Chat-16K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.4564,
"hfopenllm_v2/BBH": 0.61,
"hfopenllm_v2/MATH Level 5": 0.2137,
"hfopenllm_v2/GPQA": 0.3381,
"hfopenllm_v2/MUSR": 0.4398,
"hfopenllm_v2/MMLU-PRO": 0.4545
}
},
{
"id": "01-ai/Yi-1.5-6B",
"name": "Yi-1.5-6B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2617,
"hfopenllm_v2/BBH": 0.4493,
"hfopenllm_v2/MATH Level 5": 0.0665,
"hfopenllm_v2/GPQA": 0.3138,
"hfopenllm_v2/MUSR": 0.4374,
"hfopenllm_v2/MMLU-PRO": 0.3144
}
},
{
"id": "01-ai/Yi-1.5-6B-Chat",
"name": "Yi-1.5-6B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.5145,
"hfopenllm_v2/BBH": 0.4571,
"hfopenllm_v2/MATH Level 5": 0.1624,
"hfopenllm_v2/GPQA": 0.302,
"hfopenllm_v2/MUSR": 0.4392,
"hfopenllm_v2/MMLU-PRO": 0.3193
}
},
{
"id": "01-ai/Yi-1.5-9B",
"name": "Yi-1.5-9B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2936,
"hfopenllm_v2/BBH": 0.5143,
"hfopenllm_v2/MATH Level 5": 0.114,
"hfopenllm_v2/GPQA": 0.3792,
"hfopenllm_v2/MUSR": 0.4328,
"hfopenllm_v2/MMLU-PRO": 0.3916
}
},
{
"id": "01-ai/Yi-1.5-9B-32K",
"name": "Yi-1.5-9B-32K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2303,
"hfopenllm_v2/BBH": 0.4963,
"hfopenllm_v2/MATH Level 5": 0.108,
"hfopenllm_v2/GPQA": 0.3591,
"hfopenllm_v2/MUSR": 0.4186,
"hfopenllm_v2/MMLU-PRO": 0.3765
}
},
{
"id": "01-ai/Yi-1.5-9B-Chat",
"name": "Yi-1.5-9B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.6046,
"hfopenllm_v2/BBH": 0.5559,
"hfopenllm_v2/MATH Level 5": 0.2258,
"hfopenllm_v2/GPQA": 0.3347,
"hfopenllm_v2/MUSR": 0.4259,
"hfopenllm_v2/MMLU-PRO": 0.3975
}
},
{
"id": "01-ai/Yi-1.5-9B-Chat-16K",
"name": "Yi-1.5-9B-Chat-16K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.4214,
"hfopenllm_v2/BBH": 0.5153,
"hfopenllm_v2/MATH Level 5": 0.1782,
"hfopenllm_v2/GPQA": 0.3087,
"hfopenllm_v2/MUSR": 0.4099,
"hfopenllm_v2/MMLU-PRO": 0.3994
}
},
{
"id": "01-ai/yi-34b",
"name": "Yi 34B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_lite/Mean win rate": 0.57,
"helm_lite/NarrativeQA": 0.782,
"helm_lite/NaturalQuestions (closed-book)": 0.443,
"helm_lite/OpenbookQA": 0.92,
"helm_lite/MMLU": 0.65,
"helm_lite/MATH": 0.375,
"helm_lite/GSM8K": 0.648,
"helm_lite/LegalBench": 0.618,
"helm_lite/MedQA": 0.656,
"helm_lite/WMT 2014": 0.172,
"helm_mmlu/MMLU All Subjects": 0.762,
"helm_mmlu/Abstract Algebra": 0.4,
"helm_mmlu/Anatomy": 0.748,
"helm_mmlu/College Physics": 0.5,
"helm_mmlu/Computer Security": 0.83,
"helm_mmlu/Econometrics": 0.588,
"helm_mmlu/Global Facts": 0.53,
"helm_mmlu/Jurisprudence": 0.898,
"helm_mmlu/Philosophy": 0.82,
"helm_mmlu/Professional Psychology": 0.835,
"helm_mmlu/Us Foreign Policy": 0.91,
"helm_mmlu/Astronomy": 0.901,
"helm_mmlu/Business Ethics": 0.75,
"helm_mmlu/Clinical Knowledge": 0.8,
"helm_mmlu/Conceptual Physics": 0.77,
"helm_mmlu/Electrical Engineering": 0.779,
"helm_mmlu/Elementary Mathematics": 0.656,
"helm_mmlu/Formal Logic": 0.548,
"helm_mmlu/High School World History": 0.907,
"helm_mmlu/Human Sexuality": 0.87,
"helm_mmlu/International Law": 0.909,
"helm_mmlu/Logical Fallacies": 0.883,
"helm_mmlu/Machine Learning": 0.58,
"helm_mmlu/Management": 0.893,
"helm_mmlu/Marketing": 0.936,
"helm_mmlu/Medical Genetics": 0.87,
"helm_mmlu/Miscellaneous": 0.902,
"helm_mmlu/Moral Scenarios": 0.606,
"helm_mmlu/Nutrition": 0.869,
"helm_mmlu/Prehistory": 0.877,
"helm_mmlu/Public Relations": 0.745,
"helm_mmlu/Security Studies": 0.833,
"helm_mmlu/Sociology": 0.9,
"helm_mmlu/Virology": 0.572,
"helm_mmlu/World Religions": 0.877,
"helm_mmlu/Mean win rate": 0.315,
"hfopenllm_v2/IFEval": 0.3046,
"hfopenllm_v2/BBH": 0.5457,
"hfopenllm_v2/MATH Level 5": 0.0514,
"hfopenllm_v2/GPQA": 0.3666,
"hfopenllm_v2/MUSR": 0.4119,
"hfopenllm_v2/MMLU-PRO": 0.4412
}
},
{
"id": "01-ai/Yi-34B-200K",
"name": "Yi-34B-200K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.1542,
"hfopenllm_v2/BBH": 0.5442,
"hfopenllm_v2/MATH Level 5": 0.0574,
"hfopenllm_v2/GPQA": 0.3565,
"hfopenllm_v2/MUSR": 0.3817,
"hfopenllm_v2/MMLU-PRO": 0.4535
}
},
{
"id": "01-ai/Yi-34B-Chat",
"name": "Yi-34B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.4699,
"hfopenllm_v2/BBH": 0.5561,
"hfopenllm_v2/MATH Level 5": 0.0627,
"hfopenllm_v2/GPQA": 0.3381,
"hfopenllm_v2/MUSR": 0.3978,
"hfopenllm_v2/MMLU-PRO": 0.4093
}
},
{
"id": "01-ai/yi-6b",
"name": "Yi 6B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_lite/Mean win rate": 0.253,
"helm_lite/NarrativeQA": 0.702,
"helm_lite/NaturalQuestions (closed-book)": 0.31,
"helm_lite/OpenbookQA": 0.8,
"helm_lite/MMLU": 0.53,
"helm_lite/MATH": 0.126,
"helm_lite/GSM8K": 0.375,
"helm_lite/LegalBench": 0.519,
"helm_lite/MedQA": 0.497,
"helm_lite/WMT 2014": 0.117,
"helm_mmlu/MMLU All Subjects": 0.64,
"helm_mmlu/Abstract Algebra": 0.3,
"helm_mmlu/Anatomy": 0.6,
"helm_mmlu/College Physics": 0.422,
"helm_mmlu/Computer Security": 0.73,
"helm_mmlu/Econometrics": 0.351,
"helm_mmlu/Global Facts": 0.43,
"helm_mmlu/Jurisprudence": 0.796,
"helm_mmlu/Philosophy": 0.678,
"helm_mmlu/Professional Psychology": 0.668,
"helm_mmlu/Us Foreign Policy": 0.87,
"helm_mmlu/Astronomy": 0.684,
"helm_mmlu/Business Ethics": 0.67,
"helm_mmlu/Clinical Knowledge": 0.66,
"helm_mmlu/Conceptual Physics": 0.621,
"helm_mmlu/Electrical Engineering": 0.662,
"helm_mmlu/Elementary Mathematics": 0.452,
"helm_mmlu/Formal Logic": 0.452,
"helm_mmlu/High School World History": 0.785,
"helm_mmlu/Human Sexuality": 0.763,
"helm_mmlu/International Law": 0.769,
"helm_mmlu/Logical Fallacies": 0.779,
"helm_mmlu/Machine Learning": 0.411,
"helm_mmlu/Management": 0.806,
"helm_mmlu/Marketing": 0.893,
"helm_mmlu/Medical Genetics": 0.77,
"helm_mmlu/Miscellaneous": 0.796,
"helm_mmlu/Moral Scenarios": 0.335,
"helm_mmlu/Nutrition": 0.739,
"helm_mmlu/Prehistory": 0.713,
"helm_mmlu/Public Relations": 0.718,
"helm_mmlu/Security Studies": 0.735,
"helm_mmlu/Sociology": 0.831,
"helm_mmlu/Virology": 0.452,
"helm_mmlu/World Religions": 0.836,
"helm_mmlu/Mean win rate": 0.651,
"hfopenllm_v2/IFEval": 0.2893,
"hfopenllm_v2/BBH": 0.4309,
"hfopenllm_v2/MATH Level 5": 0.0159,
"hfopenllm_v2/GPQA": 0.2693,
"hfopenllm_v2/MUSR": 0.3937,
"hfopenllm_v2/MMLU-PRO": 0.2991
}
},
{
"id": "01-ai/Yi-6B-200K",
"name": "Yi-6B-200K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.0843,
"hfopenllm_v2/BBH": 0.4289,
"hfopenllm_v2/MATH Level 5": 0.0181,
"hfopenllm_v2/GPQA": 0.2819,
"hfopenllm_v2/MUSR": 0.4587,
"hfopenllm_v2/MMLU-PRO": 0.2844
}
},
{
"id": "01-ai/Yi-6B-Chat",
"name": "Yi-6B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.3395,
"hfopenllm_v2/BBH": 0.4133,
"hfopenllm_v2/MATH Level 5": 0.0136,
"hfopenllm_v2/GPQA": 0.2945,
"hfopenllm_v2/MUSR": 0.3688,
"hfopenllm_v2/MMLU-PRO": 0.3061
}
},
{
"id": "01-ai/Yi-9B",
"name": "Yi-9B",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2709,
"hfopenllm_v2/BBH": 0.494,
"hfopenllm_v2/MATH Level 5": 0.0559,
"hfopenllm_v2/GPQA": 0.318,
"hfopenllm_v2/MUSR": 0.4054,
"hfopenllm_v2/MMLU-PRO": 0.3574
}
},
{
"id": "01-ai/Yi-9B-200K",
"name": "Yi-9B-200K",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.2327,
"hfopenllm_v2/BBH": 0.4793,
"hfopenllm_v2/MATH Level 5": 0.0665,
"hfopenllm_v2/GPQA": 0.3154,
"hfopenllm_v2/MUSR": 0.4294,
"hfopenllm_v2/MMLU-PRO": 0.3622
}
},
{
"id": "01-ai/Yi-Coder-9B-Chat",
"name": "Yi-Coder-9B-Chat",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"hfopenllm_v2/IFEval": 0.4817,
"hfopenllm_v2/BBH": 0.4814,
"hfopenllm_v2/MATH Level 5": 0.04,
"hfopenllm_v2/GPQA": 0.2475,
"hfopenllm_v2/MUSR": 0.3992,
"hfopenllm_v2/MMLU-PRO": 0.2425
}
},
{
"id": "01-ai/yi-large-preview",
"name": "Yi Large Preview",
"developer": "01-ai",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_lite/Mean win rate": 0.471,
"helm_lite/NarrativeQA": 0.373,
"helm_lite/NaturalQuestions (closed-book)": 0.428,
"helm_lite/OpenbookQA": 0.946,
"helm_lite/MMLU": 0.712,
"helm_lite/MATH": 0.712,
"helm_lite/GSM8K": 0.69,
"helm_lite/LegalBench": 0.519,
"helm_lite/MedQA": 0.66,
"helm_lite/WMT 2014": 0.176,
"helm_mmlu/MMLU All Subjects": 0.793,
"helm_mmlu/Abstract Algebra": 0.6,
"helm_mmlu/Anatomy": 0.83,
"helm_mmlu/College Physics": 0.569,
"helm_mmlu/Computer Security": 0.86,
"helm_mmlu/Econometrics": 0.728,
"helm_mmlu/Global Facts": 0.52,
"helm_mmlu/Jurisprudence": 0.852,
"helm_mmlu/Philosophy": 0.842,
"helm_mmlu/Professional Psychology": 0.853,
"helm_mmlu/Us Foreign Policy": 0.85,
"helm_mmlu/Astronomy": 0.914,
"helm_mmlu/Business Ethics": 0.8,
"helm_mmlu/Clinical Knowledge": 0.857,
"helm_mmlu/Conceptual Physics": 0.864,
"helm_mmlu/Electrical Engineering": 0.779,
"helm_mmlu/Elementary Mathematics": 0.685,
"helm_mmlu/Formal Logic": 0.603,
"helm_mmlu/High School World History": 0.928,
"helm_mmlu/Human Sexuality": 0.901,
"helm_mmlu/International Law": 0.917,
"helm_mmlu/Logical Fallacies": 0.865,
"helm_mmlu/Machine Learning": 0.616,
"helm_mmlu/Management": 0.903,
"helm_mmlu/Marketing": 0.927,
"helm_mmlu/Medical Genetics": 0.83,
"helm_mmlu/Miscellaneous": 0.916,
"helm_mmlu/Moral Scenarios": 0.831,
"helm_mmlu/Nutrition": 0.846,
"helm_mmlu/Prehistory": 0.892,
"helm_mmlu/Public Relations": 0.827,
"helm_mmlu/Security Studies": 0.82,
"helm_mmlu/Sociology": 0.881,
"helm_mmlu/Virology": 0.59,
"helm_mmlu/World Religions": 0.871,
"helm_mmlu/Mean win rate": 0.258
}
}
]
}