Spaces:
Running
Running
| { | |
| "developer": "ai21", | |
| "models": [ | |
| { | |
| "id": "ai21/J1-Grande-v1-17B", | |
| "name": "J1-Grande v1 17B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.433, | |
| "helm_classic/MMLU": 0.27, | |
| "helm_classic/BoolQ": 0.722, | |
| "helm_classic/NarrativeQA": 0.672, | |
| "helm_classic/NaturalQuestions (open-book)": 0.578, | |
| "helm_classic/QuAC": 0.362, | |
| "helm_classic/HellaSwag": 0.739, | |
| "helm_classic/OpenbookQA": 0.52, | |
| "helm_classic/TruthfulQA": 0.193, | |
| "helm_classic/MS MARCO (TREC)": 0.341, | |
| "helm_classic/CNN/DailyMail": 0.143, | |
| "helm_classic/XSUM": 0.122, | |
| "helm_classic/IMDB": 0.953, | |
| "helm_classic/CivilComments": 0.529, | |
| "helm_classic/RAFT": 0.658 | |
| } | |
| }, | |
| { | |
| "id": "ai21/J1-Grande-v2-beta-17B", | |
| "name": "J1-Grande v2 beta 17B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.706, | |
| "helm_classic/MMLU": 0.445, | |
| "helm_classic/BoolQ": 0.812, | |
| "helm_classic/NarrativeQA": 0.725, | |
| "helm_classic/NaturalQuestions (open-book)": 0.625, | |
| "helm_classic/QuAC": 0.392, | |
| "helm_classic/HellaSwag": 0.764, | |
| "helm_classic/OpenbookQA": 0.56, | |
| "helm_classic/TruthfulQA": 0.306, | |
| "helm_classic/MS MARCO (TREC)": 0.46, | |
| "helm_classic/CNN/DailyMail": 0.146, | |
| "helm_classic/XSUM": 0.152, | |
| "helm_classic/IMDB": 0.957, | |
| "helm_classic/CivilComments": 0.546, | |
| "helm_classic/RAFT": 0.679 | |
| } | |
| }, | |
| { | |
| "id": "ai21/J1-Jumbo-v1-178B", | |
| "name": "J1-Jumbo v1 178B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.517, | |
| "helm_classic/MMLU": 0.259, | |
| "helm_classic/BoolQ": 0.776, | |
| "helm_classic/NarrativeQA": 0.695, | |
| "helm_classic/NaturalQuestions (open-book)": 0.595, | |
| "helm_classic/QuAC": 0.358, | |
| "helm_classic/HellaSwag": 0.765, | |
| "helm_classic/OpenbookQA": 0.534, | |
| "helm_classic/TruthfulQA": 0.175, | |
| "helm_classic/MS MARCO (TREC)": 0.363, | |
| "helm_classic/CNN/DailyMail": 0.144, | |
| "helm_classic/XSUM": 0.129, | |
| "helm_classic/IMDB": 0.943, | |
| "helm_classic/CivilComments": 0.553, | |
| "helm_classic/RAFT": 0.681 | |
| } | |
| }, | |
| { | |
| "id": "ai21/J1-Large-v1-7.5B", | |
| "name": "J1-Large v1 7.5B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.285, | |
| "helm_classic/MMLU": 0.241, | |
| "helm_classic/BoolQ": 0.683, | |
| "helm_classic/NarrativeQA": 0.623, | |
| "helm_classic/NaturalQuestions (open-book)": 0.532, | |
| "helm_classic/QuAC": 0.328, | |
| "helm_classic/HellaSwag": 0.7, | |
| "helm_classic/OpenbookQA": 0.514, | |
| "helm_classic/TruthfulQA": 0.197, | |
| "helm_classic/MS MARCO (TREC)": 0.292, | |
| "helm_classic/CNN/DailyMail": 0.134, | |
| "helm_classic/XSUM": 0.102, | |
| "helm_classic/IMDB": 0.956, | |
| "helm_classic/CivilComments": 0.532, | |
| "helm_classic/RAFT": 0.545 | |
| } | |
| }, | |
| { | |
| "id": "ai21/j2-grande", | |
| "name": "Jurassic-2 Grande 17B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.172, | |
| "helm_lite/NarrativeQA": 0.744, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.35, | |
| "helm_lite/OpenbookQA": 0.614, | |
| "helm_lite/MMLU": 0.471, | |
| "helm_lite/MATH": 0.064, | |
| "helm_lite/GSM8K": 0.159, | |
| "helm_lite/LegalBench": 0.468, | |
| "helm_lite/MedQA": 0.39, | |
| "helm_lite/WMT 2014": 0.102 | |
| } | |
| }, | |
| { | |
| "id": "ai21/j2-jumbo", | |
| "name": "Jurassic-2 Jumbo 178B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.215, | |
| "helm_lite/NarrativeQA": 0.728, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.385, | |
| "helm_lite/OpenbookQA": 0.688, | |
| "helm_lite/MMLU": 0.483, | |
| "helm_lite/MATH": 0.103, | |
| "helm_lite/GSM8K": 0.239, | |
| "helm_lite/LegalBench": 0.533, | |
| "helm_lite/MedQA": 0.431, | |
| "helm_lite/WMT 2014": 0.114 | |
| } | |
| }, | |
| { | |
| "id": "ai21/jamba-1.5-large", | |
| "name": "Jamba 1.5 Large", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.637, | |
| "helm_lite/NarrativeQA": 0.664, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.394, | |
| "helm_lite/OpenbookQA": 0.948, | |
| "helm_lite/MMLU": 0.683, | |
| "helm_lite/MATH": 0.692, | |
| "helm_lite/GSM8K": 0.846, | |
| "helm_lite/LegalBench": 0.675, | |
| "helm_lite/MedQA": 0.698, | |
| "helm_lite/WMT 2014": 0.203, | |
| "helm_mmlu/MMLU All Subjects": 0.782, | |
| "helm_mmlu/Abstract Algebra": 0.53, | |
| "helm_mmlu/Anatomy": 0.793, | |
| "helm_mmlu/College Physics": 0.51, | |
| "helm_mmlu/Computer Security": 0.8, | |
| "helm_mmlu/Econometrics": 0.614, | |
| "helm_mmlu/Global Facts": 0.54, | |
| "helm_mmlu/Jurisprudence": 0.87, | |
| "helm_mmlu/Philosophy": 0.849, | |
| "helm_mmlu/Professional Psychology": 0.842, | |
| "helm_mmlu/Us Foreign Policy": 0.92, | |
| "helm_mmlu/Astronomy": 0.882, | |
| "helm_mmlu/Business Ethics": 0.77, | |
| "helm_mmlu/Clinical Knowledge": 0.849, | |
| "helm_mmlu/Conceptual Physics": 0.779, | |
| "helm_mmlu/Electrical Engineering": 0.793, | |
| "helm_mmlu/Elementary Mathematics": 0.656, | |
| "helm_mmlu/Formal Logic": 0.619, | |
| "helm_mmlu/High School World History": 0.911, | |
| "helm_mmlu/Human Sexuality": 0.832, | |
| "helm_mmlu/International Law": 0.884, | |
| "helm_mmlu/Logical Fallacies": 0.859, | |
| "helm_mmlu/Machine Learning": 0.688, | |
| "helm_mmlu/Management": 0.864, | |
| "helm_mmlu/Marketing": 0.94, | |
| "helm_mmlu/Medical Genetics": 0.89, | |
| "helm_mmlu/Miscellaneous": 0.931, | |
| "helm_mmlu/Moral Scenarios": 0.686, | |
| "helm_mmlu/Nutrition": 0.869, | |
| "helm_mmlu/Prehistory": 0.892, | |
| "helm_mmlu/Public Relations": 0.755, | |
| "helm_mmlu/Security Studies": 0.771, | |
| "helm_mmlu/Sociology": 0.93, | |
| "helm_mmlu/Virology": 0.554, | |
| "helm_mmlu/World Religions": 0.865, | |
| "helm_mmlu/Mean win rate": 0.147 | |
| } | |
| }, | |
| { | |
| "id": "ai21/jamba-1.5-mini", | |
| "name": "Jamba 1.5 Mini", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.414, | |
| "helm_lite/NarrativeQA": 0.746, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.388, | |
| "helm_lite/OpenbookQA": 0.89, | |
| "helm_lite/MMLU": 0.582, | |
| "helm_lite/MATH": 0.318, | |
| "helm_lite/GSM8K": 0.691, | |
| "helm_lite/LegalBench": 0.503, | |
| "helm_lite/MedQA": 0.632, | |
| "helm_lite/WMT 2014": 0.179, | |
| "helm_mmlu/MMLU All Subjects": 0.699, | |
| "helm_mmlu/Abstract Algebra": 0.33, | |
| "helm_mmlu/Anatomy": 0.711, | |
| "helm_mmlu/College Physics": 0.48, | |
| "helm_mmlu/Computer Security": 0.73, | |
| "helm_mmlu/Econometrics": 0.491, | |
| "helm_mmlu/Global Facts": 0.43, | |
| "helm_mmlu/Jurisprudence": 0.88, | |
| "helm_mmlu/Philosophy": 0.752, | |
| "helm_mmlu/Professional Psychology": 0.76, | |
| "helm_mmlu/Us Foreign Policy": 0.9, | |
| "helm_mmlu/Astronomy": 0.822, | |
| "helm_mmlu/Business Ethics": 0.76, | |
| "helm_mmlu/Clinical Knowledge": 0.74, | |
| "helm_mmlu/Conceptual Physics": 0.677, | |
| "helm_mmlu/Electrical Engineering": 0.683, | |
| "helm_mmlu/Elementary Mathematics": 0.553, | |
| "helm_mmlu/Formal Logic": 0.452, | |
| "helm_mmlu/High School World History": 0.84, | |
| "helm_mmlu/Human Sexuality": 0.809, | |
| "helm_mmlu/International Law": 0.893, | |
| "helm_mmlu/Logical Fallacies": 0.81, | |
| "helm_mmlu/Machine Learning": 0.509, | |
| "helm_mmlu/Management": 0.825, | |
| "helm_mmlu/Marketing": 0.915, | |
| "helm_mmlu/Medical Genetics": 0.69, | |
| "helm_mmlu/Miscellaneous": 0.902, | |
| "helm_mmlu/Moral Scenarios": 0.269, | |
| "helm_mmlu/Nutrition": 0.801, | |
| "helm_mmlu/Prehistory": 0.824, | |
| "helm_mmlu/Public Relations": 0.727, | |
| "helm_mmlu/Security Studies": 0.755, | |
| "helm_mmlu/Sociology": 0.876, | |
| "helm_mmlu/Virology": 0.578, | |
| "helm_mmlu/World Religions": 0.842, | |
| "helm_mmlu/Mean win rate": 0.206 | |
| } | |
| }, | |
| { | |
| "id": "ai21/jamba-instruct", | |
| "name": "Jamba Instruct", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_lite/Mean win rate": 0.287, | |
| "helm_lite/NarrativeQA": 0.658, | |
| "helm_lite/NaturalQuestions (closed-book)": 0.384, | |
| "helm_lite/OpenbookQA": 0.796, | |
| "helm_lite/MMLU": 0.582, | |
| "helm_lite/MATH": 0.38, | |
| "helm_lite/GSM8K": 0.67, | |
| "helm_lite/LegalBench": 0.54, | |
| "helm_lite/MedQA": 0.519, | |
| "helm_lite/WMT 2014": 0.164, | |
| "helm_mmlu/MMLU All Subjects": 0.659, | |
| "helm_mmlu/Abstract Algebra": 0.36, | |
| "helm_mmlu/Anatomy": 0.615, | |
| "helm_mmlu/College Physics": 0.422, | |
| "helm_mmlu/Computer Security": 0.76, | |
| "helm_mmlu/Econometrics": 0.439, | |
| "helm_mmlu/Global Facts": 0.4, | |
| "helm_mmlu/Jurisprudence": 0.796, | |
| "helm_mmlu/Philosophy": 0.749, | |
| "helm_mmlu/Professional Psychology": 0.716, | |
| "helm_mmlu/Us Foreign Policy": 0.91, | |
| "helm_mmlu/Astronomy": 0.73, | |
| "helm_mmlu/Business Ethics": 0.6, | |
| "helm_mmlu/Clinical Knowledge": 0.702, | |
| "helm_mmlu/Conceptual Physics": 0.677, | |
| "helm_mmlu/Electrical Engineering": 0.621, | |
| "helm_mmlu/Elementary Mathematics": 0.497, | |
| "helm_mmlu/Formal Logic": 0.444, | |
| "helm_mmlu/High School World History": 0.797, | |
| "helm_mmlu/Human Sexuality": 0.794, | |
| "helm_mmlu/International Law": 0.835, | |
| "helm_mmlu/Logical Fallacies": 0.706, | |
| "helm_mmlu/Machine Learning": 0.536, | |
| "helm_mmlu/Management": 0.786, | |
| "helm_mmlu/Marketing": 0.885, | |
| "helm_mmlu/Medical Genetics": 0.67, | |
| "helm_mmlu/Miscellaneous": 0.865, | |
| "helm_mmlu/Moral Scenarios": 0.465, | |
| "helm_mmlu/Nutrition": 0.745, | |
| "helm_mmlu/Prehistory": 0.796, | |
| "helm_mmlu/Public Relations": 0.682, | |
| "helm_mmlu/Security Studies": 0.743, | |
| "helm_mmlu/Sociology": 0.891, | |
| "helm_mmlu/Virology": 0.53, | |
| "helm_mmlu/World Religions": 0.813, | |
| "helm_mmlu/Mean win rate": 0.887 | |
| } | |
| }, | |
| { | |
| "id": "ai21/Jurassic-2-Grande-17B", | |
| "name": "Jurassic-2 Grande 17B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.743, | |
| "helm_classic/MMLU": 0.475, | |
| "helm_classic/BoolQ": 0.826, | |
| "helm_classic/NarrativeQA": 0.737, | |
| "helm_classic/NaturalQuestions (open-book)": 0.639, | |
| "helm_classic/QuAC": 0.418, | |
| "helm_classic/HellaSwag": 0.781, | |
| "helm_classic/OpenbookQA": 0.542, | |
| "helm_classic/TruthfulQA": 0.348, | |
| "helm_classic/MS MARCO (TREC)": 0.514, | |
| "helm_classic/CNN/DailyMail": 0.144, | |
| "helm_classic/XSUM": 0.167, | |
| "helm_classic/IMDB": 0.938, | |
| "helm_classic/CivilComments": 0.547, | |
| "helm_classic/RAFT": 0.712 | |
| } | |
| }, | |
| { | |
| "id": "ai21/Jurassic-2-Jumbo-178B", | |
| "name": "Jurassic-2 Jumbo 178B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.824, | |
| "helm_classic/MMLU": 0.48, | |
| "helm_classic/BoolQ": 0.829, | |
| "helm_classic/NarrativeQA": 0.733, | |
| "helm_classic/NaturalQuestions (open-book)": 0.669, | |
| "helm_classic/QuAC": 0.435, | |
| "helm_classic/HellaSwag": 0.788, | |
| "helm_classic/OpenbookQA": 0.558, | |
| "helm_classic/TruthfulQA": 0.437, | |
| "helm_classic/MS MARCO (TREC)": 0.661, | |
| "helm_classic/CNN/DailyMail": 0.149, | |
| "helm_classic/XSUM": 0.182, | |
| "helm_classic/IMDB": 0.938, | |
| "helm_classic/CivilComments": 0.57, | |
| "helm_classic/RAFT": 0.746 | |
| } | |
| }, | |
| { | |
| "id": "ai21/Jurassic-2-Large-7.5B", | |
| "name": "Jurassic-2 Large 7.5B", | |
| "developer": "ai21", | |
| "evaluator_relationship": null, | |
| "benchmark_scores": { | |
| "helm_classic/Mean win rate": 0.553, | |
| "helm_classic/MMLU": 0.339, | |
| "helm_classic/BoolQ": 0.742, | |
| "helm_classic/NarrativeQA": -1.0, | |
| "helm_classic/NaturalQuestions (open-book)": 0.589, | |
| "helm_classic/QuAC": -1.0, | |
| "helm_classic/HellaSwag": 0.729, | |
| "helm_classic/OpenbookQA": 0.53, | |
| "helm_classic/TruthfulQA": 0.245, | |
| "helm_classic/MS MARCO (TREC)": 0.464, | |
| "helm_classic/CNN/DailyMail": 0.136, | |
| "helm_classic/XSUM": 0.142, | |
| "helm_classic/IMDB": 0.956, | |
| "helm_classic/CivilComments": 0.57, | |
| "helm_classic/RAFT": 0.622 | |
| } | |
| } | |
| ] | |
| } |