general-eval-card / data /developers /aleph-alpha.json
GitHub Actions
chore: sync EEE pipeline output [2026-03-28 04:56 UTC]
2edd871
raw
history blame
2.53 kB
{
"developer": "aleph-alpha",
"models": [
{
"id": "aleph-alpha/Luminous-Base-13B",
"name": "Luminous Base 13B",
"developer": "aleph-alpha",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_classic/Mean win rate": 0.315,
"helm_classic/MMLU": 0.27,
"helm_classic/BoolQ": 0.719,
"helm_classic/NarrativeQA": 0.605,
"helm_classic/NaturalQuestions (open-book)": 0.568,
"helm_classic/QuAC": 0.334,
"helm_classic/HellaSwag": -1.0,
"helm_classic/OpenbookQA": -1.0,
"helm_classic/TruthfulQA": 0.182,
"helm_classic/MS MARCO (TREC)": -1.0,
"helm_classic/CNN/DailyMail": 0.11,
"helm_classic/XSUM": 0.105,
"helm_classic/IMDB": 0.939,
"helm_classic/CivilComments": 0.544,
"helm_classic/RAFT": 0.473
}
},
{
"id": "aleph-alpha/Luminous-Extended-30B",
"name": "Luminous Extended 30B",
"developer": "aleph-alpha",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_classic/Mean win rate": 0.485,
"helm_classic/MMLU": 0.321,
"helm_classic/BoolQ": 0.767,
"helm_classic/NarrativeQA": 0.665,
"helm_classic/NaturalQuestions (open-book)": 0.609,
"helm_classic/QuAC": 0.349,
"helm_classic/HellaSwag": -1.0,
"helm_classic/OpenbookQA": -1.0,
"helm_classic/TruthfulQA": 0.221,
"helm_classic/MS MARCO (TREC)": -1.0,
"helm_classic/CNN/DailyMail": 0.139,
"helm_classic/XSUM": 0.124,
"helm_classic/IMDB": 0.947,
"helm_classic/CivilComments": 0.524,
"helm_classic/RAFT": 0.523
}
},
{
"id": "aleph-alpha/Luminous-Supreme-70B",
"name": "Luminous Supreme 70B",
"developer": "aleph-alpha",
"evaluator_relationship": null,
"benchmark_scores": {
"helm_classic/Mean win rate": 0.662,
"helm_classic/MMLU": 0.38,
"helm_classic/BoolQ": 0.775,
"helm_classic/NarrativeQA": 0.711,
"helm_classic/NaturalQuestions (open-book)": 0.649,
"helm_classic/QuAC": 0.37,
"helm_classic/HellaSwag": -1.0,
"helm_classic/OpenbookQA": -1.0,
"helm_classic/TruthfulQA": 0.222,
"helm_classic/MS MARCO (TREC)": -1.0,
"helm_classic/CNN/DailyMail": 0.15,
"helm_classic/XSUM": 0.136,
"helm_classic/IMDB": 0.959,
"helm_classic/CivilComments": 0.562,
"helm_classic/RAFT": 0.653
}
}
]
}