GitHub Actions
chore: sync EEE pipeline output [2026-04-07 05:13 UTC]
ddebd57
raw
history blame
2.69 kB
{
"developer": "AI2",
"models": [
{
"id": "ai2/llama-2-chat-7b-nectar-3.8m.json",
"name": "ai2/llama-2-chat-7b-nectar-3.8m.json",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.5843,
"reward-bench/Chat": 0.8631,
"reward-bench/Chat Hard": 0.2654,
"reward-bench/Safety": 0.6243
}
},
{
"id": "ai2/llama-2-chat-nectar-180k.json",
"name": "ai2/llama-2-chat-nectar-180k.json",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.5235,
"reward-bench/Chat": 0.8827,
"reward-bench/Chat Hard": 0.2851,
"reward-bench/Safety": 0.4027
}
},
{
"id": "ai2/llama-2-chat-ultrafeedback-60k.jsonl",
"name": "ai2/llama-2-chat-ultrafeedback-60k.jsonl",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.644,
"reward-bench/Chat": 0.9441,
"reward-bench/Chat Hard": 0.4539,
"reward-bench/Safety": 0.5338
}
},
{
"id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",
"name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.7008,
"reward-bench/Chat": 0.9385,
"reward-bench/Chat Hard": 0.3882,
"reward-bench/Safety": 0.7757
}
},
{
"id": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json",
"name": "ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.7127,
"reward-bench/Chat": 0.9358,
"reward-bench/Chat Hard": 0.4079,
"reward-bench/Safety": 0.7946
}
},
{
"id": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json",
"name": "ai2/tulu-2-7b-rm-v0-nectar-binarized.json",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.6756,
"reward-bench/Chat": 0.9134,
"reward-bench/Chat Hard": 0.3904,
"reward-bench/Safety": 0.723
}
},
{
"id": "ai2/tulu-2-7b-rm-v0.json",
"name": "ai2/tulu-2-7b-rm-v0.json",
"developer": "AI2",
"evaluator_relationship": null,
"benchmark_scores": {
"reward-bench/Score": 0.6655,
"reward-bench/Chat": 0.933,
"reward-bench/Chat Hard": 0.4539,
"reward-bench/Safety": 0.6095
}
}
]
}