lemer-bk / results /lemer-bf16-math-temp0.json
lthn's picture
data: add benchmark result lemer-bf16-math-temp0.json
ccb779a verified
{
"model": "/Users/snider/.cache/huggingface/hub/models--lthn--lemer/snapshots/083106e3de02de3606452d58fa0ee2fcb2faff92",
"benchmark": "MMLU-Pro",
"method": "rapid-mlx + openai SDK + google parse_response",
"temperature": 0.0,
"processor": "google/gemma-4-E2B-it",
"num_runs": 1,
"limit_per_subject": 20,
"runs": [
{
"math": {
"correct": 11,
"total": 20,
"accuracy": 0.55
},
"_overall": {
"correct": 11,
"total": 20,
"accuracy": 0.55
},
"_elapsed_s": 345.0
}
]
}