"""Benchmark harness registration for SAGE."""
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class BenchmarkResult:
    """A normalized benchmark result.

    Instances are immutable (``frozen=True``); they compare by field values
    and are hashable, so they can be stored in sets or used as dict keys.
    """

    name: str  # benchmark identifier
    status: str  # outcome label, e.g. "skipped"
    score: float | None  # numeric score, or None when no score is available
    detail: str  # free-form, human-readable explanation of the status
# Names of the benchmarks registered with the harness, in reporting order.
BENCHMARKS = (
    "hellaswag",
    "winogrande",
    "arc_easy",
    "arc_challenge",
    "gsm8k",
    "math",
    "humaneval",
    "mbpp",
)
def run_registered_benchmarks(model, tokenizer=None) -> list[BenchmarkResult]:
    """Return a lightweight result set for the configured benchmarks.

    No benchmark is executed here: each name in ``BENCHMARKS`` yields a
    placeholder ``BenchmarkResult`` whose status is ``"skipped"`` and whose
    score is ``None``.

    Args:
        model: Currently unused.
        tokenizer: Currently unused; defaults to ``None``.

    Returns:
        One ``BenchmarkResult`` per entry of ``BENCHMARKS``, in the same order.
    """
    # Both arguments are deliberately ignored; drop the references explicitly
    # instead of rebinding them to a throwaway name.
    del model, tokenizer
    return [
        BenchmarkResult(
            name=name,
            status="skipped",
            score=None,
            detail="Benchmark harness registered; dataset/task execution is external to unit tests.",
        )
        for name in BENCHMARKS
    ]
|