File size: 874 Bytes
ef18673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Benchmark harness registration for SAGE."""

from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True)
class BenchmarkResult:
    """A normalized benchmark result.

    Instances are immutable (``frozen=True``): assigning to a field after
    construction raises ``dataclasses.FrozenInstanceError``.
    """

    # Benchmark identifier; ``run_registered_benchmarks`` fills this from
    # the entries of ``BENCHMARKS``.
    name: str
    # Outcome label. Only "skipped" is produced in this file.
    # NOTE(review): the full set of valid status values is not visible here.
    status: str
    # Numeric score, or None when no score is available (as for skipped runs).
    score: float | None
    # Human-readable explanation accompanying the status.
    detail: str


# Names of the registered benchmarks. ``run_registered_benchmarks`` emits one
# result per entry, in this order. A tuple keeps the registry immutable.
BENCHMARKS = (
    "hellaswag",
    "winogrande",
    "arc_easy",
    "arc_challenge",
    "gsm8k",
    "math",
    "humaneval",
    "mbpp",
)


def run_registered_benchmarks(model, tokenizer=None) -> list[BenchmarkResult]:
    """Report every registered benchmark as skipped, without executing any.

    Args:
        model: Accepted but not used by this implementation.
        tokenizer: Accepted but not used by this implementation.

    Returns:
        One ``BenchmarkResult`` per entry in ``BENCHMARKS``, in the same
        order, each with status ``"skipped"``, a ``None`` score, and a fixed
        explanatory detail message.
    """
    # Neither argument is consulted; discard the local references explicitly.
    del model, tokenizer

    skip_reason = (
        "Benchmark harness registered; "
        "dataset/task execution is external to unit tests."
    )

    def _placeholder(benchmark_name: str) -> BenchmarkResult:
        # Build the "skipped" record for a single benchmark name.
        return BenchmarkResult(
            name=benchmark_name,
            status="skipped",
            score=None,
            detail=skip_reason,
        )

    return list(map(_placeholder, BENCHMARKS))