Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import random | |
| import sys | |
| from pathlib import Path | |
| from typing import Any | |
# Parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Put that directory at the front of the import path exactly once --
# presumably so the ``scripts.*`` imports resolve when this file is run
# directly as a script (confirm against the repository layout).
if sys.path.count(str(REPO_ROOT)) == 0:
    sys.path.insert(0, str(REPO_ROOT))
| from scripts.benchmark_datasets import resolve_benchmark_dataset | |
| from scripts.benchmark_suite import validate_sample | |
def load_rows(path: Path) -> list[dict[str, Any]]:
    """Parse a JSON Lines file into a list of decoded rows.

    Blank and whitespace-only lines are skipped. Each remaining line is
    decoded with ``json.loads`` and returned as-is; no check is made that a
    row is actually a JSON object.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        The decoded rows, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Iterate the text-mode handle instead of calling ``str.splitlines()``:
    # splitlines() also breaks on U+2028, U+2029, U+0085 and friends, which
    # may legally appear unescaped inside a JSON string and would cut a
    # valid row in half. File iteration only splits on \n, \r and \r\n.
    with path.open(encoding="utf-8") as handle:
        stripped_lines = (line.strip() for line in handle)
        return [json.loads(text) for text in stripped_lines if text]
def build_subset_rows(*, source: Path, benchmark: str, n: int, seed: int) -> list[dict[str, Any]]:
    """Pick a seeded random sample of ``n`` rows from a JSONL dataset.

    Rows are loaded with ``load_rows``. ``n`` distinct positions are drawn
    with ``random.Random(seed)`` and visited in ascending order, so the
    result keeps the source ordering. Each chosen row is shallow-copied,
    passed to ``validate_sample(benchmark, row)`` and then tagged with its
    position in the loaded list under the ``"source_row_id"`` key.

    Args:
        source: JSONL file to sample from.
        benchmark: Benchmark name forwarded to ``validate_sample``.
        n: Number of rows to select.
        seed: Seed for the random generator, making the selection repeatable.

    Returns:
        The selected row copies, each carrying ``"source_row_id"``.

    Raises:
        ValueError: If ``n`` is larger than the number of loaded rows.
            Anything raised by ``validate_sample`` also propagates
            (its failure mode is not visible here).
    """
    rows = load_rows(source)
    if n > len(rows):
        raise ValueError(f"requested subset size {n} exceeds dataset size {len(rows)}")

    picked = random.Random(seed).sample(range(len(rows)), n)
    picked.sort()

    result: list[dict[str, Any]] = []
    for row_id in picked:
        # Copy first so the tag below never leaks into the loaded rows.
        sample = dict(rows[row_id])
        validate_sample(benchmark, sample)
        sample["source_row_id"] = row_id
        result.append(sample)
    return result
def write_subset(*, source: Path, benchmark: str, n: int, seed: int, out: Path) -> Path:
    """Write a sampled subset as JSONL plus a JSON manifest next to it.

    The subset comes from ``build_subset_rows``. Missing parent directories
    of ``out`` are created. The manifest is written to ``out`` with
    ``".manifest.json"`` appended to its existing suffix, and records the
    benchmark, ``n``, ``seed``, both paths and the selected source row ids.

    Args:
        source: JSONL dataset to sample from.
        benchmark: Benchmark name, forwarded and recorded in the manifest.
        n: Number of rows to select.
        seed: Random seed for the selection.
        out: Destination path of the subset JSONL file.

    Returns:
        Path of the manifest file that was written.
    """
    rows = build_subset_rows(source=source, benchmark=benchmark, n=n, seed=seed)

    out.parent.mkdir(parents=True, exist_ok=True)
    jsonl_lines = [json.dumps(row) + "\n" for row in rows]
    out.write_text("".join(jsonl_lines), encoding="utf-8")

    manifest_suffix = out.suffix + ".manifest.json"
    manifest_path = out.with_suffix(manifest_suffix)

    selected_ids = [row["source_row_id"] for row in rows]
    manifest = {
        "benchmark": benchmark,
        "n": n,
        "seed": seed,
        "source_path": str(source),
        "out_path": str(out),
        "source_row_ids": selected_ids,
    }
    manifest_text = json.dumps(manifest, indent=2, sort_keys=True)
    manifest_path.write_text(manifest_text, encoding="utf-8")
    return manifest_path
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the subset builder.

    Args:
        argv: Argument list to parse. ``None`` is passed straight to
            ``argparse``, which then reads the process arguments.

    Returns:
        Namespace with ``benchmark``, ``samples`` (``Path`` or ``None``),
        ``n``, ``seed`` and ``out`` (``Path``).
    """
    cli = argparse.ArgumentParser(
        description="Build a deterministic benchmark subset JSONL and manifest",
    )
    supported_benchmarks = ["MBPP", "GSM8K", "HumanEval", "ARC-Challenge"]
    cli.add_argument("--benchmark", required=True, choices=supported_benchmarks)
    cli.add_argument("--samples", type=Path)
    # The two mandatory integer options share one definition; registration
    # order is kept so the generated help text is unchanged.
    for int_flag in ("--n", "--seed"):
        cli.add_argument(int_flag, type=int, required=True)
    cli.add_argument("--out", type=Path, required=True)
    return cli.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
    """Run the subset builder from parsed command-line options.

    Parses ``argv``, obtains the dataset path from
    ``resolve_benchmark_dataset(benchmark, samples)`` and writes the subset
    and its manifest via ``write_subset``.

    Args:
        argv: Argument list forwarded to ``parse_args``.

    Returns:
        Always ``0``; failures surface as exceptions.
    """
    options = parse_args(argv)
    dataset_path = resolve_benchmark_dataset(options.benchmark, options.samples)
    write_subset(
        source=dataset_path,
        benchmark=options.benchmark,
        n=options.n,
        seed=options.seed,
        out=options.out,
    )
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())