#!/usr/bin/env python3 from __future__ import annotations import argparse import json import random import sys from pathlib import Path from typing import Any REPO_ROOT = Path(__file__).resolve().parents[1] if str(REPO_ROOT) not in sys.path: sys.path.insert(0, str(REPO_ROOT)) from scripts.benchmark_datasets import resolve_benchmark_dataset from scripts.benchmark_suite import validate_sample def load_rows(path: Path) -> list[dict[str, Any]]: rows: list[dict[str, Any]] = [] for line in path.read_text(encoding="utf-8").splitlines(): if line.strip(): rows.append(json.loads(line)) return rows def build_subset_rows(*, source: Path, benchmark: str, n: int, seed: int) -> list[dict[str, Any]]: rows = load_rows(source) if n > len(rows): raise ValueError(f"requested subset size {n} exceeds dataset size {len(rows)}") chooser = random.Random(seed) selected_indices = sorted(chooser.sample(range(len(rows)), n)) subset: list[dict[str, Any]] = [] for index in selected_indices: row = dict(rows[index]) validate_sample(benchmark, row) row["source_row_id"] = index subset.append(row) return subset def write_subset(*, source: Path, benchmark: str, n: int, seed: int, out: Path) -> Path: subset = build_subset_rows(source=source, benchmark=benchmark, n=n, seed=seed) out.parent.mkdir(parents=True, exist_ok=True) out.write_text("".join(json.dumps(row) + "\n" for row in subset), encoding="utf-8") manifest_path = out.with_suffix(out.suffix + ".manifest.json") manifest = { "benchmark": benchmark, "n": n, "seed": seed, "source_path": str(source), "out_path": str(out), "source_row_ids": [row["source_row_id"] for row in subset], } manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8") return manifest_path def parse_args(argv: list[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser(description="Build a deterministic benchmark subset JSONL and manifest") parser.add_argument("--benchmark", required=True, choices=["MBPP", "GSM8K", "HumanEval", "ARC-Challenge"]) parser.add_argument("--samples", type=Path) parser.add_argument("--n", type=int, required=True) parser.add_argument("--seed", type=int, required=True) parser.add_argument("--out", type=Path, required=True) return parser.parse_args(argv) def main(argv: list[str] | None = None) -> int: args = parse_args(argv) source = resolve_benchmark_dataset(args.benchmark, args.samples) write_subset(source=source, benchmark=args.benchmark, n=args.n, seed=args.seed, out=args.out) return 0 if __name__ == "__main__": raise SystemExit(main())