# 0x960 / train / benchmark_uci.py
# qtzx06's picture
# feat: finalize swarm tooling and submission artifacts
# eac9d9f
"""Benchmark a local Chess960 eval file against a UCI engine such as Stockfish."""
from __future__ import annotations
import argparse
import importlib.util
import math
import random
from collections.abc import Callable
from dataclasses import asdict, dataclass
from pathlib import Path
import chess
import chess.engine
from zero960.engine.search import select_move
# Signature of a candidate evaluation function: it takes a chess.Board and
# returns an int.
EvalFn = Callable[[chess.Board], int]
@dataclass(slots=True)
class UciBenchmarkResult:
candidate_path: Path
engine_command: str
engine_options: dict[str, bool | int | float | str]
positions: int
max_plies: int
seed: int
candidate_depth: int | None
candidate_nodes: int | None
engine_depth: int | None
engine_nodes: int | None
wins: int
draws: int
losses: int
points: float
total_games: int
score: float
elo_delta_estimate: float
def to_json(self) -> dict[str, object]:
payload = asdict(self)
payload["candidate_path"] = str(self.candidate_path)
return payload
def _load_eval(path: Path) -> EvalFn:
    """Import the candidate file at *path* and return its ``evaluate`` callable.

    The file is executed as a standalone module named
    ``zero960_uci_benchmark_<stem>``. The module is registered in
    ``sys.modules`` before it runs, because code in the candidate file may look
    its own module up there (for example ``@dataclass`` combined with
    ``from __future__ import annotations``). The registration is removed again
    if loading fails.

    Args:
        path: Location of the Python file that defines ``evaluate(board)``.

    Returns:
        The module-level ``evaluate`` callable.

    Raises:
        RuntimeError: If no import spec/loader can be created for *path*, or
            the module does not define a callable ``evaluate``.
        Exception: Anything raised while executing the candidate file is
            propagated unchanged.
    """
    import sys  # local import keeps this block self-contained

    module_name = f"zero960_uci_benchmark_{path.stem}"
    spec = importlib.util.spec_from_file_location(module_name, path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"failed to load module from {path}")

    module = importlib.util.module_from_spec(spec)
    # Register first so lookups through sys.modules made during execution work.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(module_name, None)
        raise

    evaluate = getattr(module, "evaluate", None)
    if not callable(evaluate):
        sys.modules.pop(module_name, None)
        raise RuntimeError(f"{path} does not define evaluate(board)")
    return evaluate
def _sample_positions(count: int, seed: int) -> list[int]:
    """Choose *count* start-position indices from 0..959, reproducibly for *seed*.

    Requests of up to 960 positions are drawn without repetition; larger
    requests are drawn one index at a time, so repeats can occur.
    """
    generator = random.Random(seed)
    all_indices = list(range(960))
    if count > len(all_indices):
        # More games than distinct positions: sample with replacement.
        return [generator.choice(all_indices) for _ in range(count)]
    return generator.sample(all_indices, count)
def _elo_from_score(score: float) -> float:
    """Convert a score fraction into an estimated Elo difference.

    The score is clamped to the range [0.01, 0.99] before the conversion so
    the logarithm always receives a positive, finite argument.
    """
    if score < 0.01:
        bounded = 0.01
    elif score > 0.99:
        bounded = 0.99
    else:
        bounded = score
    inverse_odds = (1.0 / bounded) - 1.0
    return -400.0 * math.log10(inverse_odds)
def _new_board(chess960_index: int) -> chess.Board:
    """Create the board for the Chess960 start position *chess960_index*."""
    start_board = chess.Board.from_chess960_pos(chess960_index)
    # Set the flag explicitly so the returned board is marked as Chess960.
    start_board.chess960 = True
    return start_board
def _engine_limit(depth: int | None, nodes: int | None) -> chess.engine.Limit:
    """Build a search limit from *depth* or, failing that, from *nodes*.

    When both are given, *depth* takes precedence and *nodes* is ignored.

    Raises:
        ValueError: If neither *depth* nor *nodes* is provided.
    """
    if depth is None and nodes is None:
        raise ValueError("expected depth or nodes limit")
    if depth is not None:
        return chess.engine.Limit(depth=depth)
    return chess.engine.Limit(nodes=nodes)
def _parse_option_value(raw_value: str) -> bool | int | float | str:
lowered = raw_value.lower()
if lowered in {"true", "false"}:
return lowered == "true"
try:
return int(raw_value)
except ValueError:
pass
try:
return float(raw_value)
except ValueError:
pass
return raw_value
def _parse_engine_options(pairs: list[str]) -> dict[str, bool | int | float | str]:
options: dict[str, bool | int | float | str] = {}
for pair in pairs:
if "=" not in pair:
raise ValueError(f"invalid --engine-option {pair!r}; expected NAME=VALUE")
name, raw_value = pair.split("=", 1)
option_name = name.strip()
if not option_name:
raise ValueError(f"invalid --engine-option {pair!r}; missing option name")
options[option_name] = _parse_option_value(raw_value.strip())
return options
def _play_game_vs_engine(
    chess960_index: int,
    candidate_eval: EvalFn,
    engine: chess.engine.SimpleEngine,
    *,
    candidate_is_white: bool,
    candidate_depth: int | None,
    candidate_nodes: int | None,
    engine_depth: int | None,
    engine_nodes: int | None,
    max_plies: int,
) -> float:
    """Play one game between the candidate eval and the UCI engine.

    The candidate moves are chosen by ``select_move`` with *candidate_eval*;
    the opponent moves come from ``engine.play``. At most *max_plies* moves are
    played.

    Args:
        chess960_index: Start position passed to ``_new_board``.
        candidate_eval: Evaluation function handed to ``select_move``.
        engine: Running UCI engine that plays the other side.
        candidate_is_white: Whether the candidate plays White.
        candidate_depth: Search depth for the candidate; required, because the
            local search path only supports depth limits.
        candidate_nodes: Accepted for symmetry but not supported; it is ignored
            when *candidate_depth* is set.
        engine_depth: Depth limit for the engine (takes precedence over nodes).
        engine_nodes: Node limit for the engine, used when *engine_depth* is
            ``None``.
        max_plies: Maximum number of half-moves to play.

    Returns:
        The candidate's points: 1.0 for a win, 0.0 for a loss, and 0.5 for any
        other result (including a game cut off at *max_plies*).

    Raises:
        ValueError: If the candidate has no depth limit, or the engine has
            neither a depth nor a nodes limit.
        RuntimeError: If the engine returns no move.
    """
    # Reject unsupported candidate limits before any board or engine work, so
    # the failure does not surface only after the engine has already moved.
    if candidate_depth is None:
        if candidate_nodes is None:
            raise ValueError("expected depth or nodes limit")
        raise ValueError("candidate_nodes is not supported by the local engine path")

    board = _new_board(chess960_index)
    opponent_limit = _engine_limit(engine_depth, engine_nodes)
    candidate_color = chess.WHITE if candidate_is_white else chess.BLACK
    # A distinct identifier per game lets python-chess tell the engine that a
    # new game has started instead of treating all games as one.
    game_token = object()

    for _ in range(max_plies):
        if board.is_game_over(claim_draw=True):
            break
        if board.turn == candidate_color:
            move = select_move(board, depth=candidate_depth, eval_fn=candidate_eval)
        else:
            move = engine.play(board, opponent_limit, game=game_token).move
            if move is None:
                raise RuntimeError("UCI engine returned no move")
        board.push(move)

    outcome = board.result(claim_draw=True)
    if outcome == "1-0":
        return 1.0 if candidate_is_white else 0.0
    if outcome == "0-1":
        return 0.0 if candidate_is_white else 1.0
    return 0.5
def benchmark_eval_vs_uci(
    candidate_path: Path,
    engine_command: str,
    *,
    engine_options: dict[str, bool | int | float | str] | None = None,
    positions: int = 32,
    candidate_depth: int | None = 2,
    candidate_nodes: int | None = None,
    engine_depth: int | None = 1,
    engine_nodes: int | None = None,
    max_plies: int = 120,
    seed: int = 42,
) -> UciBenchmarkResult:
    """Play the candidate eval against a UCI engine and summarise the results.

    Each sampled start position is played twice, first with the candidate as
    White and then as Black, so the total number of games is
    ``2 * positions``.

    Args:
        candidate_path: File defining ``evaluate(board)``; loaded via
            ``_load_eval``.
        engine_command: Command used to launch the UCI engine.
        engine_options: Options applied with ``engine.configure`` when
            non-empty. The mapping is copied, not mutated.
        positions: Number of start positions to sample.
        candidate_depth: Search depth for the candidate.
        candidate_nodes: Recorded in the result; the local search path does not
            support node limits.
        engine_depth: Depth limit for the engine. Pass ``None`` to let
            *engine_nodes* apply, since depth takes precedence.
        engine_nodes: Node limit for the engine, used only when *engine_depth*
            is ``None``.
        max_plies: Maximum half-moves per game.
        seed: Seed for the start-position sampling.

    Returns:
        A ``UciBenchmarkResult`` with the configuration, the win/draw/loss
        tally from the candidate's side, the score fraction and an Elo
        estimate. With zero games both score and Elo estimate are 0.0.
    """
    candidate_eval = _load_eval(candidate_path)
    start_positions = _sample_positions(positions, seed)
    configured_engine_options = dict(engine_options or {})

    wins = draws = losses = 0
    points = 0.0
    with chess.engine.SimpleEngine.popen_uci(engine_command) as engine:
        if configured_engine_options:
            engine.configure(configured_engine_options)
        for chess960_index in start_positions:
            # Same position from both sides, White first.
            for candidate_is_white in (True, False):
                game_points = _play_game_vs_engine(
                    chess960_index,
                    candidate_eval,
                    engine,
                    candidate_is_white=candidate_is_white,
                    candidate_depth=candidate_depth,
                    candidate_nodes=candidate_nodes,
                    engine_depth=engine_depth,
                    engine_nodes=engine_nodes,
                    max_plies=max_plies,
                )
                points += game_points
                if game_points == 1.0:
                    wins += 1
                elif game_points == 0.5:
                    draws += 1
                else:
                    losses += 1

    total_games = len(start_positions) * 2
    if total_games:
        score = points / total_games
        elo_delta_estimate = _elo_from_score(score)
    else:
        # No games were played: report no measured difference rather than the
        # clamped minimum the Elo conversion would produce for a 0.0 score.
        score = 0.0
        elo_delta_estimate = 0.0

    return UciBenchmarkResult(
        candidate_path=candidate_path,
        engine_command=engine_command,
        engine_options=configured_engine_options,
        positions=len(start_positions),
        max_plies=max_plies,
        seed=seed,
        candidate_depth=candidate_depth,
        candidate_nodes=candidate_nodes,
        engine_depth=engine_depth,
        engine_nodes=engine_nodes,
        wins=wins,
        draws=draws,
        losses=losses,
        points=points,
        total_games=total_games,
        score=score,
        elo_delta_estimate=elo_delta_estimate,
    )
def parse_args() -> argparse.Namespace:
root = Path(__file__).resolve().parents[1]
parser = argparse.ArgumentParser(description="Benchmark a local eval file against a UCI engine.")
parser.add_argument(
"--candidate-file",
default=str(root / "src/zero960/workspace_template/eval.py"),
help="Path to the candidate eval.py file.",
)
parser.add_argument(
"--engine-command",
default="stockfish",
help="UCI engine command, for example 'stockfish'.",
)
parser.add_argument(
"--engine-option",
action="append",
default=[],
help="Repeated engine option in NAME=VALUE form, for example UCI_LimitStrength=true.",
)
parser.add_argument("--positions", type=int, default=32)
parser.add_argument("--candidate-depth", type=int, default=2)
parser.add_argument("--candidate-nodes", type=int, default=None)
parser.add_argument("--engine-depth", type=int, default=1)
parser.add_argument("--engine-nodes", type=int, default=None)
parser.add_argument("--max-plies", type=int, default=120)
parser.add_argument("--seed", type=int, default=42)
return parser.parse_args()
def main() -> None:
    """Run the benchmark described by the command line and print a text summary."""
    cli = parse_args()
    result = benchmark_eval_vs_uci(
        Path(cli.candidate_file).resolve(),
        cli.engine_command,
        engine_options=_parse_engine_options(cli.engine_option),
        positions=cli.positions,
        candidate_depth=cli.candidate_depth,
        candidate_nodes=cli.candidate_nodes,
        engine_depth=cli.engine_depth,
        engine_nodes=cli.engine_nodes,
        max_plies=cli.max_plies,
        seed=cli.seed,
    )

    # Collect the report lines first, then emit them in order.
    report = [
        f"candidate: {result.candidate_path}",
        f"engine: {result.engine_command}",
    ]
    if result.engine_options:
        # Engine options are only shown when at least one was configured.
        report.append(f"engine_options={result.engine_options}")
    report.append(
        f"positions={result.positions} max_plies={result.max_plies} games={result.total_games} seed={result.seed} "
        f"candidate_depth={result.candidate_depth} engine_depth={result.engine_depth} "
        f"candidate_nodes={result.candidate_nodes} engine_nodes={result.engine_nodes}"
    )
    report.append(
        f"record={result.wins}-{result.draws}-{result.losses} "
        f"points={result.points:.1f}/{result.total_games}"
    )
    report.append(f"score={result.score:.3f} elo_delta_estimate={result.elo_delta_estimate:.1f}")
    for line in report:
        print(line)
# Run the benchmark CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()