| """ |
| chess_engine.py |
| ─────────────── |
| Thin wrapper around python-chess providing: |
| - Board state management |
| - Legal move validation and parsing |
| - FEN / SAN / UCI conversion helpers |
| - Reward calculation after game end |
| """ |
|
|
| import chess |
| import chess.pgn |
| import random |
| from typing import Optional |
|
|
|
|
class ChessEngine:
    """Manage a single game of chess and expose helpers for the agent loop.

    The engine owns one ``chess.Board`` (``self.board``) and offers:

    * read-only views of the position (FEN, side to move, legal moves),
    * move application from UCI or SAN text,
    * tolerant extraction of a move from free-form model output,
    * terminal reward computation and prompt construction.
    """

    # Punctuation stripped from both ends of a model-output token before it is
    # parsed.  Quotes, backticks and asterisks are included because the prompt
    # itself shows moves as 'e4', so models tend to answer with quoted or
    # markdown-wrapped moves.
    _TOKEN_STRIP_CHARS = ".,!?;:()[]{}<>\"'`*"
    # Only the first few whitespace-separated tokens of a reply are inspected.
    _MAX_TOKENS_SCANNED = 5
    # Number of most recent history entries included in the prompt.
    _PROMPT_HISTORY_LEN = 20
    # Number of legal moves (in generator order) listed in the prompt.
    _PROMPT_LEGAL_SAMPLE = 10

    def __init__(self):
        """Start from the standard initial position."""
        self.board = chess.Board()

    @property
    def fen(self) -> str:
        """FEN string of the current position."""
        return self.board.fen()

    @property
    def turn(self) -> str:
        """Side to move: ``"white"`` or ``"black"``."""
        return "white" if self.board.turn == chess.WHITE else "black"

    @property
    def move_number(self) -> int:
        """Full-move number of the current position."""
        return self.board.fullmove_number

    @property
    def is_game_over(self) -> bool:
        """Whether the game has ended on the board."""
        return self.board.is_game_over()

    @property
    def result(self) -> Optional[str]:
        """Return '1-0', '0-1', '1/2-1/2', or None if the game is ongoing."""
        # A single outcome() call answers both "is it over?" and "who won?";
        # it returns None while the game is still in progress.
        outcome = self.board.outcome()
        if outcome is None:
            return None
        return outcome.result()

    @property
    def legal_moves_uci(self) -> list[str]:
        """All legal moves in UCI notation."""
        return [move.uci() for move in self.board.legal_moves]

    @property
    def legal_moves_san(self) -> list[str]:
        """All legal moves in SAN notation."""
        return [self.board.san(move) for move in self.board.legal_moves]

    def reset(self):
        """Discard the current game and start from the initial position."""
        self.board = chess.Board()

    def apply_move_uci(self, uci: str) -> Optional[str]:
        """Apply a UCI move (e.g. 'e2e4') to the board.

        Returns the SAN string on success, or None if the text is not valid
        UCI or the move is illegal in the current position.  The board is
        left untouched on failure.
        """
        try:
            move = chess.Move.from_uci(uci)
        except ValueError:
            # python-chess parse errors are all ValueError subclasses.
            return None
        if move not in self.board.legal_moves:
            return None
        # SAN must be computed before the push: it depends on the position
        # the move is played from.
        san = self.board.san(move)
        self.board.push(move)
        return san

    def apply_move_san(self, san: str) -> Optional[str]:
        """Apply a SAN move (e.g. 'Nf3') to the board.

        Returns the UCI string on success, or None if the text is invalid,
        ambiguous or illegal.  The board is left untouched on failure.
        """
        try:
            move = self.board.parse_san(san)
        except ValueError:
            # Covers invalid, illegal and ambiguous SAN.
            return None
        # parse_san() returns a *null move* for "--", "Z0", "0000" and "@@@@".
        # Pushing it would silently pass the turn, so require real legality.
        if move not in self.board.legal_moves:
            return None
        self.board.push(move)
        return move.uci()

    def _match_legal_move(self, candidate: str):
        """Return the legal move denoted by *candidate* (SAN first, then UCI), or None."""
        try:
            move = self.board.parse_san(candidate)
        except ValueError:
            move = None
        # The legality test filters out the null move parse_san() may return.
        if move is not None and move in self.board.legal_moves:
            return move

        try:
            move = chess.Move.from_uci(candidate)
        except ValueError:
            return None
        return move if move in self.board.legal_moves else None

    def parse_model_output(self, text: str) -> Optional[str]:
        """Extract the first plausible chess move from raw model output.

        Only the first few whitespace-separated tokens are examined.  Each
        token is stripped of surrounding punctuation/quotes and tried as SAN,
        then as UCI; a leading move number ("1.e4", "12...Nf6") is tolerated.
        Returns the SAN string of the first legal move found, else None.
        The board is never modified.
        """
        if not text:
            return None

        for token in text.split()[: self._MAX_TOKENS_SCANNED]:
            cleaned = token.strip(self._TOKEN_STRIP_CHARS)
            candidates = [cleaned]
            # Neither SAN nor UCI contains '.', so in "1.e4" or "12...Nf6"
            # whatever follows the last dot is the move itself.
            _, dot, tail = cleaned.rpartition(".")
            if dot and tail:
                candidates.append(tail)

            for candidate in candidates:
                move = self._match_legal_move(candidate)
                if move is not None:
                    return self.board.san(move)
        return None

    def random_legal_move_san(self) -> Optional[str]:
        """Return a random legal move in SAN notation (fallback), or None if there is none."""
        legal = list(self.board.legal_moves)
        if not legal:
            return None
        return self.board.san(random.choice(legal))

    def compute_reward(self, agent_color: str) -> float:
        """Terminal reward for the agent after the game ends.

        +1.0  win
        -1.0  loss
         0.0  draw or game not over

        *agent_color* is compared verbatim against "white" / "black"; any
        other value is scored as the losing side of a decisive game.
        """
        result = self.result
        if result == "1-0":
            winner = "white"
        elif result == "0-1":
            winner = "black"
        else:
            # Draw, or the game is still in progress (result is None).
            return 0.0
        return 1.0 if agent_color == winner else -1.0

    def build_prompt(self, agent_color: str, move_history: list[str]) -> str:
        """Build the text prompt fed to Qwen for move generation.

        Kept short so the model stays focused on the move token: the prompt
        contains the FEN, the most recent history entries and a sample of
        legal moves (the first ones in generator order).
        """
        if move_history:
            history_str = " ".join(move_history[-self._PROMPT_HISTORY_LEN:])
        else:
            history_str = "(opening)"
        legal_sample = ", ".join(self.legal_moves_san[: self._PROMPT_LEGAL_SAMPLE])
        return (
            f"You are a chess engine playing as {agent_color}.\n"
            f"Position (FEN): {self.fen}\n"
            f"Move history: {history_str}\n"
            f"Some legal moves: {legal_sample}\n"
            f"Reply with ONLY the single best next move in standard algebraic notation (SAN), "
            f"e.g. 'e4' or 'Nf3'. Do not explain."
        )
|
|
|
|