Spaces:

kaupane
/

Chessformer_Demo

Running on Zero

App Files Files Community

kaupane committed on Jun 4, 2025

Commit

e8a0fd8

verified ·

1 Parent(s): e72f592

Upload 10 files

Browse files

Files changed (10) hide show

Dockerfile +25 -0
README.md +0 -14
app.py +771 -0
model.py +365 -0
requirements.txt +10 -0
utils/__init__.py +17 -0
utils/buffer.py +274 -0
utils/chess_env.py +151 -0
utils/engine.py +759 -0
utils/mapping.py +141 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.12
+# Install system dependencies including Stockfish
+RUN apt-get update && apt-get install -y \
+    stockfish \
+    && rm -rf /var/lib/apt/lists/*
+# Set working directory
+WORKDIR /code
+# Copy requirements and install Python dependencies
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Copy application code
+COPY . /code
+# Make sure Stockfish is executable.
+# The Debian package installs the binary under /usr/games (the path app.py
+# launches), not /usr/bin, so chmod must target that location.
+RUN chmod +x /usr/games/stockfish
+# Expose port
+EXPOSE 7860
+# Run the application
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,14 +0,0 @@
----
-title: Chessformer Demo
-emoji: 🌍
-colorFrom: purple
-colorTo: red
-sdk: gradio
-sdk_version: 5.32.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: Play chess with Chessformer or Stockfish!
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,771 @@

+"""
+"""
+import gradio as gr
+import chess
+import chess.svg
+import chess.pgn
+import re
+import torch
+import os
+import io
+import math
+from typing import Optional, Tuple, List
+import traceback
+from datetime import datetime
+from utils import Engine, ChessformerConfig, StockfishConfig, UCI_MOVE_TO_IDX
+from model import ChessFormerModel
+from concurrent.futures import ThreadPoolExecutor
+import spaces
+from huggingface_hub import hf_hub_download
+# Add to ChessApp.__init__
+# NOTE(review): this is a module-level function, not a method. ChessApp.__init__
+# below does not create `analysis_executor`, so this snippet looks like a pasted
+# instruction that was never merged into the class - confirm and remove or merge.
+def __init__(self):
+    """Attach a two-worker thread pool to ``self`` as ``analysis_executor``."""
+    # ... existing init code ...
+    self.analysis_executor = ThreadPoolExecutor(max_workers=2)
+def update_evaluations_async(self):
+    """Update evaluations asynchronously"""
+    def update_current_engine():
+        if self.current_engine:
+            try:
+                self.current_engine_eval = self.current_engine.analyze_position(self.board.copy())
+                if self.current_engine_eval is None:
+                    self.current_engine_eval = 0.0
+            except:
+                self.current_engine_eval = 0.0
+    def update_stockfish():
+        try:
+            self.stockfish_eval = self.fast_stockfish_analysis(self.board.copy())
+            if self.stockfish_eval is None:
+                self.stockfish_eval = 0.0
+        except:
+            self.stockfish_eval = 0.0
+    # Run both analyses in parallel
+    future1 = self.analysis_executor.submit(update_current_engine)
+    future2 = self.analysis_executor.submit(update_stockfish)
+    # Wait for both to complete
+    future1.result()
+    future2.result()
+class ChessApp:
+    def __init__(self, device):
+        self.board = chess.Board()
+        self.move_history = []
+        self.current_engine = None
+        self.analysis_engine = None
+        self.game_over = False
+        self.user_color = chess.WHITE
+        self.models = {}
+        self.device = device
+        self.current_engine_eval = 0.0
+        self.stockfish_eval = 0.0
+        self.load_models()
+        self.create_analysis_engine()
+    def load_models(self):
+        model_paths = {
+            "ChessFormer-SL": "./ckpts/chessformer-sl_01.pth",
+            "ChessFormer-RL": "./ckpts/chessformer-rl_final.pth"
+        }
+        for name, path in model_paths.items():
+            if os.path.exists(path):
+                print(f"Loading {name} from {path}...")
+                checkpoint = torch.load(path,map_location=self.device)
+                config = checkpoint["config"]
+                model = ChessFormerModel(**config)
+                model.load_state_dict(checkpoint["model_state_dict"])
+                model.to(self.device)
+                model.eval()
+                self.models[name] = model
+                print(f"Successfully loaded {name}.")
+            else:
+                print(f"Model file not found: {path}")
+    def get_depth_limits(self, engine_type: str) -> Tuple[int,int]:
+        if engine_type == "Stockfish":
+            return 1,24,6
+        else:
+            return 0,6,0
+    def create_evaluation_bar(self, eval_score: float, title: str) -> str:
+        """Create HTML evaluation bar from user's perspective with page-matching colors"""
+        # Convert eval_score from white's perspective to user's perspective
+        user_eval = eval_score if self.user_color == chess.WHITE else -eval_score
+        # Clamp evaluation between -1 and 1 for display
+        clamped_eval = max(-1.0, min(1.0, user_eval))
+        # Convert to percentage (0 = user losing, 100 = user winning)
+        percentage = (clamped_eval + 1.0) / 2.0 * 100
+        # Format evaluation text from user's perspective
+        eval_text = f"{user_eval:+.2f}"
+        if abs(user_eval) > 5:
+            eval_text = "±∞" if user_eval > 0 else "∓∞"
+        # Determine advantage text and colors (matching page theme)
+        if user_eval > 0.5:
+            advantage_text = "WINNING"
+            text_color = "#1e40af"  # Blue-700
+            indicator_color = "#3b82f6"  # Blue-500
+        elif user_eval > 0.1:
+            advantage_text = "SLIGHT ADVANTAGE"
+            text_color = "#1e40af"
+            indicator_color = "#60a5fa"  # Blue-400
+        elif user_eval < -0.5:
+            advantage_text = "LOSING"
+            text_color = "#7c2d12"  # Orange-800 (more muted than red)
+            indicator_color = "#ea580c"  # Orange-600
+        elif user_eval < -0.1:
+            advantage_text = "SLIGHT DISADVANTAGE"
+            text_color = "#9a3412"  # Orange-700
+            indicator_color = "#f97316"  # Orange-500
+        else:
+            advantage_text = "EQUAL POSITION"
+            text_color = "#4b5563"  # Gray-600
+            indicator_color = "#6b7280"  # Gray-500
+        return f"""
+        <div style="margin: 10px 0; font-family: 'Segoe UI', Arial, sans-serif;">
+            <h4 style="margin: 5px 0 10px 0; color: #374151; font-size: 14px; font-weight: 600;">{title}</h4>
+            <!-- Evaluation bar with page-matching gradient -->
+            <div style="width: 100%; height: 40px; border: 2px solid #d1d5db; border-radius: 8px; position: relative;
+                        background: linear-gradient(to right,
+                            #fed7aa 0%,     /* Orange-200 - losing */
+                            #fde68a 20%,    /* Yellow-200 */
+                            #e5e7eb 50%,    /* Gray-200 - equal */
+                            #bfdbfe 80%,    /* Blue-200 */
+                            #93c5fd 100%    /* Blue-300 - winning */
+                        );
+                        box-shadow: inset 0 1px 3px rgba(0,0,0,0.05);">
+                <!-- Evaluation indicator -->
+                <div style="position: absolute; left: {percentage}%; top: 50%; transform: translateX(-50%) translateY(-50%);
+                            background: {indicator_color}; border: 3px solid white; border-radius: 50%; width: 18px; height: 18px;
+                            box-shadow: 0 2px 4px rgba(0,0,0,0.15), 0 0 0 1px #d1d5db; z-index: 10;
+                            transition: all 0.3s ease;"></div>
+            </div>
+            <!-- Evaluation text -->
+            <div style="text-align: center; margin-top: 8px; padding: 8px; background: #f9fafb;
+                        border-radius: 6px; border: 1px solid #e5e7eb;">
+                <div style="font-weight: 600; color: {text_color}; font-size: 16px; margin-bottom: 2px;">
+                    {eval_text}
+                </div>
+                <div style="font-size: 10px; color: {text_color}; text-transform: uppercase; letter-spacing: 0.8px; font-weight: 500; opacity: 0.8;">
+                    {advantage_text}
+                </div>
+            </div>
+        </div>
+        """
+    def create_analysis_engine(self):
+        """Create optimized Stockfish depth 27 engine for analysis"""
+        try:
+            config = StockfishConfig(
+                engine_path="/usr/games/stockfish",
+                depth=27
+            )
+            self.analysis_engine = Engine(type="stockfish", stockfish_config=config)
+            # Configure Stockfish for faster analysis
+            if self.analysis_engine and hasattr(self.analysis_engine, 'engine_path'):
+                # We'll patch the engine creation to use optimized settings
+                pass
+            print("Analysis engine (Stockfish depth 27) created successfully")
+        except Exception as e:
+            print(f"Failed to create analysis engine: {e}")
+            self.analysis_engine = None
+    def update_evaluations(self):
+        """Update evaluations from both engines with optimized Stockfish analysis"""
+        # Get current engine evaluation
+        if self.current_engine:
+            try:
+                self.current_engine_eval = self.current_engine.analyze_position(self.board.copy())
+                if self.current_engine_eval is None:
+                    self.current_engine_eval = 0.0
+            except:
+                self.current_engine_eval = 0.0
+        # Get optimized Stockfish analysis
+        if self.analysis_engine:
+            try:
+                self.stockfish_eval = self.fast_stockfish_analysis(self.board.copy())
+                if self.stockfish_eval is None:
+                    self.stockfish_eval = 0.0
+            except:
+                self.stockfish_eval = 0.0
+    def fast_stockfish_analysis(self, board: chess.Board) -> Optional[float]:
+        """Fast Stockfish analysis with optimized settings"""
+        try:
+            import chess.engine
+            # Create engine with optimized settings
+            with chess.engine.SimpleEngine.popen_uci("/usr/games/stockfish") as engine:
+                # Configure for speed
+                engine.configure({
+                    "Threads": min(8, os.cpu_count() or 4),  # Use multiple threads
+                    "Hash": 256,  # 256MB hash table
+                    "UCI_AnalyseMode": True
+                })
+                # Use time limit instead of depth for faster analysis
+                info = engine.analyse(
+                    board,
+                    chess.engine.Limit(time=1.0),  # 1 second analysis
+                )
+                score_obj = info.get("score")
+                if score_obj is None:
+                    return None
+                pov_score = score_obj.pov(chess.WHITE)
+                if pov_score.is_mate():
+                    mate_score = pov_score.mate()
+                    cp = 10000.0 if mate_score > 0 else -10000.0
+                elif pov_score.cp is not None:
+                    cp = float(pov_score.cp)
+                else:
+                    return None
+                # Normalize score
+                normalized_score = 2 / (1 + math.exp(-0.004 * cp)) - 1
+                return normalized_score
+        except Exception as e:
+            print(f"Fast Stockfish analysis error: {e}")
+            return None
+    def create_engine(self, engine_type: str, depth: int, temperature: float=0.5) -> Optional[Engine]:
+        if engine_type == "Stockfish":
+            config = StockfishConfig(
+                engine_path="/usr/games/stockfish",
+                depth=depth
+            )
+            return Engine(type="stockfish",stockfish_config=config)
+        elif engine_type in self.models:
+            config = ChessformerConfig(
+                chessformer=self.models[engine_type],
+                device=self.device,
+                temperature=temperature,
+                depth=depth if depth > 0 else 0,
+                top_k=8,
+                decay_rate=0.6,
+                max_batch_size=800
+            )
+            return Engine(type="chessformer",chessformer_config=config)
+        return None
+    def parse_move(self, move_str: str) -> Optional[chess.Move]:
+        """Parse move input in either UCI format ("e2e4") or algebraic notation ("Ne5")"""
+        if not move_str:
+            return None
+        move_str = move_str.strip()
+        # Try UCI format first
+        uci_pattern = r'^[a-h][1-8][a-h][1-8][qrbn]?$'
+        if re.match(uci_pattern,move_str.lower()):
+            try:
+                return chess.Move.from_uci(move_str.lower())
+            except ValueError:
+                pass
+        # Try algrebraic notation
+        try:
+            return self.board.parse_san(move_str)
+        except ValueError:
+            pass
+        return None
+    def get_board_svg(self) -> str:
+        """Generate SVG representation of the chess board"""
+        flip = (self.user_color == chess.BLACK)
+        lastmove = None
+        if self.move_history:
+            lastmove = self.move_history[-1]
+        svg = chess.svg.board(
+            board=self.board,
+            flipped=flip,
+            lastmove=lastmove,
+            size=600
+        )
+        return svg
+    def get_move_history_text(self) -> str:
+        """Generate move history in PGN format"""
+        try:
+            game = chess.pgn.Game()
+            game.headers["Event"] = "ChessFormer Demo"
+            game.headers["Date"] = datetime.now().strftime("%Y.%m.%d")
+            game.headers["White"] = "You" if self.user_color == chess.WHITE else "Engine"
+            game.headers["Black"] = "Engine" if self.user_color == chess.WHITE else "You"
+            node = game
+            temp_board = chess.Board()
+            for move in self.move_history:
+                node = node.add_variation(move)
+                temp_board.push(move)
+            if self.game_over:
+                outcome = self.board.outcome()
+                if outcome:
+                    if outcome.winner == chess.WHITE:
+                        game.headers["Result"] = "1-0"
+                    elif outcome.winner == chess.BLACK:
+                        game.headers["Result"] = "0-1"
+                    else:
+                        game.headers["Result"] = "1/2-1/2"
+                else:
+                    game.headers["Result"] = "*"
+            else:
+                game.headers["Result"] = "*"
+            return str(game)
+        except Exception as e:
+            print(f"Error generating move history: {e}")
+            return "Move history unavailable"
+    def export_pgn(self) -> str:
+        return self.get_move_history_text()
+    def import_fen(self, fen: str) -> Tuple[str,str,str,str,str]:
+        try:
+            test_board = chess.Board(fen.strip())
+            self.board = test_board
+            self.move_history = []
+            self.game_over = False
+            self.update_evaluations()
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Position loaded from FEN: {fen}",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        except Exception as e:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Invalid FEN: {str(e)}",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+    def import_pgn(self, pgn_text: str) -> Tuple[str,str,str,str,str]:
+        try:
+            pgn_io = io.StringIO(pgn_text.strip())
+            game = chess.pgn.read_game(pgn_io)
+            if game is None:
+                raise ValueError("Could not parse PGN")
+            self.board = game.board()
+            self.move_history = []
+            for move in game.mainline_moves():
+                self.board.push(move)
+                self.move_history.append(move)
+            self.game_over = self.board.is_game_over()
+            self.update_evaluations()
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Game loaded from PGN ({len(self.move_history)} moves)",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        except Exception as e:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Invalid PGN: {str(e)}",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+    def make_user_move(self, move_str: str) -> Tuple[str,str,str,str,str,str]:
+        if self.game_over:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                "Game is over. Click 'New Game' to start a new game.",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        if self.board.turn != self.user_color:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                "It's not your turn now!",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        move = self.parse_move(move_str)
+        if move is None:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Invalid move: '{move_str}'. Try formats like 'e2e4' or 'Ne5'",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        if move not in self.board.legal_moves:
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                f"Illegal move: '{move_str}'",
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        self.board.push(move)
+        self.move_history.append(move)
+        self.update_evaluations()
+        if self.board.is_game_over():
+            self.game_over = True
+            outcome = self.board.outcome()
+            if outcome:
+                if outcome.winner == self.user_color:
+                    status = "Congratulations! You won!"
+                elif outcome.winner is None:
+                    status = "Game drawn."
+                else:
+                    status = "You lost."
+                status += f" ({outcome.termination.name})"
+            else:
+                status = "Game over."
+            return (
+                self.get_board_svg(),
+                self.get_move_history_text(),
+                status,
+                "",
+                self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+                self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+            )
+        # Get engine move
+        try:
+            engine_move_uci, engine_value = self.current_engine.move(self.board)
+            if engine_move_uci == "<claim_draw>":
+                self.game_over = True
+                status = "Engine claimed a draw."
+            else:
+                engine_move = chess.Move.from_uci(engine_move_uci)
+                self.board.push(engine_move)
+                self.move_history.append(engine_move)
+                if self.board.is_game_over():
+                    self.game_over = True
+                    outcome = self.board.outcome()
+                    if outcome:
+                        if outcome.winner == self.user_color:
+                            status = "🎉🏆 CONGRATULATIONS! YOU WON! 🏆🎉"
+                            status += f"\n🎯 Victory by {outcome.termination.name}! 🎯"
+                        elif outcome.winner is None:
+                            status = "🤝 GAME DRAWN 🤝"
+                            status += f"\n⚖️ Draw by {outcome.termination.name} ⚖️"
+                        else:
+                            status = "😔 YOU LOST 😔"
+                            status += f"\n💔 Defeated by {outcome.termination.name} 💔"
+                    else:
+                        status = "🏁 GAME OVER 🏁"
+                else:
+                    status = f"Engine played: {engine_move.uci()}."
+        except Exception as e:
+            status = f"Engine error: {str(e)}"
+            print(f"Engine error: {e}")
+            traceback.print_exc()
+        return (
+            self.get_board_svg(),
+            self.get_move_history_text(),
+            status,
+            "", # clear input
+            self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+            self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+        )
+    def new_game(self, engine_type: str, depth: int, color: str, temperature: float) -> Tuple[str,str,str,str,str,str]:
+        "Start a new game"
+        self.board = chess.Board()
+        self.move_history = []
+        self.game_over = False
+        self.user_color = chess.WHITE if color == "White" else chess.BLACK
+        # Create new engine
+        self.current_engine = self.create_engine(engine_type, depth, temperature)
+        self.update_evaluations()
+        if self.current_engine is None:
+            status = f"Failed to create {engine_type} engine."
+        else:
+            status = f"New game started! You are playing {color} against {engine_type} (depth {depth})."
+            # If user is black, make engine move first
+            if self.user_color == chess.BLACK:
+                try:
+                    engine_move_uci, engine_value = self.current_engine.move(self.board)
+                    if engine_move_uci != "<claim_draw>":
+                        engine_move = chess.Move.from_uci(engine_move_uci)
+                        self.board.push(engine_move)
+                        self.move_history.append(engine_move)
+                        status += f" Engine opened with: {engine_move.uci()}"
+                except Exception as e:
+                    status += f" Engine error on first move: {str(e)}"
+        return (
+            self.get_board_svg(),
+            self.get_move_history_text(),
+            status,
+            "",
+            self.create_evaluation_bar(self.stockfish_eval, "Stockfish Analysis (from your perspective)"),
+            self.create_evaluation_bar(self.current_engine_eval, "Engine Analysis (from your perspective)")
+        )
+# Single module-level ChessApp on the CPU device; create_interface() wires its
+# bound methods directly as Gradio handlers.
+# NOTE(review): because there is only this one instance, game state looks shared
+# between all browser sessions - confirm whether per-session state is wanted.
+app = ChessApp(torch.device("cpu"))
+def create_interface():
+    """Create the Gradio interface with improved layout"""
+    with gr.Blocks(title="ChessFormer Demo", theme=gr.themes.Soft()) as interface:
+        gr.Markdown("# 🏆 ChessFormer Demo")
+        gr.Markdown("Play chess against ChessFormer models or Stockfish!")
+        with gr.Row():
+            # Left column - Analysis + History
+            with gr.Column(scale=1):
+                gr.Markdown("### 📊 Position Analysis")
+                # Stockfish Analysis
+                stockfish_eval_display = gr.HTML(
+                    value=app.create_evaluation_bar(0.0, "Stockfish Analysis"),
+                    label="Stockfish"
+                )
+                # Current Engine Analysis
+                current_engine_eval_display = gr.HTML(
+                    value=app.create_evaluation_bar(0.0, "Engine Analysis"),
+                    label="Engine"
+                )
+                # Move history
+                gr.Markdown("### 📝 Game History")
+                history_display = gr.Textbox(
+                    value=app.get_move_history_text(),
+                    label="PGN",
+                    lines=12,
+                    max_lines=15,
+                    interactive=False
+                )
+            # Middle column - Game Board + Controls
+            with gr.Column(scale=4):
+                # Chess board display
+                board_display = gr.HTML(
+                    value=app.get_board_svg(),
+                    label="Chess Board"
+                )
+                # Move input
+                with gr.Row():
+                    move_input = gr.Textbox(
+                        placeholder="Enter move (e.g., 'e2e4' or 'Ne5')",
+                        label="Your Move",
+                        scale=4
+                    )
+                    move_button = gr.Button("Make Move", variant="primary", scale=1)
+                # Game status
+                status_display = gr.Textbox(
+                    value="Click 'New Game' to start playing!",
+                    label="Game Status",
+                    interactive=False,
+                    lines=2
+                )
+            # Right column - Settings + Import/Export
+            with gr.Column(scale=2):
+                # Engine settings
+                gr.Markdown("### ⚙️ Game Settings")
+                engine_choices = ["Stockfish"] + list(app.models.keys())
+                engine_select = gr.Dropdown(
+                    choices=engine_choices,
+                    value="ChessFormer-SL" if engine_choices else None,
+                    label="Opponent Engine"
+                )
+                depth_slider = gr.Slider(
+                    minimum=0,
+                    maximum=6,
+                    value=0,
+                    step=1,
+                    label="Engine Depth"
+                )
+                color_select = gr.Radio(
+                    choices=["White", "Black"],
+                    value="White",
+                    label="Your Color"
+                )
+                temperature_slider = gr.Slider(
+                    minimum=0.1,
+                    maximum=2.0,
+                    value=0.5,
+                    step=0.1,
+                    label="Temperature (ChessFormer only)"
+                )
+                new_game_button = gr.Button("🔄 New Game", variant="secondary", size="lg")
+                # Import/Export section
+                gr.Markdown("### 📁 Import/Export")
+                with gr.Tabs():
+                    with gr.Tab("Import FEN"):
+                        fen_input = gr.Textbox(
+                            placeholder="rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
+                            label="FEN String",
+                            lines=2
+                        )
+                        import_fen_button = gr.Button("Import FEN")
+                    with gr.Tab("Import PGN"):
+                        pgn_input = gr.Textbox(
+                            placeholder="1. e4 e5 2. Nf3 Nc6...",
+                            label="PGN Text",
+                            lines=3
+                        )
+                        import_pgn_button = gr.Button("Import PGN")
+                    with gr.Tab("Export"):
+                        export_button = gr.Button("📁 Download PGN")
+                        export_output = gr.File(label="Download")
+        # Available models info
+        gr.Markdown("### 🤖 Available Models")
+        if app.models:
+            model_info = "**Loaded ChessFormer models:**\n" + "\n".join([f"• {name}" for name in app.models.keys()])
+        else:
+            model_info = "⚠️ No ChessFormer models found. Make sure model checkpoints are in the ./ckpts/ directory."
+        gr.Markdown(model_info)
+        # Function to update depth limits based on engine selection
+        def update_depth_limits(engine_type):
+            min_depth, max_depth, value = app.get_depth_limits(engine_type)
+            return gr.Slider(minimum=min_depth, maximum=max_depth, value=value, step=1)
+        # Function to export PGN
+        def export_pgn_file():
+            pgn_content = app.export_pgn()
+            filename = f"chess_game_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pgn"
+            with open(filename, 'w') as f:
+                f.write(pgn_content)
+            return filename
+        # Event handlers (same as before...)
+        engine_select.change(
+            fn=update_depth_limits,
+            inputs=[engine_select],
+            outputs=[depth_slider]
+        )
+        move_button.click(
+            fn=app.make_user_move,
+            inputs=[move_input],
+            outputs=[board_display, history_display, status_display, move_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+        move_input.submit(
+            fn=app.make_user_move,
+            inputs=[move_input],
+            outputs=[board_display, history_display, status_display, move_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+        new_game_button.click(
+            fn=app.new_game,
+            inputs=[engine_select, depth_slider, color_select, temperature_slider],
+            outputs=[board_display, history_display, status_display, move_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+        import_fen_button.click(
+            fn=app.import_fen,
+            inputs=[fen_input],
+            outputs=[board_display, history_display, status_display, fen_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+        import_pgn_button.click(
+            fn=app.import_pgn,
+            inputs=[pgn_input],
+            outputs=[board_display, history_display, status_display, pgn_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+        export_button.click(
+            fn=export_pgn_file,
+            outputs=[export_output]
+        )
+        # Auto-start a new game when interface loads
+        interface.load(
+            fn=app.new_game,
+            inputs=[gr.State("Stockfish"), gr.State(6), gr.State("White"), gr.State(0.5)],
+            outputs=[board_display, history_display, status_display, move_input,
+                    stockfish_eval_display, current_engine_eval_display]
+        )
+    return interface
+# Script entry point: build the UI and start the Gradio server with its defaults.
+if __name__ == "__main__":
+    # Create and launch interface
+    interface = create_interface()
+    interface.launch()

model.py ADDED Viewed

	@@ -0,0 +1,365 @@

+import torch
+import torch.nn as nn
+from typing import List, Dict, Tuple
+from huggingface_hub import PyTorchModelHubMixin
+from utils import MAX_HALFMOVES, MAX_FULLMOVES, EMPTY_SQ_IDX, PIECE_TO_IDX, SQUARE_TO_IDX, IDX_TO_UCI_MOVE
+# --- Tokenizer --- #
+class FENTokenizer(nn.Module):
+    """Embed FEN strings (plus repetition counts) as a sequence of 73 tokens.
+    Every token has width ``hidden_size``. Board squares get a piece embedding plus a
+    positional embedding; all other FEN fields get their own learned embeddings.
+    """
+    def __init__(self, hidden_size,dtype):
+        super().__init__()
+        self.side_embed = nn.Embedding(2,hidden_size,dtype=dtype) # black/white embedding
+        self.castling_embed_k = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype))
+        self.castling_embed_q = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype))
+        self.castling_embed_K = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype))
+        self.castling_embed_Q = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype))
+        self.no_castling_embed = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype))
+        self.piece_embed = nn.Embedding(13,hidden_size,dtype=dtype) # 6 for white pieces, 6 for black pieces, 1 for empty
+        self.no_en_passant_embed = nn.Parameter(torch.randn(1,1,hidden_size,dtype=dtype)) # use positional embed for the target square, or a special one for '-'
+        self.half_move_embed = nn.Embedding(MAX_HALFMOVES,hidden_size,dtype=dtype)
+        self.full_move_embed = nn.Embedding(MAX_FULLMOVES,hidden_size,dtype=dtype)
+        self.repetition_embed = nn.Embedding(3,hidden_size,dtype=dtype)
+        self.pos_embed = nn.Embedding(64,hidden_size,dtype=dtype) # positional embedding
+    def _parse_fen_string(self, fen_str: str) -> Dict:
+        """Split a FEN string into its six whitespace-separated fields.
+        Raises:
+            ValueError: if the string does not contain exactly six fields.
+        """
+        parts = fen_str.split()
+        if len(parts) != 6:
+            raise ValueError(f"Invalid FEN string: {fen_str}. Expected 6 fields")
+        return {
+            "piece_placement": parts[0],
+            "side_to_move": parts[1],
+            "castling": parts[2],
+            "en_passant": parts[3],
+            "halfmove_clock": parts[4],
+            "fullmove_number": parts[5],
+        }
+    def _piece_indices(self, placement: str, device) -> torch.Tensor:
+        """Translate the FEN piece-placement field into 64 piece indices.
+        Square index is ``rank * 8 + file`` with rank 0 = rank '1' and file 0 = file 'a'.
+        Raises:
+            ValueError: on an unknown character or a piece outside the 8x8 board.
+        """
+        piece_indices = torch.full((64,), EMPTY_SQ_IDX, dtype=torch.long, device=device)
+        current_rank = 7 # FEN lists rank 8 first
+        current_file = 0 # each rank starts at file 'a'
+        for char in placement:
+            if char == '/':
+                current_rank -= 1
+                current_file = 0
+            elif char.isdigit():
+                current_file += int(char)
+            elif char in PIECE_TO_IDX:
+                # Check rank and file separately: an over-long rank would otherwise
+                # land on a valid-looking square of the next rank.
+                if not (0 <= current_rank < 8 and 0 <= current_file < 8):
+                    raise ValueError(f"Invalid FEN piece placement: {placement}")
+                piece_indices[current_rank * 8 + current_file] = PIECE_TO_IDX[char]
+                current_file += 1
+            else:
+                raise ValueError(f"Invalid character in FEN piece placement: {char}")
+        return piece_indices
+    def forward(self, fen_list: List[str], repetitions: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            fen_list: List of FEN strings, one per position
+            repetitions: 1-D integer tensor with one repetition count per FEN
+                (1 = first occurrence); values above 3 are treated as 3
+        Returns:
+            torch tensor of shape (n_fen,73,hidden_size) where 73 tokens consists of:
+                64 piece tokens (fen's first field) +
+                1 which-side-to-move token (fen's second field) +
+                4 castling rights tokens (fen's third field) +
+                1 en-passant target token (fen's fourth field) +
+                1 half move clock token (fen's fifth field) +
+                1 full move number token (fen's sixth field) +
+                1 repetition count token (repetitions input)
+        Raises:
+            ValueError: if any FEN string is malformed.
+        """
+        batch_size = len(fen_list)
+        assert batch_size == repetitions.shape[0]
+        assert len(repetitions.size()) == 1
+        batch_tokens = []
+        device = self.side_embed.weight.device
+        # Positional embeddings are identical for every FEN, so compute them once
+        square_indices = torch.arange(64, device=device)
+        all_pos_embeds = self.pos_embed(square_indices) # (64,D)
+        # Fixed K, Q, k, q token order, independent of the order inside the FEN field
+        castling_rights = (
+            ('K', self.castling_embed_K),
+            ('Q', self.castling_embed_Q),
+            ('k', self.castling_embed_k),
+            ('q', self.castling_embed_q),
+        )
+        for fen_str in fen_list:
+            parsed_fen = self._parse_fen_string(fen_str)
+            tokens = []
+            # --- 1. Piece Placement (64 tokens) ---
+            piece_indices = self._piece_indices(parsed_fen["piece_placement"], device)
+            tokens.append(self.piece_embed(piece_indices) + all_pos_embeds) # (64, D)
+            # --- 2. Side to Move (1 token) ---
+            side_idx = 0 if parsed_fen["side_to_move"] == 'w' else 1
+            tokens.append(self.side_embed(torch.tensor(side_idx, device=device)).unsqueeze(0)) # (1, D)
+            # --- 3. Castling Rights (4 tokens) ---
+            castling_str = parsed_fen["castling"]
+            castling_tokens = torch.cat([
+                embed if flag in castling_str else self.no_castling_embed
+                for flag, embed in castling_rights
+            ], dim=1).squeeze(0) # (4, D)
+            tokens.append(castling_tokens)
+            # --- 4. En Passant Target (1 token) ---
+            en_passant_str = parsed_fen["en_passant"]
+            if en_passant_str == '-':
+                en_passant_token = self.no_en_passant_embed.squeeze(0) # (1, D)
+            elif en_passant_str in SQUARE_TO_IDX:
+                # The target square is represented by its positional embedding
+                sq_idx = SQUARE_TO_IDX[en_passant_str]
+                en_passant_token = self.pos_embed(torch.tensor(sq_idx, device=device)).unsqueeze(0) # (1, D)
+            else:
+                raise ValueError(f"Invalid en passant square: {en_passant_str}")
+            tokens.append(en_passant_token)
+            # --- 5. Half Move Clock (1 token) ---
+            try:
+                half_move_int = int(parsed_fen["halfmove_clock"])
+            except ValueError as exc:
+                raise ValueError(f"Invalid halfmove clock value: {parsed_fen['halfmove_clock']}") from exc
+            # Clamp into the embedding table before lookup
+            half_move_idx = min(max(half_move_int, 0), MAX_HALFMOVES - 1)
+            tokens.append(self.half_move_embed(torch.tensor(half_move_idx, device=device)).unsqueeze(0)) # (1, D)
+            # --- 6. Full Move Number (1 token) ---
+            try:
+                full_move_int = int(parsed_fen["fullmove_number"])
+            except ValueError as exc:
+                raise ValueError(f"Invalid fullmove number value: {parsed_fen['fullmove_number']}") from exc
+            # Full moves start at 1: clamp to [1, MAX_FULLMOVES], then shift to a 0-based index
+            full_move_idx = min(max(full_move_int, 1), MAX_FULLMOVES) - 1
+            tokens.append(self.full_move_embed(torch.tensor(full_move_idx, device=device)).unsqueeze(0)) # (1, D)
+            # Shapes: (64, D), (1, D), (4, D), (1, D), (1, D), (1, D) -> Total 72 tokens
+            batch_tokens.append(torch.cat(tokens, dim=0)) # (72, D)
+        # Stack into a batch
+        batch_tokens = torch.stack(batch_tokens, dim=0) # (B,72,D)
+        # --- 7. Repetition Count (1 token) ---
+        # Move the counts next to the embedding table; a no-op when already there
+        repetitions = repetitions.to(device) - 1 # from 1~3 to 0~2
+        repetitions = torch.clamp(repetitions,0,2) # if repetition count >3 but no player claimed a draw, it will be treated as 3 repetitions
+        repetition_tokens = self.repetition_embed(repetitions).unsqueeze(1) # (B,1,D)
+        return torch.cat([batch_tokens,repetition_tokens], dim=1) # (B, 73, D)
+# --- Helper Modules --- #
+class SwiGLUFFN(nn.Module):
+    """Gated feed-forward block: down_proj(up_proj(x) * dropout(silu(gate_proj(x))))."""
+    def __init__(self,
+                 d_model,
+                 dim_feedforward,
+                 dropout: float,
+                 bias_up: bool=False,
+                 bias_gate: bool=False,
+                 bias_down: bool=True,
+                 dtype=None):
+        super().__init__()
+        # Two parallel projections into the hidden width, one projection back out
+        self.up_proj = nn.Linear(in_features=d_model, out_features=dim_feedforward, bias=bias_up, dtype=dtype)
+        self.gate_proj = nn.Linear(in_features=d_model, out_features=dim_feedforward, bias=bias_gate, dtype=dtype)
+        self.down_proj = nn.Linear(in_features=dim_feedforward, out_features=d_model, bias=bias_down, dtype=dtype)
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, x):
+        # Dropout is applied to the activated gate only, not to the up projection
+        gate = self.dropout(nn.functional.silu(self.gate_proj(x)))
+        hidden = self.up_proj(x) * gate
+        return self.down_proj(hidden)
+class TransformerEncoderLayer(nn.Module):
+    """Custom transformer encoder layer with RMSNorm and SwiGLUFFN.
+    With ``norm_first=True`` the layer is pre-norm (normalise, then add the residual);
+    otherwise it is post-norm (add the residual, then normalise).
+    """
+    def __init__(self,
+                 d_model: int,
+                 nhead: int,
+                 dim_feedforward: int,
+                 dropout: float,
+                 batch_first: bool=True,
+                 norm_first: bool=False,
+                 dtype=None):
+        super().__init__()
+        self.norm_first = norm_first
+        self.norm1 = nn.RMSNorm(d_model,dtype=dtype)
+        self.dropout_sa = nn.Dropout(dropout)
+        self.self_attn = nn.MultiheadAttention(
+            d_model,
+            nhead,
+            dropout=dropout,
+            bias=False,
+            batch_first=batch_first,
+            dtype=dtype
+        )
+        self.norm2 = nn.RMSNorm(d_model,dtype=dtype)
+        self.dropout_ff = nn.Dropout(dropout)
+        self.mlp = SwiGLUFFN(
+            d_model,
+            dim_feedforward,
+            dropout=dropout,
+            bias_up=False,
+            bias_gate=False,
+            bias_down=True,
+            dtype=dtype
+            )
+    def forward(self, x, return_attention=False):
+        """Run self-attention and the feed-forward block with residual connections.
+        Returns:
+            ``x`` alone, or ``(x, attn_weights)`` when ``return_attention`` is true;
+            the weights are per-head (not averaged over heads).
+        """
+        attn_weights = None
+        sa_input = self.norm1(x) if self.norm_first else x
+        if return_attention:
+            attn_output, attn_weights = self._sa_block(sa_input, return_attention=True)
+        else:
+            attn_output = self._sa_block(sa_input)
+        if self.norm_first:
+            x = x + attn_output
+            x = x + self._ff_block(self.norm2(x))
+        else:
+            x = self.norm1(x + attn_output)
+            x = self.norm2(x + self._ff_block(x))
+        if return_attention:
+            return x, attn_weights
+        return x
+    def _sa_block(self, x, return_attention=False):
+        """Self-attention followed by dropout; optionally also return the attention weights."""
+        if return_attention:
+            attn_output, attn_weights = self.self_attn(x,x,x,need_weights=True,average_attn_weights=False)
+            return self.dropout_sa(attn_output), attn_weights
+        # The weights are discarded here, so do not ask for them: need_weights=False
+        # lets MultiheadAttention use its optimized attention implementation.
+        attn_output = self.self_attn(x,x,x,need_weights=False)[0]
+        return self.dropout_sa(attn_output)
+    def _ff_block(self,x):
+        """Feed-forward block followed by dropout."""
+        return self.dropout_ff(self.mlp(x))
+# --- Model Arch --- #
+class ChessFormerModel(nn.Module, PyTorchModelHubMixin):
+    """Transformer encoder over FEN tokens with a move-logit head and a scalar value head.
+    Two learned tokens are appended to the 73 tokenizer tokens; after the encoder
+    stack, the second-to-last token feeds the action head and the last token feeds
+    the value head.
+    """
+    def __init__(self,
+                 num_blocks,
+                 hidden_size,
+                 intermediate_size,
+                 num_heads,
+                 dropout: float=0.00,
+                 possible_moves: int=len(IDX_TO_UCI_MOVE), # 1969 structurally valid moves
+                 dtype=None):
+        super().__init__()
+        self.fen_tokenizer = FENTokenizer(hidden_size,dtype=dtype)
+        self.act_token = nn.Parameter(torch.randn((1,1,hidden_size),dtype=dtype) * 0.02)
+        self.val_token = nn.Parameter(torch.randn((1,1,hidden_size),dtype=dtype) * 0.02)
+        self.act_proj = nn.Linear(hidden_size,possible_moves,dtype=dtype)
+        self.val_proj = nn.Linear(hidden_size,1,dtype=dtype)
+        self.blocks = nn.ModuleList(
+            TransformerEncoderLayer(
+                d_model=hidden_size,
+                nhead=num_heads,
+                dim_feedforward=intermediate_size,
+                dropout=dropout,
+                batch_first=True,
+                norm_first=True,
+                dtype=dtype
+            ) for _ in range(num_blocks)
+        )
+        self.dtype=dtype
+        self.possible_moves = possible_moves
+        # NOTE(review): unlike every other submodule, final_norm is built without
+        # dtype=dtype - confirm whether that is intentional before changing it.
+        self.final_norm = nn.RMSNorm(hidden_size)
+        self._initialize_weights()
+    def _initialize_weights(self):
+        """Initialize weights: Kaiming-normal linears, N(0, 0.02) embeddings and special tokens, unit norms."""
+        for m in self.modules():
+            if isinstance(m,nn.Linear):
+                nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Embedding):
+                nn.init.normal_(m.weight, std=0.02)
+            elif isinstance(m, nn.LayerNorm):
+                if hasattr(m, 'weight'):
+                    nn.init.constant_(m.weight, 1.0)
+                if hasattr(m, 'bias') and m.bias is not None:
+                    # Zero the bias, not the weight that was just set to 1.0
+                    nn.init.constant_(m.bias, 0.0)
+            elif isinstance(m, nn.RMSNorm):
+                if hasattr(m, 'weight'):
+                    nn.init.constant_(m.weight, 1.0)
+        # Bare nn.Parameter tensors are not modules, so the loop above never sees them
+        tokenizer_params = dict(self.fen_tokenizer.named_parameters())
+        params_to_init = [
+            self.act_token, self.val_token,
+            tokenizer_params.get('castling_embed_k'), tokenizer_params.get('castling_embed_q'),
+            tokenizer_params.get('castling_embed_K'), tokenizer_params.get('castling_embed_Q'),
+            tokenizer_params.get('no_castling_embed'), tokenizer_params.get('no_en_passant_embed')
+        ]
+        for param in params_to_init:
+            if param is not None and param.requires_grad:
+                nn.init.normal_(param, std=0.02)
+    def forward(self, fen: List[str], repetitions: torch.Tensor, return_attention: bool=False) -> Tuple:
+        """
+        Args:
+            fen: List of FEN strings
+            repetitions: 1-D tensor with one repetition count per FEN
+            return_attention: also return the attention weights of every block
+        Returns:
+            (act_logits, val) with shapes (B,possible_moves) and (B,), plus a list with
+            one attention tensor per block when return_attention is true
+        """
+        x = self.fen_tokenizer(fen,repetitions) # (B,73,D), pos embed are added here
+        bs = x.shape[0]
+        x = torch.cat([x,self.act_token.expand(bs,-1,-1),self.val_token.expand(bs,-1,-1)],dim=1) # (B,75,D)
+        attention_maps = [] if return_attention else None
+        for block in self.blocks:
+            if return_attention:
+                x, attn = block(x, return_attention=True)
+                attention_maps.append(attn)
+            else:
+                x = block(x)
+        x = self.final_norm(x)
+        act = x[:,-2,:] # action token
+        val = x[:,-1,:] # value token
+        act_logits = self.act_proj(act) # (B,1969)
+        val = self.val_proj(val) # (B,1)
+        if return_attention:
+            return act_logits, val.squeeze(1), attention_maps
+        else:
+            return act_logits, val.squeeze(1)
+def load_model(ckpt_path, map_location="cpu"):
+    """Rebuild a ChessFormerModel from a checkpoint file.
+    Args:
+        ckpt_path: path to a checkpoint holding the constructor kwargs (under
+            "model_config" or "config") and the weights under "model_state_dict"
+        map_location: forwarded to torch.load; the "cpu" default lets a checkpoint
+            saved from a GPU load on a machine without one
+    Returns:
+        the model with the checkpoint weights loaded
+    Raises:
+        KeyError: if the checkpoint contains neither config key
+    """
+    checkpoint = torch.load(ckpt_path, map_location=map_location)
+    # The publishing script in this file reads the kwargs under "config"; accept both spellings
+    if "model_config" in checkpoint:
+        model_config = checkpoint["model_config"]
+    elif "config" in checkpoint:
+        model_config = checkpoint["config"]
+    else:
+        raise KeyError("Checkpoint has neither 'model_config' nor 'config'")
+    model = ChessFormerModel(**model_config)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    return model
+if __name__ == "__main__":
+    # One-off publishing script: load a local checkpoint onto the CPU, rebuild the
+    # model from the constructor kwargs stored under "config", load its weights,
+    # then upload it with push_to_hub.
+    checkpoint = torch.load("./ckpts/chessformer-sl_01.pth",map_location=torch.device("cpu"))
+    model = ChessFormerModel(**checkpoint["config"])
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.push_to_hub("kaupane/ChessFormer-SL")

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+gradio
+torch
+python-chess
+chess
+huggingface-hub
+transformers
+numpy
+Pillow
+datasets

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from .buffer import ReplayBuffer, Game
+from .chess_env import BatchChessEnv
+from .engine import Engine, ChessformerConfig, StockfishConfig
+from .mapping import UCI_MOVE_TO_IDX, IDX_TO_UCI_MOVE, MAX_HALFMOVES, MAX_FULLMOVES, EMPTY_SQ_IDX, PIECE_TO_IDX, SQUARE_TO_IDX
+# Public API of the utils package; every name imported above is re-exported,
+# including the two engine config classes.
+__all__ = ['ReplayBuffer',
+           'BatchChessEnv',
+           'Engine',
+           'ChessformerConfig',
+           'StockfishConfig',
+           'Game',
+           'UCI_MOVE_TO_IDX',
+           'IDX_TO_UCI_MOVE',
+           'MAX_HALFMOVES',
+           'MAX_FULLMOVES',
+           'EMPTY_SQ_IDX',
+           'PIECE_TO_IDX',
+           'SQUARE_TO_IDX'
+           ]

utils/buffer.py ADDED Viewed

	@@ -0,0 +1,274 @@

+import torch
+from collections import deque
+import numpy as np
+from typing import List, Iterator, Tuple, Optional
+import chess
+class Game:
+    """
+    One chess game's trajectory, kept as parallel per-move lists.
+    Serves as a *temporary* per-game buffer that:
+        - records fens, repetition counts, actions, values, log_probs and invalid_masks
+        - tracks whether the game is still active and how it ended
+    """
+    def __init__(self):
+        # Status
+        self.active = True
+        self.valid = True
+        self.completion_reason = None
+        self.game_result = None
+        # Trajectory columns; index i of every list describes the same move
+        self.fens = []
+        self.repetition_counts = []
+        self.actions = []
+        self.values = []
+        self.log_probs = []
+        self.invalid_masks = []
+    def update_trajectory(self, fen, rep, act, val, logp, inv_m):
+        """Append one step to every trajectory column."""
+        columns = (self.fens, self.repetition_counts, self.actions,
+                   self.values, self.log_probs, self.invalid_masks)
+        for column, item in zip(columns, (fen, rep, act, val, logp, inv_m)):
+            column.append(item)
+    def update_game_status(self, done, reason):
+        """Close the game when ``done`` is true; any reason other than a result string marks it invalid."""
+        if not (done == True):
+            return
+        self.active = False
+        self.completion_reason = reason
+        if reason in ["1-0","0-1","1/2-1/2"]:
+            self.game_result = reason
+            return
+        self.game_result = None
+        self.valid = False
+    def _side_trajectory(self, white_to_move):
+        """Collect the steps, excluding the last stored one, where the given side is to move."""
+        picked = []
+        for step in range(len(self.fens) - 1):
+            turn = chess.Board(self.fens[step]).turn
+            if bool(turn) == white_to_move:
+                picked.append(step)
+        columns = (
+            ('fens', self.fens),
+            ('repetition_counts', self.repetition_counts),
+            ('actions', self.actions),
+            ('values', self.values),
+            ('log_probs', self.log_probs),
+            ('invalid_masks', self.invalid_masks),
+        )
+        return {key: [column[step] for step in picked] for key, column in columns}
+    def get_white_trajectory(self):
+        """Extract the trajectory for white"""
+        return self._side_trajectory(True)
+    def get_black_trajectory(self):
+        """Extract the trajectory for black pieces."""
+        return self._side_trajectory(False)
+class ReplayBuffer:
+    """
+    The buffer class for PPO reinforcement learning.
+    Handles:
+        - storing per-move samples: fens, repetition counts, actions, log_probs,
+          values, invalid_masks and the derived advantages
+        - calculating advantages (GAE) from the value predictions and the game result
+        - yielding the stored samples in minibatches
+    Every column is a deque(maxlen=capacity), so the oldest samples are dropped once
+    capacity is exceeded.
+    """
+    def __init__(self,
+                 capacity: int,
+                 batch_size: int,
+                 gamma: float,
+                 gae_lambda: float,
+                 shuffle: bool=True
+    ):
+        self.gamma = gamma
+        self.gae_lambda = gae_lambda
+        self.fens = deque(maxlen=capacity)
+        self.repetition_counts = deque(maxlen=capacity)
+        self.actions = deque(maxlen=capacity)
+        self.log_probs = deque(maxlen=capacity)
+        self.values = deque(maxlen=capacity)
+        self.invalid_masks = deque(maxlen=capacity)
+        self.advantages = deque(maxlen=capacity)
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+    def push_game(self, game: "Game"):
+        """
+        Process a completed game and add its trajectories to the buffer.
+        White receives +1 / -1 / 0 for a win / loss / draw and black the negation.
+        Games flagged as invalid are ignored.
+        Raises:
+            ValueError: if a valid game carries an unrecognised result.
+        """
+        if not game.valid:
+            return
+        rewards = {"1-0": 1, "0-1": -1, "1/2-1/2": 0}
+        result = game.game_result
+        if result not in rewards:
+            raise ValueError(f"Result not recognized: {result}. Either an incompleted game was passed in, or game.update_game_status() method is wrong.")
+        white_reward = rewards[result]
+        # Black sees the same result with the sign flipped
+        for traj, reward in ((game.get_white_trajectory(), white_reward),
+                             (game.get_black_trajectory(), -white_reward)):
+            if traj["fens"]:
+                self._process_trajectory(
+                    traj["fens"],
+                    traj["repetition_counts"],
+                    traj["actions"],
+                    traj["log_probs"],
+                    traj["values"],
+                    traj["invalid_masks"],
+                    reward
+                )
+    def _process_trajectory(self, fens, reps, actions, log_probs, values, invalid_masks, final_reward):
+        """Process a trajectory for one player, compute advantages and add to buffer"""
+        values_tensor = torch.tensor(values) if not torch.is_tensor(values) else values
+        advantages = self._compute_advantage(values_tensor, final_reward)
+        for i, fen in enumerate(fens):
+            self.fens.append(fen)
+            self.repetition_counts.append(reps[i])
+            self.actions.append(actions[i])
+            self.log_probs.append(log_probs[i])
+            self.values.append(values[i])
+            self.invalid_masks.append(invalid_masks[i])
+            self.advantages.append(advantages[i])
+    def _compute_advantage(self, value_traj: torch.Tensor, final_reward: float) -> torch.Tensor:
+        """
+        Calculate GAE with only a terminal reward: r_t = 0 for t < T-1 and r_{T-1} = final_reward
+        Args:
+            value_traj: value prediction of the model, one entry per step; a 0-dim
+                tensor is treated as a single-step trajectory
+            final_reward: game result
+        Returns:
+            advantage, 1-D torch.Tensor with one entry per step
+        """
+        vals = value_traj.detach().cpu().float()
+        if vals.dim() == 0:
+            # A bare scalar cannot be indexed; promote it to a length-1 trajectory
+            vals = vals.unsqueeze(0)
+        T = vals.shape[0]
+        adv = torch.zeros(T)
+        next_value = 0.0 # value after the terminal step
+        gae = 0.0
+        for t in reversed(range(T)):
+            reward = final_reward if t == T-1 else 0.0
+            delta = reward + self.gamma * next_value - vals[t]
+            gae = delta + self.gamma * self.gae_lambda * gae
+            adv[t] = gae
+            next_value = vals[t]
+        return adv
+    @staticmethod
+    def _stack(items, batch_idx) -> torch.Tensor:
+        """Stack the selected items into one tensor, converting non-tensors first."""
+        return torch.stack([
+            items[i].detach().clone() if torch.is_tensor(items[i])
+            else torch.tensor(items[i])
+            for i in batch_idx
+        ])
+    def sample(self) -> Iterator[Tuple[List[str],   # fen
+                                       torch.Tensor,# rep
+                                       torch.Tensor,# act
+                                       torch.Tensor,# logp
+                                       torch.Tensor,# val
+                                       torch.Tensor,# inv_m
+                                       torch.Tensor]]: # adv
+        """Yield minibatches of size batch_size for training; a trailing partial batch is dropped"""
+        n = len(self.fens)
+        if n < self.batch_size:
+            return
+        idxs = np.arange(n)
+        if self.shuffle:
+            np.random.shuffle(idxs)
+        # Snapshot the deques once: indexing into the middle of a deque is O(n)
+        fens = list(self.fens)
+        reps = list(self.repetition_counts)
+        acts = list(self.actions)
+        logps = list(self.log_probs)
+        vals = list(self.values)
+        invs = list(self.invalid_masks)
+        advs = list(self.advantages)
+        for start in range(0, n - self.batch_size + 1, self.batch_size):
+            batch_idx = idxs[start:start+self.batch_size]
+            fens_b = [fens[i] for i in batch_idx]
+            reps_b = self._stack(reps, batch_idx)
+            acts_b = self._stack(acts, batch_idx)
+            logps_b = self._stack(logps, batch_idx)
+            vals_b = self._stack(vals, batch_idx)
+            advs_b = self._stack(advs, batch_idx)
+            invs_b = self._stack(invs, batch_idx)
+            yield fens_b, reps_b, acts_b, logps_b, vals_b, invs_b, advs_b
+    def __len__(self) -> int:
+        return len(self.fens)
+    def clear(self) -> None:
+        """Drop every stored sample."""
+        for column in (self.fens, self.repetition_counts, self.actions, self.log_probs,
+                       self.values, self.invalid_masks, self.advantages):
+            column.clear()

utils/chess_env.py ADDED Viewed

	@@ -0,0 +1,151 @@

+"""Provide a gym-like environment for clarity"""
+import chess
+import torch
+import time
+from typing import List, Tuple, Dict
+try:
+    from .mapping import IDX_TO_UCI_MOVE, UCI_MOVE_TO_IDX
+except:
+    from mapping import IDX_TO_UCI_MOVE, UCI_MOVE_TO_IDX
+class BatchChessEnv:
+    """A batch of independent chess games with sparse terminal reward"""
+    def __init__(self, batch_size: int, max_moves: int=200):
+        self.batch_size = batch_size
+        self.max_moves = max_moves
+        self.reset()
+    def reset(self) -> Tuple[List[str], torch.Tensor]:
+        """
+        Starts all games from the initial position
+        Returns:
+            fens (List[str]), repetition_counts (torch.Tensor of shape [batch_size,])
+        """
+        self.boards = [chess.Board() for _ in range(self.batch_size)]
+        self.move_counts = [0] * self.batch_size
+        self.done_flags = [False] * self.batch_size
+        fens = [board.fen() for board in self.boards]
+        reps = torch.ones(self.batch_size,dtype=torch.long)
+        return fens, reps # (bs,)
+    def _compute_rep(self, board: chess.Board) -> int:
+        """Count how often the current position has occurred in the game so far (at least 1)."""
+        board_copy = board.copy()
+        transposition_key = board_copy._transposition_key()
+        count = 0
+        # Walk the move stack backwards on a copy, comparing every earlier position
+        while board_copy.move_stack:
+            board_copy.pop()
+            if board_copy._transposition_key() == transposition_key:
+                count += 1
+        return count + 1 # 1 for fresh position
+    def _apply_move(self, i: int, board: chess.Board, move: str, info: Dict) -> bool:
+        """Apply one move (or draw claim) to game ``i`` and report whether the game ended.
+        Raises:
+            ValueError: if a draw is claimed that cannot be claimed.
+        """
+        if move == "<claim_draw>":
+            if not board.can_claim_draw():
+                raise ValueError(f"Invalid move ('<claim_draw>') passed in.")
+            info['result'] = "1/2-1/2"
+            return True
+        try:
+            m = chess.Move.from_uci(move)
+            if m not in board.legal_moves:
+                raise ValueError(f"Invalid move ('{m.uci()}') passed in.")
+            board.push(m)
+            self.move_counts[i] += 1
+            if board.is_game_over():
+                info['result'] = board.result()
+                return True
+            return False
+        except Exception as e:
+            # Unparsable or illegal move: truncate the game without a result
+            info['truncation_due_to_error'] = True
+            print(f"Unexpected error: {e}")
+            return True
+    def step(self, uci_moves: List[str]) -> Tuple[List[str],    # next fens (next state)
+                                                  torch.Tensor, # next reps (next state)
+                                                  List[bool],   # dones
+                                                  List[Dict]]:  # infos
+        """
+        Apply one move per game in the batch.
+        Args:
+            uci_moves: list of UCI strings (plus "<claim_draw>"); "0000" is a dummy
+                move that leaves the board untouched
+        Returns:
+            next_fens: new FENs for each game,                          List[str]
+            reps: repetition counts,                                    Tensor[batch_size]
+            dones: whether this game is now terminated,                 List[bool]
+            infos: info dict with 'result' key for completed games      List[dict]
+        """
+        next_fens, reps, dones, infos = [], [], [], []
+        for i, move in enumerate(uci_moves):
+            board = self.boards[i]
+            info = {
+                "max_steps_exceeded": False,
+                "truncation_due_to_error": False,
+                "result": None
+            }
+            if self.done_flags[i] or move == "0000":
+                # Finished game or dummy move: pass the existing state through
+                next_fens.append(board.fen())
+                reps.append(1)
+                dones.append(True)
+                infos.append(info)
+                continue
+            if board.is_game_over():
+                # Game already over
+                info["result"] = board.result()
+                next_fens.append(board.fen())
+                reps.append(self._compute_rep(board))
+                dones.append(True)
+                infos.append(info)
+                continue
+            try:
+                done = self._apply_move(i, board, move, info)
+            except Exception as e:
+                print(f"Error processing move {move} for board {i}: {e}")
+                done = True
+                info["truncation_due_to_error"] = True
+            # Apply the move limit only to games still running, so that a game decided
+            # on exactly the max_moves-th move keeps its real result.
+            if not done and self.move_counts[i] >= self.max_moves:
+                done = True
+                info['max_steps_exceeded'] = True
+                info['result'] = "1/2-1/2"
+            next_fens.append(board.fen())
+            reps.append(self._compute_rep(board))
+            dones.append(done)
+            infos.append(info)
+            self.done_flags[i] = done
+        reps = torch.tensor(reps,dtype=torch.long) # [bs,]
+        return next_fens, reps, dones, infos
+if __name__ == "__main__":
+    # Manual smoke test: play e2e4 on a single-game environment and print the
+    # repetition count computed for a copy of the resulting board.
+    env = BatchChessEnv(1)
+    env.reset()
+    board = env.boards[0]
+    board.push(chess.Move.from_uci("e2e4"))
+    new_board = board.copy()
+    rep = env._compute_rep(new_board)
+    print(rep)

utils/engine.py ADDED Viewed

	@@ -0,0 +1,759 @@

+"""An engine class to provide a universal way to interact with both chessformer and stockfish"""
+import torch
+import chess
+import math
+import chess.engine
+import multiprocessing
+from dataclasses import dataclass, field
+from functools import partial
+import time
+import os
+try:
+    from .mapping import UCI_MOVE_TO_IDX, IDX_TO_UCI_MOVE
+except ImportError:
+    from mapping import UCI_MOVE_TO_IDX, IDX_TO_UCI_MOVE
+from torch.distributions import Categorical
+from typing import Optional, Tuple, List, Union
+@dataclass
+class ChessformerConfig:
+    """Settings read by ``Engine`` when it is created with ``type="chessformer"``."""
+    # Model instance. Engine.__init__ rejects None, moves the model to the
+    # resolved device and switches it to eval mode.
+    chessformer: Optional[torch.nn.Module]=None
+    # Target device. When None, Engine picks CUDA if available, otherwise CPU.
+    device: Optional[torch.device]=None
+    # Logits are divided by this value before sampling; Engine requires > 0.
+    temperature: float=0.5
+    # Search depth; 0 selects the raw policy path without search. Engine requires >= 0.
+    depth: int=2
+    # Number of candidate moves kept at the root; Engine requires > 0.
+    top_k: int=8
+    # Per-level shrink factor for the candidate count during search:
+    # k = max(1, int(top_k * decay_rate**level)). Engine requires a value in (0, 1].
+    decay_rate: float=0.6
+    # Upper bound on the number of boards accepted by the batched move path; Engine requires > 0.
+    max_batch_size: int=896
+@dataclass
+class StockfishConfig:
+    """Settings read by ``Engine`` when it is created with ``type="stockfish"``."""
+    # Path of the Stockfish executable handed to chess.engine.SimpleEngine.popen_uci.
+    engine_path: str="/usr/games/stockfish"
+    # Depth limit passed to every engine.analyse call.
+    depth: int=16
+def _stockfish_worker(board_fen: str, engine_path: str, depth: int) -> Optional[Tuple[str, float]]:
+    """
+    Analyzes a single board FEN using a temporary Stockfish engine instance.
+    Designed for use with multiprocessing.
+    Returns the best move UCI and the normalized score [-1,1].
+    Does not handle draw claims explicitly as FEN lacks history.
+    Caller should check board.is_game_over() on the main board object.
+    """
+    engine = None
+    try:
+        engine = chess.engine.SimpleEngine.popen_uci(engine_path)
+        # initialize board from FEN - history is lost here
+        board = chess.Board(board_fen)
+        info = engine.analyse(board, chess.engine.Limit(depth=depth))
+        score_obj = info.get("score")
+        pv = info.get("pv")
+        if score_obj is None or pv is None or not pv:
+            # Analysis failed
+            print(f"Warning: Stockfish analysis failed for FEN: {board_fen}")
+            return None
+        best_move_uci = pv[0].uci()
+        pov_score = score_obj.pov(board.turn)
+        cp = None
+        if pov_score.is_mate():
+            mate_score = pov_score.mate()
+            cp = 10000.0 if mate_score > 0 else -10000.0
+        elif pov_score.cp is not None:
+            cp = float(pov_score.cp)
+        else:
+            print(f"Warning: Stockfish score object lacks cp/mate for FEN: {board_fen}")
+            return None # analysis is unclear
+        normalized_cp = 2 / (1 + math.exp(-0.004*cp)) - 1
+        return best_move_uci, normalized_cp
+    except (chess.engine.EngineError, chess.engine.EngineTerminatedError, FileNotFoundError, ValueError) as e:
+        print(f"Stockfish worker error for FEN {board_fen}: {e}")
+        return None
+    finally:
+        if engine:
+            engine.quit()
+def _compute_repetition_single(board: chess.Board) -> int:
+    """Compute repetition count. Used in _chessformer_move and _batch_chessformer_move"""
+    transposition_key = board._transposition_key()
+    count = 0
+    if board.move_stack:
+        if board._transposition_key() == transposition_key:
+            count = 1
+    else:
+        count = 1
+    try:
+        # Iterate back through history
+        while board.move_stack:
+            move = board.pop() # note that history is lost here
+            if board.is_irreversible(move):
+                break
+            if board._transposition_key() == transposition_key:
+                count += 1
+    except Exception as e:
+        print(f"Error occurred during repetition count for board {board.fen()}: {e}")
+        return 1 # fallback to 1
+    return max(1, count)
+# Engine class, designed to be used in the Evaluator class and app.py
+class Engine:
+    def __init__(self,
+                 type: str,
+                 chessformer_config: Optional[ChessformerConfig]=None,
+                 stockfish_config: Optional[StockfishConfig]=None):
+        self.type = type
+        if type == "chessformer":
+            if chessformer_config is None:
+                raise ValueError("ChessformerConfig must be provided for chessformer engine.")
+            self.config = chessformer_config
+            if self.config.chessformer is None:
+                raise ValueError("ChessFormer model must be provided in config.")
+            if self.config.device is None:
+                self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            elif isinstance(self.config.device, str):
+                self.device = torch.device(self.config.device)
+            else:
+                self.device = self.config.device
+            self.model = self.config.chessformer
+            self.model.to(self.device)
+            self.model.eval()
+            if not (self.config.temperature > 0):
+                raise ValueError("Temperature must be greater than 0.")
+            if not (self.config.top_k > 0):
+                raise ValueError("Top-k must be greater than 0.")
+            if not (self.config.depth >= 0):
+                raise ValueError("Depth must be greater than or equal to 0.")
+            if not (0.0 < self.config.decay_rate <= 1.0):
+                raise ValueError("Decay rate must be in range (0.0,1.0].")
+            if not (self.config.max_batch_size > 0):
+                raise ValueError("Max batch size must be an integer greater than 0.")
+            self.temperature = self.config.temperature
+            self.top_k = self.config.top_k
+            self.initial_k = self.top_k
+            self.depth = self.config.depth
+            self.decay_rate = self.config.decay_rate
+            self.max_batch_size = self.config.max_batch_size
+        elif type == "stockfish":
+            if stockfish_config is None:
+                raise ValueError("StockfishConfig must be provided for stockfish engine.")
+            self.config = stockfish_config
+            self.engine_path = self.config.engine_path
+            self.depth = self.config.depth
+            if self.config.engine_path is None:
+                raise ValueError("Engine path must be provided in config.")
+            try:
+                with chess.engine.SimpleEngine.popen_uci(self.config.engine_path) as test:
+                    pass
+            except (FileNotFoundError, chess.engine.EngineError) as e:
+                raise ValueError(f"Invalid engine path or engine not found: {e}")
+        else:
+            raise ValueError("Invalid engine type. Choose 'chessformer' or 'stockfish'.")
+    def get_invalid_mask(self, boards: List[chess.Board]) -> torch.Tensor:
+        bs = len(boards)
+        possible_moves = len(UCI_MOVE_TO_IDX)
+        invalid_mask = torch.full((bs,possible_moves), -torch.inf, dtype=torch.float32, device=self.device)
+        for idx,board in enumerate(boards):
+            if board.is_game_over(claim_draw=True):
+                continue # leave all as -inf
+            legal_moves = list(board.legal_moves)
+            legal_move_ids = [UCI_MOVE_TO_IDX[move.uci()] for move in legal_moves]
+            if legal_move_ids:
+                invalid_mask[idx,legal_move_ids] = 0
+            if board.can_claim_draw():
+                invalid_mask[idx,0] = 0
+        return invalid_mask
+    def compute_repetition(self, boards: List[chess.Board]) -> torch.Tensor:
+        """Use multiprocessing to compute repetition count for a batch of boards."""
+        bs = len(boards)
+        num_workers = min(bs, max(1, os.cpu_count()//2 if os.cpu_count else 1))
+        if bs < num_workers * 2: # avoid overhead for very small batches per worker
+            num_workers = max(1, bs//2)
+        try:
+            if num_workers > 1 and bs > 1:
+                board_copies = [board.copy(stack=True) for board in boards]
+                with multiprocessing.Pool(processes=num_workers) as pool:
+                    counts = pool.map(_compute_repetition_single, board_copies)
+            else:
+                # Run sequentially if only one worker needed or very small batch
+                counts = [_compute_repetition_single(b.copy(stack=True)) for b in boards]
+            counts_tensor = torch.tensor(counts, dtype=torch.long, device=self.device)
+            return counts_tensor # (B,)
+        except Exception as e:
+            print(f"Error during batch repetition computation: {e}")
+            # Fall back to single board computation if multiprocessing fails
+            return torch.ones((bs,),dtype=torch.long, device=self.device)
+    def _raw_chessformer_move(self, board: chess.Board, return_perplexity: bool=False) -> Tuple[str,float]:
+        """Get the next move from ChessFormer model with optional tactical verification."""
+        # Get FEN
+        fen = board.fen()
+        # Compute repetition
+        count_tensor = self.compute_repetition([board])
+        move_logits, value = self.model([fen],count_tensor)
+        move_logits = move_logits.squeeze(0) # remove batch dimension since it will always be 1
+        value = value.squeeze(0).item()
+        # Calculate invalid mask
+        legal_moves = list(board.legal_moves)
+        if not legal_moves and not board.can_claim_draw():
+            # No legal moves. Should not happen if this function is called correctly, but it wouldn't hurt to add a check
+            return None
+        legal_move_ids = [UCI_MOVE_TO_IDX[move.uci()] for move in legal_moves]
+        invalid_mask = torch.ones_like(move_logits) * (-torch.inf)
+        invalid_mask[legal_move_ids] = 0
+        if board.can_claim_draw():
+            invalid_mask[0] = 0
+        move_logits = move_logits + invalid_mask
+        if return_perplexity:
+            probs = torch.softmax(move_logits, dim=-1)
+            perplexity = torch.exp(-torch.sum(probs*torch.log(probs+1e-8))).item()
+        top_k_ids = torch.topk(move_logits, self.top_k, dim=-1).indices
+        top_k_mask = torch.ones_like(move_logits) * (-torch.inf)
+        top_k_mask[top_k_ids] = 0
+        move_logits = move_logits + top_k_mask
+        move_logits = move_logits / self.temperature
+        # Sample
+        dist = Categorical(logits=move_logits)
+        move_id = dist.sample().item()
+        move = IDX_TO_UCI_MOVE[move_id]
+        if return_perplexity:
+            return move, value, perplexity
+        else:
+            return move, value
+    def _search_enhanced_move(self, board: chess.Board, return_perplexity: bool=False, verbose: bool=False) -> Tuple[str,float]:
+        """Get move from chessformer using tactical search.
+
+        Expands the top-k policy moves level by level down to ``self.depth``,
+        scores the leaves (game result for finished games, model value
+        otherwise), averages leaf values per root move weighted by path
+        probability, and blends that signal into the root policy logits
+        before sampling among the root's top-k candidates.
+
+        Args:
+            board: Root position. Child positions are built on copies.
+            return_perplexity: When True, also return the perplexity of the
+                final sampling distribution.
+            verbose: When True (and ``return_perplexity`` is True and
+                ``self.depth > 0``), print a per-candidate debug table.
+
+        Returns:
+            ``(move_uci, value)`` or ``(move_uci, value, perplexity)``.
+            ``value`` is the probability-weighted sum of leaf values,
+            sign-flipped when Black is to move at the root.
+
+        NOTE(review): if the root position is already over under
+        ``is_game_over(claim_draw=True)``, the ``initial_*`` variables are
+        never assigned and Step 4 fails with an unbound-local error. Callers
+        are presumably expected to check for game over first — confirm.
+        """
+        # Step 1: Build search tree level by level
+        current_boards = [board] # aggregate board to a list for batch inference
+        board_probs = [1] # the probabilities of getting to this position (estimated)
+        terminal_leaves = [] # (root_move, prob, game_result_value) ^from white's perspective
+        search_leaves = [] # (root_move, prob, board) - leaves not terminal but reached max depth therefore needs evaluation from model
+        # Track which root_move each board came from
+        board_to_root_move = [None] # root board has no parent move
+        for depth in range(self.depth+1):
+            if not current_boards:
+                break
+            # Candidate count shrinks geometrically with depth, never below 1.
+            k = max(1,int(self.initial_k*(self.decay_rate**depth)))
+            fens = [b.fen() for b in current_boards]
+            reps = self.compute_repetition(current_boards)
+            # NOTE(review): `values` is not read inside this loop, and at the
+            # last level the logits are unused too, since non-terminal boards
+            # are re-evaluated in Step 2.
+            with torch.no_grad():
+                logits, values = self.model(fens,reps)
+            next_boards = []
+            next_board_probs = []
+            next_board_to_root_move = []
+            # Process each board at current depth
+            for board_idx, current_board in enumerate(current_boards):
+                board_logits = logits[board_idx]
+                board_prob = board_probs[board_idx]
+                parent_root_move = board_to_root_move[board_idx]
+                # Check if game is over
+                if current_board.is_game_over(claim_draw=True):
+                    outcome = current_board.outcome(claim_draw=True)
+                    if outcome.winner == chess.WHITE:
+                        game_value = 1.0
+                    elif outcome.winner == chess.BLACK:
+                        game_value = -1.0
+                    else:
+                        game_value = 0.0
+                    terminal_leaves.append((parent_root_move, board_prob, game_value))
+                    continue
+                # If we've reached max depth, add to search leaves
+                if depth == self.depth:
+                    search_leaves.append((parent_root_move, board_prob, current_board))
+                    continue
+                # Otherwise, expand this board one level deeper
+                invalid_mask = self.get_invalid_mask([current_board])[0]
+                masked_logits = board_logits + invalid_mask
+                # k is capped by the number of entries the mask leaves playable.
+                top_k_values, top_k_indices = torch.topk(masked_logits,k=min(k,torch.sum(invalid_mask==0).item()))
+                # Probabilities are renormalized over the kept candidates only.
+                top_k_probs = torch.softmax(top_k_values,dim=0)
+                if depth==0:
+                    # Remember the root's masked logits, mask and candidates for Step 4.
+                    initial_masked_logits = masked_logits.squeeze(0)
+                    initial_invalid_mask = invalid_mask.squeeze(0)
+                    initial_top_k_indices = top_k_indices
+                # Expand each top k move
+                for i,move_idx in enumerate(top_k_indices):
+                    move_prob = top_k_probs[i].item()
+                    move_uci = IDX_TO_UCI_MOVE[move_idx.item()]
+                    # Below the root, children inherit the root move they descend from.
+                    root_move = parent_root_move if parent_root_move is not None else move_uci
+                    new_board = current_board.copy()
+                    if move_uci == "<claim_draw>":
+                        if new_board.can_claim_draw():
+                            # A claimed draw ends the line with value 0.
+                            terminal_leaves.append((root_move,board_prob*move_prob,0.0))
+                            continue
+                        else:
+                            continue # should not happen, invalid draw claim
+                    else:
+                        move = chess.Move.from_uci(move_uci)
+                        new_board.push(move)
+                    next_boards.append(new_board)
+                    next_board_probs.append(board_prob*move_prob)
+                    next_board_to_root_move.append(root_move)
+            current_boards = next_boards
+            board_probs = next_board_probs
+            board_to_root_move = next_board_to_root_move
+        # Step 2: Evaluate all search leaves
+        if search_leaves:
+            search_boards = [leaf[2] for leaf in search_leaves]
+            search_fens = [b.fen() for b in search_boards]
+            search_reps = self.compute_repetition(search_boards)
+            with torch.no_grad():
+                _, search_values = self.model(search_fens, search_reps)
+            for i, (root_move, prob, leaf_board) in enumerate(search_leaves):
+                value = search_values[i].item()
+                # The value is negated when Black is to move at the leaf, i.e. it
+                # is treated as being from the side to move's perspective.
+                white_perspective_value = value if leaf_board.turn == chess.WHITE else -value
+                terminal_leaves.append((root_move,prob,white_perspective_value))
+        # Step 3: Aggregate all leaves using probability weights
+        root_move_weighted_values = {}
+        root_move_total_probs = {}
+        for root_move, prob, value in terminal_leaves:
+            if root_move not in root_move_weighted_values:
+                root_move_weighted_values[root_move] = 0.0
+                root_move_total_probs[root_move] = 0.0
+            root_move_weighted_values[root_move] += prob * value
+            root_move_total_probs[root_move] += prob
+        # Overall value: weighted sum over all leaves, then flipped to the root mover's side.
+        final_value = sum(root_move_weighted_values.values())
+        final_value = final_value if board.turn == chess.WHITE else -final_value
+        # Per-root-move value: probability-weighted mean of its leaves.
+        root_move_values = {}
+        for root_move in root_move_total_probs:
+            if root_move_total_probs[root_move] > 0:
+                root_move_values[root_move] = root_move_weighted_values[root_move] / root_move_total_probs[root_move]
+            else:
+                root_move_values[root_move] = 0
+        # Step 4: Confidence-based weighting with search results
+        # Confidence is 1 minus the normalized entropy of the root policy: a
+        # peaked policy is trusted, a flat one defers to the search values.
+        initial_probs = torch.softmax(initial_masked_logits,dim=0)
+        entropy = -torch.sum(initial_probs*torch.log(initial_probs+1e-8))
+        max_entropy = math.log(torch.sum(initial_invalid_mask==0).item())
+        confidence = 1.0 - (entropy/max_entropy) if max_entropy > 0 else 1.0
+        if root_move_values:
+            search_adjustment_logits = torch.zeros_like(initial_masked_logits)
+            for move_uci,search_value in root_move_values.items():
+                move_idx = UCI_MOVE_TO_IDX[move_uci]
+                search_adjustment_logits[move_idx] += search_value
+            # flip value according to perspective
+            search_adjustment_logits = search_adjustment_logits if board.turn==chess.WHITE else -search_adjustment_logits
+            # The mean is taken over the whole move vocabulary, not just the candidates.
+            search_adjustment_logits = search_adjustment_logits - search_adjustment_logits.mean()
+            # Normalize search logits to be in the same norm as the initial logits
+            initial_valid_norm = torch.norm(initial_masked_logits[initial_top_k_indices]) + 1e-8
+            search_valid_norm = torch.norm(search_adjustment_logits[initial_top_k_indices]) + 1e-8
+            normalized_search = search_adjustment_logits * initial_valid_norm / search_valid_norm
+            normalized_initial = initial_masked_logits
+            adjusted_logits = confidence * normalized_initial + (1 - confidence) * normalized_search
+        else:
+            adjusted_logits = initial_masked_logits
+        # Apply temperature and top-k filtering
+        top_k_mask = torch.full_like(adjusted_logits, -torch.inf)
+        top_k_mask[initial_top_k_indices] = 0
+        adjusted_logits = adjusted_logits + top_k_mask
+        adjusted_logits = adjusted_logits / self.temperature
+        dist = Categorical(logits=adjusted_logits)
+        move_idx = dist.sample().item()
+        move_uci = IDX_TO_UCI_MOVE[move_idx]
+        if return_perplexity:
+            final_probs = torch.softmax(adjusted_logits,dim=0)
+            perplexity = torch.exp(-torch.sum(final_probs * torch.log(final_probs + 1e-8))).item()
+            if verbose and self.depth > 0:
+                # NOTE(review): normalized_initial is only assigned when
+                # root_move_values is non-empty, so this table would fail
+                # otherwise. The "White's POV" label below is printed after
+                # final_value was flipped for Black to move — confirm intent.
+                print(f"\n--- Search Enhanced Move Debug Info ({board.fen()}) ---")
+                print(f"Confidence: {confidence:.4f}")
+                print("\nMove Analysis (Initial Top-K Candidates):")
+                print(f"{'Move':<8} {'Initial Logit':<15} {'Search Adj. Logit':<19} {'Final Adj. Logit':<18} {'Final Prob':<12}")
+                print(f"{'-'*8:<8} {'-'*15:<15} {'-'*19:<19} {'-'*18:<18} {'-'*12:<12}")
+                for i, idx in enumerate(initial_top_k_indices):
+                    move_uci_v = IDX_TO_UCI_MOVE[idx.item()]
+                    initial_logit = normalized_initial[idx].item()
+                    search_adj_logit_val = normalized_search[idx].item() if root_move_values else 0.0
+                    final_adj_logit = adjusted_logits[idx].item()
+                    final_prob_val = final_probs[idx].item()
+                    print(f"{move_uci_v:<8} {initial_logit:<15.4f} {search_adj_logit_val:<19.4f} {final_adj_logit:<18.4f} {final_prob_val:<12.4f}")
+                print(f"\nPerplexity: {perplexity:.4f}")
+                print(f"Predicted Value (White's POV): {final_value:.4f}")
+                print("\nLeaf Node Values (Root Move, Probability, Value from White's POV):")
+                for rm, prob, val in terminal_leaves:
+                    print(f"  Root Move: {rm:<8}, Prob: {prob:<.4f}, Value: {val:<.4f}")
+                print("--------------------------------------------------")
+            return move_uci, final_value, perplexity
+        else:
+            return move_uci, final_value
+    def _chessformer_move(self, board: chess.Board, return_perplexity: bool=False, verbose: bool=False) -> Tuple[str,float]:
+        """Get move from chessformer with optional search enhance"""
+        if self.depth == 0:
+            return self._raw_chessformer_move(board,return_perplexity)
+        else:
+            return self._search_enhanced_move(board,return_perplexity,verbose)
+    def _stockfish_move(self, board: chess.Board, return_perplexity: bool=False) -> Tuple[str,float]:
+        """Get best move from stockfish"""
+        try:
+            engine = chess.engine.SimpleEngine.popen_uci(self.engine_path)
+            info = engine.analyse(board, chess.engine.Limit(depth=self.depth))
+        except (chess.engine.EngineError, chess.engine.EngineTerminatedError) as e:
+            print(f"Stockfish error: {e}")
+            return None
+        loss_threshold = -0.4
+        score_obj = info.get("score")
+        can_claim_draw = board.can_claim_draw()
+        if score_obj is None or info.get("pv") is None or not info.get("pv"):
+            # Invalid analysis result
+            return None
+        pv = info["pv"]
+        pov_score = score_obj.pov(chess.WHITE)
+        cp = None
+        if pov_score.is_mate():
+            mate_score = pov_score.mate()
+            cp = 10000.0 if mate_score > 0 else -10000.0
+            relative_score = score_obj.relative
+            if relative_score.is_mate():
+                cp = 10000.0 if relative_score.mate() > 0 else -10000.0
+            else:
+                if relative_score.cp is not None:
+                    cp = float(relative_score.cp)
+                else:
+                    return None
+        elif pov_score.cp is not None:
+            relative_score = score_obj.relative
+            if relative_score.cp is not None:
+                cp = float(relative_score.cp)
+            else:
+                return None
+        else:
+            return None
+        if cp is not None:
+            normalized_score = 2 / (1+math.exp(-0.004*cp)) - 1
+        else:
+            return None
+        if can_claim_draw and normalized_score < loss_threshold:
+            best_move_uci = "<claim_draw>"
+        else:
+            best_move_uci = pv[0].uci()
+        if engine:
+            engine.quit()
+        if return_perplexity:
+            return best_move_uci, normalized_score, None
+        else:
+            return best_move_uci, normalized_score
+    def _batch_chessformer_move(self, boards: List[chess.Board]) -> List[Tuple[str, float]]:
+        """Get the next moves from Chessformer model using batch inference."""
+        bs = len(boards)
+        if bs > self.max_batch_size:
+            raise ValueError(f"num boards ({bs}) exceeded max batch size ({self.max_batch_size}).")
+        fens = [board.fen() for board in boards]
+        count_tensor = self.compute_repetition(boards) # shape (bs, 1)
+        count_tensor = count_tensor.to(self.device)
+        with torch.no_grad():
+            move_logits, values = self.model(fens, count_tensor)
+        invalid_mask = self.get_invalid_mask(boards)
+        # Apply mask
+        move_logits = move_logits + invalid_mask
+        all_masked = torch.all(torch.isinf(move_logits), dim=-1)
+        # Apply top-p filtering
+        if 0.0 < self.top_p < 1.0: # Apply only if top_p is strictly between 0 and 1
+            sorted_logits, sorted_indices = torch.sort(move_logits, descending=True, dim=-1)
+            cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+            sorted_indices_to_remove = cumulative_probs > self.top_p
+            sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+            sorted_indices_to_remove[..., 0] = 0
+            indices_to_remove = torch.zeros_like(move_logits, dtype=torch.bool).scatter_(
+                dim=-1, index=sorted_indices, src=sorted_indices_to_remove
+            )
+            move_logits[indices_to_remove] = -torch.inf
+        # Apply temperature
+        temp = self.temperature if self.temperature > 0 else 1.0
+        move_logits = move_logits / temp
+        # Sample moves
+        dist = Categorical(logits=move_logits)
+        try:
+            sampled_indices = dist.sample()
+        except RuntimeError as e:
+            print(f"Error sampling moves: {e}. Checking logit values...")
+            results = []
+            for i in range(bs):
+                print(f"Board {i} logits sum: {torch.logsumexp(move_logits[i], dim=-1)}")
+                results.append(None) # indicate failure
+            return results
+        results = []
+        for i in range(bs):
+            if all_masked[i]:
+                results.append(None) # Game already over
+                continue
+            move_id = sampled_indices[i].item()
+            move_uci = IDX_TO_UCI_MOVE.get(move_id)
+            value = values[i].item()
+            if move_uci is None:
+                print(f"Warning: Sampled move ID {move_id} not in IDX_TO_UCI_MOVE map")
+                results.append(None)
+                continue
+            results.append((move_uci, value))
+        return results
+    def _batch_stockfish_move(self, boards: List[chess.Board], allow_claim_draw: bool=False) -> List[Tuple[str, float]]:
+        """Get the next moves from Stockfish engine using multiprocessing"""
+        if allow_claim_draw:
+            """Use sequential processing to maintain board history"""
+            return [self._stockfish_move(board) for board in boards]
+        else:
+            """Use multiprocessing to speed up if no need to include claim draw logic"""
+            bs = len(boards)
+            num_workers = min(bs, max(1, os.cpu_count()//2 if os.cpu_count() else 1))
+            if bs < num_workers * 2:
+                num_workers = max(1, bs//2)
+                if bs == 1: num_workers = 1
+            board_fens = [board.fen() for board in boards]
+            worker_func = partial(_stockfish_worker,
+                                  engine_path=self.engine_path,
+                                  depth=self.depth)
+            results: List[Optional[Tuple[str,float]]] = [None] * bs
+            active_indices = [i for i,b in enumerate(boards) if not b.is_game_over(claim_draw=True)]
+            active_fens = [board_fens[i] for i in active_indices]
+            if not active_fens:
+                # All games are over
+                return results # list of None
+            try:
+                if num_workers > 1 and len(active_fens) > 1:
+                    with multiprocessing.Pool(processes=num_workers) as pool:
+                        worker_results = pool.map(worker_func, active_fens)
+                else:
+                    worker_results = [worker_func(fen) for fen in active_fens]
+                for i, res in enumerate(worker_results):
+                    original_index = active_indices[i]
+                    results[original_index] = res
+            except Exception as e:
+                print(f"Error during batch Stockfish move: {e}")
+            return results
+    def move(self, board: chess.Board, return_perplexity: bool=False) -> Tuple[str, float]:
+        if self.type == "stockfish":
+            return self._stockfish_move(board, return_perplexity)
+        elif self.type == "chessformer":
+            return self._chessformer_move(board, return_perplexity)
+        else:
+            raise ValueError(f"Invalid engine type: {self.type}")
+    def batch_move(self, boards: List[chess.Board]) -> List[Tuple[str, float]]:
+        if self.type == "stockfish":
+            return self._batch_stockfish_move(boards)
+        elif self.type == "chessformer":
+            return self._batch_chessformer_move(boards)
+        else:
+            raise ValueError(f"Invalid engine type: {self.type}")
+    def analyze_position(self, board: chess.Board) -> Optional[float]:
+        """
+        Analyzes the given **single board** position using the engine.
+        For Stockfish, returns list of centipawn scores from white's perspective;
+        For ChessFormer, returns list of models's value estimates
+        Returns None if analysis failed.
+        """
+        if self.type == "stockfish":
+            try:
+                engine = chess.engine.SimpleEngine.popen_uci(self.engine_path)
+                info = engine.analyse(board,chess.engine.Limit(depth=self.depth))
+                engine.quit()
+            except Exception as e:
+                print(f"Stockfish error: {e}")
+                return None
+            score_obj = info.get("score")
+            pov_score = score_obj.pov(chess.WHITE)
+            cp = None
+            if pov_score.is_mate():
+                mate_score = pov_score.mate()
+                cp = 10000.0 if mate_score > 0 else -10000.0
+                relative_score = score_obj.relative
+                if relative_score.is_mate():
+                    cp = 10000.0 if relative_score.mate() > 0 else -10000.0
+                else:
+                    if relative_score.cp is not None:
+                        cp = float(relative_score.cp)
+                    else:
+                        return None
+            elif pov_score.cp is not None:
+                relative_score = score_obj.relative
+                if relative_score.cp is not None:
+                    cp = float(relative_score.cp)
+                else:
+                    return None
+            else:
+                return None
+            if cp is not None:
+                normalized_score = 2 / (1+math.exp(-0.004*cp)) - 1
+                return normalized_score if board.turn == chess.WHITE else -normalized_score
+            else:
+                return None
+        elif self.type == "chessformer":
+            fen = board.fen()
+            count_tensor = self.compute_repetition([board.copy(stack=True)])
+            with torch.no_grad():
+                _, value = self.model([fen],count_tensor)
+            value = value.item()
+            return value if board.turn == chess.WHITE else -value
+        else:
+            raise ValueError(f"Invalid engine type.")
+def test_search_enhanced_move(model_path,device):
+    """Test the search-enhanced move functionality"""
+    print("\n--- Testing Search-Enhanced ChessFormer ---")
+    import sys
+    sys.path.append("./")
+    try:
+        from model import ChessFormerModel
+    except ImportError:
+        from model import ChessFormerModel
+    # Load the trained model
+    checkpoint = torch.load(model_path,map_location=device)
+    config = checkpoint["config"]
+    model = ChessFormerModel(**config)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device)
+    # Test different search configurations
+    test_configs = [
+        #{"depth": 0, "top_k": 8, "decay_rate": 0.6, "temperature": 0.2},  # No search (baseline)
+        #{"depth": 1, "top_k": 8, "decay_rate": 0.6, "temperature": 0.2},  # Shallow search
+        {"depth": 8, "top_k": 8, "decay_rate": 0.5, "temperature": 0.5},  # Medium search
+    ]
+    # Test positions
+    test_positions = [
+        #chess.Board(),  # Starting position
+        #chess.Board("r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 4 4"),  # Italian Game
+        #chess.Board("rnbqkbnr/pp1ppppp/8/2p5/4P3/8/PPPP1PPP/RNBQKBNR w KQkq c6 0 2"),  # Sicilian Defense
+        #chess.Board("r1bq1rk1/ppp2ppp/2n2n2/2bpp3/2B1P3/3P1N2/PPP2PPP/RNBQ1RK1 w - - 0 6"),  # Complex middlegame
+        chess.Board("r1b1k2r/1p2bpp1/2p1p1np/2N1P3/1q1P4/5N2/B1Q2PPP/R3R1K1 w kq - 0 19"), # blunder: c2e4
+        chess.Board("rn1qk2r/1b2bpp1/1pp1pn1p/p7/3P4/2PB1N2/PP1NQPPP/R1B1R1K1 w kq - 2 12"), # blunder: e2e6
+    ]
+    for i, cfg in enumerate(test_configs):
+        print(f"\n--- Test Configuration {i+1}: Depth={cfg['depth']}, Top-K={cfg['top_k']}, Decay={cfg['decay_rate']}, Temp={cfg['temperature']} ---")
+        chessformer_config = ChessformerConfig(
+            chessformer=model,
+            device=device,
+            temperature=cfg['temperature'],
+            depth=cfg['depth'],
+            top_k=cfg['top_k'],
+            decay_rate=cfg['decay_rate']
+        )
+        engine = Engine(type="chessformer", chessformer_config=chessformer_config)
+        for j, board in enumerate(test_positions):
+            print(f"\n--- Analyzing Position {j+1}: {board.fen()} ---")
+            try:
+                move, value, perplexity = engine._chessformer_move(board, return_perplexity=True, verbose=True)
+                print(f"Selected Move: {move}, Predicted Value (White's POV): {value:.4f}, Perplexity: {perplexity:.4f}")
+            except Exception as e:
+                print(f"Error analyzing position {board.fen()}: {e}")
+                import traceback
+                traceback.print_exc()
+if __name__ == "__main__":
+    model_path = "./ckpts/chessformer-sl_01.pth"
+    device = torch.device("cpu")
+    test_search_enhanced_move(model_path,device)

utils/mapping.py ADDED Viewed

	@@ -0,0 +1,141 @@

+from typing import List, Dict, Tuple, Set
+# --- Constants --- #
+MAX_HALFMOVES = 128 # cap for embedding table size
+MAX_FULLMOVES = 256 # cap for embedding table size
+# --- Helper Mappings --- #
+PIECE_TO_IDX: Dict[str, int] = {
+    'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
+    'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11,
+    '.': 12
+}
+IDX_TO_PIECE: Dict[int, str] = {v: k for k, v in PIECE_TO_IDX.items()}
+EMPTY_SQ_IDX = PIECE_TO_IDX['.']
+# Map algebraic square notation (e.g., 'a1', 'h8') to 0-63 index
+# a1=0, b1=1, ..., h1=7, a2=8, ..., h8=63
+SQUARE_TO_IDX: Dict[str, int] = {
+    f"{file}{rank}": (rank - 1) * 8 + (ord(file) - ord('a'))
+    for rank in range(1, 9)
+    for file in 'abcdefgh'
+}
+IDX_TO_SQUARE: Dict[int, str] = {v: k for k, v in SQUARE_TO_IDX.items()}
+# --- Coordinate and Notation Helpers ---
+# Precompute maps for efficiency
+_IDX_TO_COORDS: Dict[int, Tuple[int, int]] = {i: (i // 8, i % 8) for i in range(64)} # (rank, file) 0-7
+_COORDS_TO_IDX: Dict[Tuple[int, int], int] = {v: k for k, v in _IDX_TO_COORDS.items()}
+_IDX_TO_ALG: Dict[int, str] = {
+    i: f"{chr(ord('a') + file)}{rank + 1}"
+    for i, (rank, file) in _IDX_TO_COORDS.items()
+}
+_ALG_TO_IDX: Dict[str, int] = {v: k for k, v in _IDX_TO_ALG.items()}
+def _coords_to_alg(r: int, f: int) -> str:
+    """Converts 0-indexed (rank, file) to algebraic notation."""
+    if 0 <= r < 8 and 0 <= f < 8:
+        return f"{chr(ord('a') + f)}{r + 1}"
+    # This should not happen with valid indices, but good for safety
+    raise ValueError(f"Invalid coordinates: ({r}, {f})")
+def generate_structurally_valid_move_map() -> Dict[str, int]:
+    """
+    Generates a dictionary mapping chess moves that are geometrically possible
+    by *some* standard piece (K, Q, R, B, N, or P) to unique integer indices.
+    It excludes moves that are structurally impossible for any piece to make
+    in one turn (e.g., a1->h5 for non-knight).
+    Includes standard UCI promotions (e.g., "e7e8q"), replacing the
+    corresponding simple pawn move to the final rank (e.g., "e7e8").
+    This is based purely on piece movement geometry, not the current board state.
+    Returns:
+        Dict[str, int]: A map from the valid UCI move string to a unique
+                        integer index (0 to N-1). The size N is expected
+                        to be around 1800-1900.
+    """
+    valid_moves: Set[str] = set()
+    # Keep track of base moves (like 'e7e8') that are replaced by promotions
+    # according to UCI standard.
+    promo_base_moves_to_exclude: Set[str] = set()
+    # 1. Generate all geometrically possible non-promotion moves
+    for from_idx in range(64):
+        from_r, from_f = _IDX_TO_COORDS[from_idx]
+        from_alg = _IDX_TO_ALG[from_idx]
+        for to_idx in range(64):
+            if from_idx == to_idx:
+                continue
+            to_r, to_f = _IDX_TO_COORDS[to_idx]
+            to_alg = _IDX_TO_ALG[to_idx]
+            dr, df = to_r - from_r, to_f - from_f
+            abs_dr, abs_df = abs(dr), abs(df)
+            # Check if the geometry matches any standard piece movement
+            # Note: Queen moves are covered by Rook + Bishop checks.
+            # Note: Pawn single pushes/captures are covered by King/Rook/Bishop geometry.
+            # Note: Pawn double pushes are covered by Rook geometry.
+            is_king_move = max(abs_dr, abs_df) == 1
+            is_knight_move = (abs_dr == 2 and abs_df == 1) or (abs_dr == 1 and abs_df == 2)
+            is_rook_move = dr == 0 or df == 0 # Includes King horiz/vert & pawn double push
+            is_bishop_move = abs_dr == abs_df # Includes King diagonal & pawn capture/push
+            if is_king_move or is_knight_move or is_rook_move or is_bishop_move:
+                 uci_move = f"{from_alg}{to_alg}"
+                 valid_moves.add(uci_move)
+    # 2. Generate promotion moves explicitly and mark base moves for exclusion
+    promo_pieces = ['q', 'r', 'b', 'n']
+    for from_f in range(8):
+        # White promotions (from rank 7 (idx 6) to rank 8 (idx 7))
+        from_r_w, to_r_w = 6, 7
+        if from_r_w != 7: # Ensure we are on the correct rank before promotion
+            from_alg_w = _coords_to_alg(from_r_w, from_f)
+            # Possible destinations: push (df=0), capture left (df=-1), capture right (df=1)
+            for df in [-1, 0, 1]:
+                to_f_w = from_f + df
+                if 0 <= to_f_w < 8:
+                    to_alg_w = _coords_to_alg(to_r_w, to_f_w)
+                    base_move = f"{from_alg_w}{to_alg_w}"
+                    #promo_base_moves_to_exclude.add(base_move) # Mark e.g. "e7e8" for exclusion
+                    for p in promo_pieces:
+                        valid_moves.add(f"{base_move}{p}") # Add e.g. "e7e8q"
+        # Black promotions (from rank 2 (idx 1) to rank 1 (idx 0))
+        from_r_b, to_r_b = 1, 0
+        if from_r_b != 0: # Ensure we are on the correct rank before promotion
+            from_alg_b = _coords_to_alg(from_r_b, from_f)
+            # Possible destinations: push (df=0), capture left (df=-1), capture right (df=1)
+            for df in [-1, 0, 1]:
+                to_f_b = from_f + df
+                if 0 <= to_f_b < 8:
+                    to_alg_b = _coords_to_alg(to_r_b, to_f_b)
+                    base_move = f"{from_alg_b}{to_alg_b}"
+                    #promo_base_moves_to_exclude.add(base_move) # Mark e.g. "e2e1" for exclusion
+                    for p in promo_pieces:
+                        valid_moves.add(f"{base_move}{p}") # Add e.g. "e2e1q"
+    # 3. Remove the base moves that were replaced by promotions
+    final_valid_moves = valid_moves - promo_base_moves_to_exclude
+    # 4. Add draw claim
+    final_valid_moves.add("<claim_draw>")
+    # 5. Create the final map with sorted keys for deterministic indices
+    sorted_moves = sorted(list(final_valid_moves))
+    move_map = {move: i for i, move in enumerate(sorted_moves)}
+    # Optional: Print the number of moves found for verification
+    # print(f"Generated {len(move_map)} structurally valid unique UCI moves.")
+    return move_map
+UCI_MOVE_TO_IDX = generate_structurally_valid_move_map()
+IDX_TO_UCI_MOVE = {v:k for k,v in UCI_MOVE_TO_IDX.items()}