MaximeMuhlethaler
/

chess-llm-MaximeMuh3

Safetensors

chess_transformer

Model card Files Files and versions

xet

Community

MaximeMuhlethaler committed on Jan 13

Commit

8cee25e

verified ·

1 Parent(s): 1ab595d

Upload utils.py with huggingface_hub

Browse files

Files changed (1) hide show

utils.py +305 -0

utils.py ADDED Viewed

	@@ -0,0 +1,305 @@

+"""
+Utility functions for the Chess Challenge.
+This module provides helper functions for:
+- Parameter counting and budget analysis
+- Model registration with Hugging Face
+- Move validation with python-chess
+"""
+from __future__ import annotations
+from typing import Dict, Optional, TYPE_CHECKING
+import torch.nn as nn
+if TYPE_CHECKING:
+    from src.model import ChessConfig
def count_parameters(model: nn.Module, trainable_only: bool = True) -> int:
    """
    Return how many scalar parameters a model holds.

    Args:
        model: The PyTorch model to inspect.
        trainable_only: When True, parameters whose ``requires_grad`` flag
            is False are left out of the tally.

    Returns:
        The summed element count of the selected parameters.
    """
    total = 0
    for param in model.parameters():
        # Frozen parameters only count when the caller asked for everything.
        if trainable_only and not param.requires_grad:
            continue
        total += param.numel()
    return total
def count_parameters_by_component(model: nn.Module) -> Dict[str, int]:
    """
    Count parameters broken down by the module that directly owns them.

    Every module reported by ``model.named_modules()`` is inspected and only
    the parameters registered directly on it (``recurse=False``) are counted.
    Modules that own no parameters are omitted from the result.

    Unlike a leaf-only walk, this also reports parameters that a container
    module registers on itself next to its children (for example a learned
    scale stored as ``nn.Parameter`` on a block), so nothing is silently
    dropped from the breakdown.

    Args:
        model: The PyTorch model.

    Returns:
        Dictionary mapping module names (as produced by ``named_modules``;
        the root module's name is the empty string) to the number of
        parameter elements registered directly on that module. A parameter
        object registered on several modules is counted once per module.
    """
    counts: Dict[str, int] = {}
    for name, module in model.named_modules():
        # recurse=False restricts the count to this module's own parameters,
        # so children are never double counted through their parents.
        own_count = sum(p.numel() for p in module.parameters(recurse=False))
        if own_count > 0:
            counts[name] = own_count
    return counts
def estimate_parameters(config: "ChessConfig") -> Dict[str, int]:
    """
    Estimate parameter counts from a configuration, without building a model.

    The estimate is computed from ``vocab_size``, ``n_embd``, ``n_layer``,
    ``n_ctx``, ``n_inner`` and ``tie_weights`` only. Per-layer terms cover the
    attention QKV and output projection matrices, the two feed-forward
    matrices, and two LayerNorms; bias vectors of the linear layers are not
    part of the formula.

    Args:
        config: Model configuration.

    Returns:
        Dictionary with per-component estimates, the per-stack total under
        ``"total_transformer_layers"`` and the overall sum under ``"total"``.
        When ``config.tie_weights`` is truthy, ``"lm_head"`` is 0 and an
        explanatory string is stored under ``"lm_head_note"``.
    """
    vocab = config.vocab_size
    width = config.n_embd
    depth = config.n_layer
    context = config.n_ctx
    hidden = config.n_inner

    qkv = 3 * width * width
    out_proj = width * width
    feed_forward = 2 * width * hidden
    layer_norms = 4 * width  # two LayerNorms, each with weight and bias

    estimates = {
        "token_embeddings": vocab * width,
        "position_embeddings": context * width,
        "attention_qkv_per_layer": qkv,
        "attention_proj_per_layer": out_proj,
        "ffn_per_layer": feed_forward,
        "layernorm_per_layer": layer_norms,
        "final_layernorm": 2 * width,
    }

    # Cost of the full stack of identical transformer blocks.
    estimates["total_transformer_layers"] = depth * (
        qkv + out_proj + feed_forward + layer_norms
    )

    # A tied LM head reuses the token embedding matrix and adds nothing.
    if not config.tie_weights:
        estimates["lm_head"] = vocab * width
    else:
        estimates["lm_head"] = 0
        estimates["lm_head_note"] = "Tied with token embeddings"

    summed_keys = (
        "token_embeddings",
        "position_embeddings",
        "total_transformer_layers",
        "final_layernorm",
        "lm_head",
    )
    estimates["total"] = sum(estimates[key] for key in summed_keys)
    return estimates
def print_parameter_budget(config: "ChessConfig", limit: int = 1_000_000) -> None:
    """
    Print a human-readable report of the estimated parameter budget.

    The report lists the configuration values, the per-component estimates
    returned by ``estimate_parameters`` and how the estimated total compares
    with ``limit``.

    Args:
        config: Model configuration.
        limit: Parameter limit to compare the estimated total against.
    """
    estimates = estimate_parameters(config)
    total = estimates["total"]
    banner = "=" * 60

    print(banner)
    print("PARAMETER BUDGET ANALYSIS")
    print(banner)

    print("\nConfiguration:")
    config_rows = (
        ("  vocab_size (V) = ", "vocab_size"),
        ("  n_embd (d)     = ", "n_embd"),
        ("  n_layer (L)    = ", "n_layer"),
        ("  n_head         = ", "n_head"),
        ("  n_ctx          = ", "n_ctx"),
        ("  n_inner        = ", "n_inner"),
        ("  tie_weights    = ", "tie_weights"),
    )
    for prefix, attr in config_rows:
        print(f"{prefix}{getattr(config, attr)}")

    print("\nParameter Breakdown:")
    breakdown_rows = (
        ("  Token Embeddings:    ", "token_embeddings"),
        ("  Position Embeddings: ", "position_embeddings"),
        ("  Transformer Layers:  ", "total_transformer_layers"),
        ("  Final LayerNorm:     ", "final_layernorm"),
    )
    for prefix, key in breakdown_rows:
        print(f"{prefix}{estimates[key]:>10,}")
    # A tied head is shown as a label rather than as a zero count.
    if config.tie_weights:
        lm_head_cell = f"{'(tied)':>10}"
    else:
        lm_head_cell = f"{estimates['lm_head']:>10,}"
    print(f"  LM Head:             {lm_head_cell}")
    print("  " + "-" * 30)
    print(f"  TOTAL:               {total:>10,}")

    print("\nBudget Status:")
    print(f"  Limit:    {limit:>10,}")
    print(f"  Used:     {total:>10,}")
    print(f"  Remaining:{limit - total:>10,}")
    within_budget = total <= limit
    if within_budget:
        print(f"\n Within budget! ({total / limit * 100:.1f}% used)")
    else:
        print(f"\n OVER BUDGET by {total - limit:,} parameters!")
    print(banner)
def validate_move_with_chess(move: str, board_fen: Optional[str] = None) -> bool:
    """
    Validate a move using python-chess.

    The move is given in the dataset's extended UCI format
    ``[W|B][Piece][from_sq][to_sq][suffix]`` (e.g. "WPe2e4", "BNg8f6(x)",
    "WKe1g1(o)", "WPe7e8=Q"). Only the two squares and an optional ``=X``
    promotion are used: they are turned into a standard UCI move which is
    checked against the legal moves of the board. The colour and piece
    letters are not cross-checked against the position.

    Args:
        move: Move in extended UCI format.
        board_fen: FEN string of the current board state. When omitted or
            empty, the standard starting position is used.

    Returns:
        True if the move is legal on the board, False otherwise. Strings
        shorter than six characters, strings with a ``=`` that is not
        followed by a promotion piece, and strings that do not parse as a
        UCI move all yield False.

    Raises:
        ImportError: If python-chess is not installed.
        ValueError: If ``board_fen`` is not a valid FEN string (raised by
            ``chess.Board``).
    """
    try:
        import chess
    except ImportError as exc:
        raise ImportError("python-chess is required for move validation. "
                         "Install it with: pip install python-chess") from exc

    # Two prefix letters plus two 2-character squares are the bare minimum.
    if len(move) < 6:
        return False

    uci_move = move[2:6]  # from-square followed by to-square

    # Promotion is written as "=X"; a dangling "=" is malformed input and
    # must not raise IndexError.
    _, marker, tail = move.partition("=")
    if marker:
        if not tail:
            return False
        uci_move += tail[0].lower()

    board = chess.Board(board_fen) if board_fen else chess.Board()

    try:
        # python-chess signals unparsable UCI with ValueError (its newer
        # InvalidMoveError is a ValueError subclass), so catching ValueError
        # works across library versions.
        return chess.Move.from_uci(uci_move) in board.legal_moves
    except ValueError:
        return False
def convert_extended_uci_to_uci(move: str) -> str:
    """
    Convert extended UCI format to standard UCI format.

    The two prefix letters (colour and piece) and any trailing suffix such
    as "(x)" or "(+)" are dropped. A promotion written as ``=X`` is kept as
    the lower-cased piece letter appended to the squares.

    Args:
        move: Move in extended UCI format (e.g., "WPe2e4", "WPe7e8=Q").

    Returns:
        Move in standard UCI format (e.g., "e2e4", "e7e8q"). Inputs shorter
        than six characters are returned unchanged. A ``=`` with nothing
        after it is treated as "no promotion" instead of raising.
    """
    if len(move) < 6:
        return move

    # Everything after the first "=" starts with the promotion piece; the
    # slice yields "" when there is no "=" or nothing follows it.
    _, _, tail = move.partition("=")
    promotion = tail[:1].lower()

    return move[2:6] + promotion
def convert_uci_to_extended(
    uci_move: str,
    board_fen: str,
) -> str:
    """
    Convert standard UCI format to extended UCI format.

    The result is ``[W|B][Piece][from_sq][to_sq]`` followed by an optional
    ``=X`` promotion and one optional suffix:

    - ``(o)`` / ``(O)`` for kingside / queenside castling. Castling moves
      carry only this suffix; check and checkmate markers are not added.
    - ``(x)`` capture, ``(+)`` check, ``(+*)`` checkmate, combined as
      ``(x+)`` / ``(x+*)`` for capturing moves.

    Args:
        uci_move: Move in standard UCI format (e.g., "e2e4").
        board_fen: FEN string of the board state before the move.

    Returns:
        Move in extended UCI format (e.g., "WPe2e4"). The colour letter is
        taken from the side to move in ``board_fen``; if the origin square
        is empty the piece letter falls back to "P".

    Raises:
        ImportError: If python-chess is not installed.
        ValueError: If ``board_fen`` or ``uci_move`` cannot be parsed by
            python-chess.
    """
    try:
        import chess
    except ImportError as exc:
        raise ImportError("python-chess is required for move conversion.") from exc

    board = chess.Board(board_fen)
    move = chess.Move.from_uci(uci_move)

    color = "W" if board.turn == chess.WHITE else "B"
    piece = board.piece_at(move.from_square)
    piece_letter = piece.symbol().upper() if piece else "P"

    from_sq = chess.square_name(move.from_square)
    to_sq = chess.square_name(move.to_square)
    result = f"{color}{piece_letter}{from_sq}{to_sq}"

    if move.promotion:
        result += f"={chess.piece_symbol(move.promotion).upper()}"

    # Castling gets its own suffix and nothing else. Asking the board for
    # the castling side (rather than comparing the target square with g1/g8)
    # also handles the king-takes-rook encoding of castling moves.
    if board.is_castling(move):
        return result + ("(o)" if board.is_kingside_castling(move) else "(O)")

    # Capture status must be read before the move is played.
    marks = "x" if board.is_capture(move) else ""

    # Check / checkmate are properties of the position after the move. The
    # board is local to this call, so the move does not need to be undone.
    board.push(move)
    if board.is_checkmate():
        marks += "+*"
    elif board.is_check():
        marks += "+"

    if marks:
        result += f"({marks})"
    return result