Upload 6 files

Browse files

Files changed (6) hide show

64L1024D_1e-3maxlr_470k_step_1ep_1480ELO.pth +3 -0
autoplay_muliproc.py +151 -0
chesstransformer.py +251 -0
environment.yml +66 -0
play.py +72 -0
tokenizer.py +163 -0

64L1024D_1e-3maxlr_470k_step_1ep_1480ELO.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:82fb0554f04255f854344432380ba0719af4e14c631ff8a0c9905a8e99cfbaf2
+size 9746197380

autoplay_muliproc.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import torch
+import chess
+import chess.engine
+import logging
+import math
+import argparse
+import multiprocessing as mp
+from chesstransformer import ChessTransformer
+import tokenizer as tk
+from tqdm import tqdm
# Logging setup: the format includes the process name so that messages
# emitted from pool workers can be told apart.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(processName)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Command-line options. They are parsed at module level, so `args` is a
# module global read by main().
parser = argparse.ArgumentParser(description='Chess Transformer Testing')
parser.add_argument(
    '--cores', type=int, default=2,
    help='Cores to use for CPU chess engine',
)
parser.add_argument(
    '--games', type=int, default=10,
    help='Number of games to play',
)
parser.add_argument(
    '--stockfish_elo', type=int, default=1320,
    help='ELO rating for Stockfish. Min 1320',
)
parser.add_argument(
    '--stockfish_path', type=str, default='./stockfish/stockfish-ubuntu-x86-64',
    help='Path to Stockfish binary',
)
args = parser.parse_args()
def setup_model(checkpoint_path='./64L1024D_1e-3maxlr_470k_step_1ep_1480ELO.pth', device='cuda'):
    """Build a ChessTransformer and load trained weights into it.

    Args:
        checkpoint_path: File containing a dict with a "model_state_dict" entry.
        device: Device the model is moved to after the weights are loaded.

    Returns:
        The model, switched to eval mode and placed on ``device``.
    """
    logger.info("Loading ChessTransformer model...")
    model = ChessTransformer()
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    # map_location='cpu' deserializes the tensors on the CPU regardless of the
    # device they were saved from; the model is moved to `device` afterwards.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval().to(device)
    logger.info("Model loaded successfully.")
    return model
def predict_top_k_moves(model, tokenizer, game_sequence, k=100, device='cuda'):
    """Return the model's k most probable next tokens for a game.

    Args:
        model: Callable mapping a (1, seq_len) LongTensor to logits indexed
            as [batch, position, token].
        tokenizer: Object providing ``tokenize_game`` and ``get_move``.
        game_sequence: Move strings played so far.
        k: Number of candidates wanted; clamped to the vocabulary size.
        device: Device the input tensor is moved to.

    Returns:
        A list of ``(move, probability)`` pairs, most probable first.
    """
    token_ids = torch.tensor([tokenizer.tokenize_game(game_sequence)], dtype=torch.long).to(device)
    with torch.no_grad():
        # Only the prediction made at the last position is of interest.
        last_logits = model(token_ids)[0, -1, :]
        probabilities = torch.nn.functional.softmax(last_logits, dim=-1)
        # torch.topk raises when k exceeds the size of the dimension.
        k = min(k, probabilities.size(-1))
        top_k_probs, top_k_indices = torch.topk(probabilities, k)
    # Convert each tensor to Python values once instead of calling .item()
    # per candidate.
    top_k_moves = [tokenizer.get_move(token_id) for token_id in top_k_indices.tolist()]
    return list(zip(top_k_moves, top_k_probs.tolist()))
def get_legal_move(board, moves):
    """Pick the first candidate that is a legal move on ``board``.

    Args:
        board: Position whose ``legal_moves`` the candidates are checked against.
        moves: Iterable of ``(uci_string, probability)`` pairs in preference order.

    Returns:
        The first ``(uci_string, probability)`` pair that is legal, or
        ``(None, None)`` when no candidate qualifies.
    """
    for candidate, probability in moves:
        try:
            is_legal = chess.Move.from_uci(candidate) in board.legal_moves
        except ValueError:
            # A ValueError from parsing/checking the candidate means it is
            # not usable as a move; try the next one.
            continue
        if is_legal:
            return candidate, probability
    return None, None
def play_game(model, tokenizer, stockfish_path, stockfish_elo, model_is_white, game_number):
    """Play one game between the model and a strength-limited Stockfish.

    Args:
        model: Network handed to ``predict_top_k_moves``.
        tokenizer: Tokenizer handed to ``predict_top_k_moves``.
        stockfish_path: Path of the UCI engine binary to launch.
        stockfish_elo: Value used for the engine's ``UCI_Elo`` option.
        model_is_white: True when the model plays the white pieces.
        game_number: Identifier used in log messages only.

    Returns:
        ``(result, move_count)``. ``result`` is ``board.result()`` for a
        finished game. If none of the model's candidates is legal, the game
        is scored as a loss for the model. ``move_count`` counts every turn
        started by either side.
    """
    engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
    # try/finally guarantees the engine subprocess is shut down on every exit
    # path, including the early forfeit return and exceptions.
    try:
        engine.configure({"UCI_LimitStrength": True, "UCI_Elo": stockfish_elo})
        board = chess.Board()
        # The history handed to the model always begins with the 'start' token.
        game_sequence = ['start']
        move_count = 0
        while not board.is_game_over():
            move_count += 1
            if (board.turn == chess.WHITE) == model_is_white:
                top_k_moves = predict_top_k_moves(model, tokenizer, game_sequence)
                legal_move, prob = get_legal_move(board, top_k_moves)
                if legal_move is None:
                    logger.warning(f"Game {game_number}: No legal moves found in top-k on move {move_count}. Game over.")
                    return "0-1" if model_is_white else "1-0", move_count
                board.push_uci(legal_move)
                game_sequence.append(legal_move)
                logger.debug(f"Game {game_number}: Model's move: {legal_move} (probability: {prob:.4f})")
            else:
                engine_reply = engine.play(board, chess.engine.Limit(time=0.1))
                board.push(engine_reply.move)
                game_sequence.append(engine_reply.move.uci())
                logger.debug(f"Game {game_number}: Stockfish's move: {engine_reply.move.uci()}")
        return board.result(), move_count
    finally:
        engine.quit()
def worker(args):
    """Pool entry point: play the game described by one task tuple.

    Args:
        args: ``(model, tokenizer, stockfish_path, stockfish_elo, game_number)``.

    Returns:
        ``(result, game_number, move_count)`` for the finished game.
    """
    model, tokenizer, stockfish_path, stockfish_elo, game_number = args
    # Even-numbered games give the model the white pieces.
    plays_white = (game_number % 2 == 0)
    game_result, turns_played = play_game(
        model, tokenizer, stockfish_path, stockfish_elo, plays_white, game_number
    )
    return game_result, game_number, turns_played
def calculate_elo_from_win_rate(win_rate, opponent_elo):
    """Estimate a rating from the score fraction achieved against an opponent.

    Args:
        win_rate: Score fraction; 0 and 1 are handled as special cases.
        opponent_elo: Rating of the opponent.

    Returns:
        ``opponent_elo - 400 * log10(1 / win_rate - 1)``, or ``-inf`` / ``inf``
        when ``win_rate`` is exactly 0 / 1.
    """
    if win_rate == 0 or win_rate == 1:
        # The logistic formula diverges at both extremes.
        return float('inf') if win_rate == 1 else float('-inf')
    odds_against = 1 / win_rate - 1
    rating_gap = -400 * math.log10(odds_against)
    return opponent_elo + rating_gap
def _tally_results(results):
    """Count wins, draws and losses from the model's side and total the move counts.

    Args:
        results: Iterable of ``(result, game_number, move_count)`` tuples. The
            model had white in even-numbered games and black in odd ones.

    Returns:
        ``(wins, draws, losses, total_moves)``.
    """
    wins = draws = losses = total_moves = 0
    for result, game_number, move_count in results:
        winning_result = "1-0" if game_number % 2 == 0 else "0-1"
        if result == winning_result:
            wins += 1
        elif result == "1/2-1/2":
            draws += 1
        else:
            losses += 1
        total_moves += move_count
    return wins, draws, losses, total_moves


def main():
    """Play the requested number of games against Stockfish and log an ELO estimate."""
    mp.set_start_method('spawn')  # Set start method to 'spawn' for CUDA support
    num_games = args.games
    stockfish_elo = args.stockfish_elo
    stockfish_path = args.stockfish_path
    if num_games <= 0:
        # Bail out before loading the model; the statistics below would
        # otherwise divide by zero.
        logger.warning("No games requested (--games=%d); nothing to do.", num_games)
        return
    logger.info(f"Starting tournament: {num_games} games, Stockfish ELO: {stockfish_elo}")
    model = setup_model()
    tokenizer = tk.Tokenizer()
    num_processes = args.cores
    logger.info(f"Using {num_processes} CPU cores for parallel processing")
    # Every task carries the same model and tokenizer plus its own game number.
    tasks = [(model, tokenizer, stockfish_path, stockfish_elo, i) for i in range(num_games)]
    results = []
    with mp.Pool(processes=num_processes) as pool:
        with tqdm(total=num_games, desc="Games Progress") as pbar:
            for result in pool.imap_unordered(worker, tasks):
                results.append(result)
                pbar.update()
    # Process results
    wins, draws, losses, total_moves = _tally_results(results)
    # A draw counts as half a win.
    win_rate = (wins + 0.5 * draws) / num_games
    final_model_elo = calculate_elo_from_win_rate(win_rate, stockfish_elo)
    elo_change = final_model_elo - stockfish_elo
    logger.info("Tournament completed. Final results:")
    logger.info(f"Total games: {num_games}")
    logger.info(f"Wins: {wins}, Losses: {losses}, Draws: {draws}")
    logger.info(f"Win rate: {win_rate:.2%}")
    logger.info(f"Average moves per game: {total_moves/num_games:.2f}")
    logger.info(f"Stockfish ELO: {stockfish_elo}")
    logger.info(f"Final Model ELO: {final_model_elo:.2f}")
    logger.info(f"ELO Change: {elo_change:+.2f}")


if __name__ == "__main__":
    main()

chesstransformer.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor.

    The table is stored as the buffer ``pe`` with shape
    ``(max_len, 1, d_model)``; even feature columns hold sines and odd columns
    hold cosines.
    """
    def __init__(self, d_model, max_len=5000):
        """Precompute the encoding table.

        Args:
            d_model: Feature dimension (even or odd).
            max_len: Number of positions to precompute.
        """
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so trim the angles to fit; for even d_model this is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions.

        ``x`` is indexed along dim 0 by position; the table broadcasts over dim 1.
        """
        return x + self.pe[:x.size(0), :]
class StochasticDepth(nn.Module):
    """Residual connection whose branch is randomly dropped while training.

    ``p`` is the probability of keeping the residual branch.
    """
    def __init__(self, p=0.8):
        super().__init__()
        self.p = p
    def forward(self, x, residual):
        """Combine ``x`` with ``residual``.

        In eval mode the residual is scaled by ``p``. In training mode a single
        random draw decides whether the whole residual is added or skipped.
        """
        if not self.training:
            return x + self.p * residual
        keep_branch = torch.rand(1).item() < self.p
        return x + residual if keep_branch else x
class AdvancedTransformerLayer(nn.Module):
    """Pre-norm transformer block with stochastic-depth residual connections.

    Each sub-layer (self-attention, then a feed-forward network with a hidden
    width of ``4 * d_model``) is applied to a LayerNorm of its input, and its
    dropped-out output is merged through ``StochasticDepth``.
    """
    def __init__(self, d_model, nhead, dropout=0.1, stoch_depth_p=0.8):
        super().__init__()
        dim_feedforward = 4 * d_model
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.ff = nn.Sequential(
            nn.Linear(d_model, dim_feedforward),
            nn.ReLU(),
            nn.Linear(dim_feedforward, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.stoch_depth = StochasticDepth(stoch_depth_p)
    def forward(self, x, src_mask=None, src_key_padding_mask=None):
        """Apply the block.

        Args:
            x: Input of shape (seq_len, batch_size, d_model).
            src_mask: Optional attention mask passed on as ``attn_mask``.
            src_key_padding_mask: Optional padding mask. A boolean mask (True
                marks padding) is converted to an additive float mask; a mask
                of any other dtype is passed through unchanged.

        Returns:
            Tensor with the same shape as ``x``.
        """
        norm_x = self.norm1(x)
        # Convert a boolean mask to an additive float mask (-inf at padded
        # keys, 0 elsewhere). Skip the conversion for non-bool masks:
        # masked_fill requires a boolean mask and would raise on them.
        if src_key_padding_mask is not None and src_key_padding_mask.dtype == torch.bool:
            src_key_padding_mask = torch.zeros_like(
                src_key_padding_mask, dtype=torch.float
            ).masked_fill(src_key_padding_mask, float('-inf'))
        attn_output, _ = self.self_attn(norm_x, norm_x, norm_x,
                                        attn_mask=src_mask,
                                        key_padding_mask=src_key_padding_mask)
        x = self.stoch_depth(x, self.dropout(attn_output))
        norm_x = self.norm2(x)
        ff_output = self.ff(norm_x)
        x = self.stoch_depth(x, self.dropout(ff_output))
        return x
class ChessTransformer(nn.Module):
    """Causal transformer over move-token sequences.

    Tokens are embedded, scaled by ``sqrt(d_model)``, given positional
    encodings, passed through ``num_layers`` ``AdvancedTransformerLayer``
    blocks under a causal mask and a padding mask, normalized, and projected
    to ``num_tokens`` logits per position.
    """
    def __init__(self, num_layers=64, d_model=1024, nhead=8, dropout=0.1, stoch_depth_p=0.9, num_tokens=2066, pad_token_id=2064):
        super().__init__()
        self.embedding = nn.Embedding(num_tokens, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        self.layers = nn.ModuleList([
            AdvancedTransformerLayer(d_model, nhead, dropout, stoch_depth_p)
            for _ in range(num_layers)
        ])
        self.norm = nn.LayerNorm(d_model)
        self.output = nn.Linear(d_model, num_tokens)
        self.d_model = d_model
        self.padding_idx = pad_token_id
    def generate_square_subsequent_mask(self, sz, device=None):
        """Return an (sz, sz) float mask: 0 on and below the diagonal, -inf above.

        Args:
            sz: Sequence length.
            device: Optional device to create the mask on, which avoids a
                host-to-device copy on every forward pass.
        """
        return torch.triu(torch.full((sz, sz), float('-inf'), device=device), diagonal=1)
    def pad_sequences(self, sequences):
        """Right-pad token-id sequences with the padding id to a common length.

        Args:
            sequences: Iterable of token-id sequences.

        Returns:
            LongTensor of shape (len(sequences), longest length); an empty
            (0, 0) tensor when no sequences are given.
        """
        rows = [list(seq) for seq in sequences]
        if not rows:
            return torch.empty((0, 0), dtype=torch.long)
        max_len = max(len(row) for row in rows)
        padded_seqs = [row + [self.padding_idx] * (max_len - len(row)) for row in rows]
        return torch.LongTensor(padded_seqs)
    def forward(self, x):
        """Compute next-token logits.

        Args:
            x: LongTensor of token ids with shape (batch_size, seq_len).

        Returns:
            Logits of shape (batch_size, seq_len, num_tokens).
        """
        _, seq_len = x.size()
        # True wherever the input holds the padding id.
        padding_mask = (x == self.padding_idx)
        causal_mask = self.generate_square_subsequent_mask(seq_len, device=x.device)
        # The layers work sequence-first, hence the transposes around them.
        x = self.embedding(x).transpose(0, 1) * math.sqrt(self.d_model)
        x = self.pos_encoder(x)
        for layer in self.layers:
            x = layer(x, src_mask=causal_mask, src_key_padding_mask=padding_mask)
        x = self.norm(x)
        output = self.output(x.transpose(0, 1))
        return output
def winning_moves_loss(output, ground_truth, win_labels, pad_token_id=2064, start_token_id=2065):
    """Mean negative log-likelihood over the moves played by the winning side.

    ``output[:, t]`` is scored against ``ground_truth[:, t + 1]``. After that
    shift, even positions are treated as white's moves and odd positions as
    black's.

    Args:
        output: Logits of shape (batch_size, seq_len, num_tokens).
        ground_truth: Token ids of shape (batch_size, seq_len).
        win_labels: Per-game label of shape (batch_size,); 1 selects white's
            moves, 0 selects black's moves, any other value selects none.
        pad_token_id: Target id excluded from the loss.
        start_token_id: Target id excluded from the loss.

    Returns:
        Scalar tensor on ``output``'s device; 0 when no move is selected.
    """
    # Work on the device the logits already live on instead of forcing CUDA,
    # so the loss also runs on CPU-only hosts.
    device = output.device
    ground_truth = ground_truth.to(device)
    win_labels = win_labels.to(device)
    batch_size, seq_len, num_tokens = output.shape
    # Shift so that each position predicts the following token.
    targets = ground_truth[:, 1:].reshape(-1)
    logits = output[:, :-1, :].reshape(-1, num_tokens)
    log_probs = F.log_softmax(logits, dim=-1)
    per_move_loss = F.nll_loss(log_probs, targets, reduction='none')
    # Per-position game label and index within the shifted sequence.
    labels = win_labels.repeat_interleave(seq_len - 1)
    move_indices = torch.arange(seq_len - 1, device=device).repeat(batch_size)
    white_win_mask = (labels == 1) & (move_indices % 2 == 0)
    black_win_mask = (labels == 0) & (move_indices % 2 == 1)
    selected = (
        (white_win_mask | black_win_mask)
        & (targets != pad_token_id)
        & (targets != start_token_id)
    )
    # masked_fill (rather than multiplying by the mask) keeps an infinite loss
    # at an excluded position from turning the sum into NaN.
    total = per_move_loss.masked_fill(~selected, 0.0).sum()
    # clamp(min=1) yields 0 when nothing is selected, without a host sync.
    return total / selected.sum().clamp(min=1)
def all_moves_loss(output, ground_truth, pad_token_id=2064, start_token_id=2065):
    """Mean negative log-likelihood over every non-special target token.

    ``output[:, t]`` is scored against ``ground_truth[:, t + 1]``.

    Args:
        output: Logits of shape (batch_size, seq_len, num_tokens).
        ground_truth: Token ids of shape (batch_size, seq_len).
        pad_token_id: Target id excluded from the loss.
        start_token_id: Target id excluded from the loss.

    Returns:
        Scalar tensor on ``output``'s device; 0 when no target is valid.
    """
    # Work on the device the logits already live on instead of forcing CUDA,
    # so the loss also runs on CPU-only hosts.
    ground_truth = ground_truth.to(output.device)
    num_tokens = output.shape[-1]
    # Shift so that each position predicts the following token.
    logits = output[:, :-1, :].reshape(-1, num_tokens)
    targets = ground_truth[:, 1:].reshape(-1)
    log_probs = F.log_softmax(logits, dim=-1)
    per_move_loss = F.nll_loss(log_probs, targets, reduction='none')
    valid = (targets != pad_token_id) & (targets != start_token_id)
    # masked_fill (rather than multiplying by the mask) keeps an infinite loss
    # at an excluded position from turning the sum into NaN.
    total = per_move_loss.masked_fill(~valid, 0.0).sum()
    # clamp(min=1) yields 0 when nothing is valid, without a host sync.
    return total / valid.sum().clamp(min=1)
def weighted_chess_loss(output, ground_truth, win_labels, winning_weight=1.0, losing_weight=0.1, pad_token_id=2064, start_token_id=2065):
    """Weighted negative log-likelihood that emphasizes the winner's moves.

    ``output[:, t]`` is scored against ``ground_truth[:, t + 1]``. After that
    shift, even positions are treated as white's moves and odd positions as
    black's.

    Args:
        output: Logits of shape (batch_size, seq_len, num_tokens).
        ground_truth: Token ids of shape (batch_size, seq_len).
        win_labels: Per-game label of shape (batch_size,); 1 marks white's
            moves as winning, 0 marks black's moves as winning.
        winning_weight: Weight applied to the winning side's moves.
        losing_weight: Weight applied to every other valid move.
        pad_token_id: Target id excluded from the loss.
        start_token_id: Target id excluded from the loss.

    Returns:
        Scalar tensor on ``output``'s device: the weighted loss summed over
        valid targets and divided by the number of valid targets (0 when
        there are none).
    """
    # Work on the device the logits already live on instead of forcing CUDA,
    # so the loss also runs on CPU-only hosts.
    device = output.device
    ground_truth = ground_truth.to(device)
    win_labels = win_labels.to(device)
    batch_size, seq_len, num_tokens = output.shape
    # Shift so that each position predicts the following token.
    targets = ground_truth[:, 1:].reshape(-1)
    logits = output[:, :-1, :].reshape(-1, num_tokens)
    log_probs = F.log_softmax(logits, dim=-1)
    per_move_loss = F.nll_loss(log_probs, targets, reduction='none')
    # Per-position game label and index within the shifted sequence.
    labels = win_labels.repeat_interleave(seq_len - 1)
    move_indices = torch.arange(seq_len - 1, device=device).repeat(batch_size)
    white_win_mask = (labels == 1) & (move_indices % 2 == 0)
    black_win_mask = (labels == 0) & (move_indices % 2 == 1)
    winning_moves_mask = white_win_mask | black_win_mask
    valid = (targets != pad_token_id) & (targets != start_token_id)
    weights = torch.where(winning_moves_mask & valid, winning_weight, losing_weight)
    # masked_fill (rather than multiplying by the mask) keeps an infinite loss
    # at an excluded position from turning the sum into NaN.
    weighted = (per_move_loss * weights).masked_fill(~valid, 0.0)
    # clamp(min=1) yields 0 when nothing is valid, without a host sync.
    return weighted.sum() / valid.sum().clamp(min=1)

environment.yml ADDED Viewed

	@@ -0,0 +1,66 @@

+name: chessbot
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h5eee18b_6
+  - ca-certificates=2024.9.24=h06a4308_0
+  - expat=2.6.3=h6a678d5_0
+  - ld_impl_linux-64=2.40=h12ee557_0
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.15=h5eee18b_0
+  - pip=24.2=py312h06a4308_0
+  - python=3.12.7=h5148396_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=75.1.0=py312h06a4308_0
+  - sqlite=3.45.3=h5eee18b_0
+  - tk=8.6.14=h39e8969_0
+  - wheel=0.44.0=py312h06a4308_0
+  - xz=5.4.6=h5eee18b_1
+  - zlib=1.2.13=h5eee18b_1
+  - pip:
+      - absl-py==2.1.0
+      - chess==1.11.0
+      - filelock==3.13.1
+      - fsspec==2024.2.0
+      - grpcio==1.66.2
+      - jinja2==3.1.3
+      - markdown==3.7
+      - markupsafe==2.1.5
+      - mpmath==1.3.0
+      - networkx==3.2.1
+      - numpy==2.1.2
+      - nvidia-cublas-cu12==12.4.2.65
+      - nvidia-cuda-cupti-cu12==12.4.99
+      - nvidia-cuda-nvrtc-cu12==12.4.99
+      - nvidia-cuda-runtime-cu12==12.4.99
+      - nvidia-cudnn-cu12==9.1.0.70
+      - nvidia-cufft-cu12==11.2.0.44
+      - nvidia-curand-cu12==10.3.5.119
+      - nvidia-cusolver-cu12==11.6.0.99
+      - nvidia-cusparse-cu12==12.3.0.142
+      - nvidia-nccl-cu12==2.20.5
+      - nvidia-nvjitlink-cu12==12.4.99
+      - nvidia-nvtx-cu12==12.4.99
+      - packaging==24.1
+      - pandas==2.2.3
+      - protobuf==5.28.2
+      - pyarrow==17.0.0
+      - python-dateutil==2.9.0.post0
+      - pytz==2024.2
+      - six==1.16.0
+      - sympy==1.12
+      - tensorboard==2.18.0
+      - tensorboard-data-server==0.7.2
+      - torch==2.4.1+cu124
+      - tqdm==4.66.5
+      - triton==3.0.0
+      - typing-extensions==4.9.0
+      - tzdata==2024.2
+      - werkzeug==3.0.4

play.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import torch
+import torch.nn.functional as F
+from chesstransformer import ChessTransformer
+import tokenizer as tk
# Build the network and load the trained weights once, at import time.
model = ChessTransformer()
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. map_location='cpu' deserializes the tensors on the CPU
# regardless of the device they were saved from; the model is moved to the GPU
# afterwards.
model.load_state_dict(
    torch.load('./64L1024D_1e-3maxlr_470k_step_1ep_1480ELO.pth', map_location='cpu')["model_state_dict"]
)
model.eval().cuda()
# Initialize tokenizer
t = tk.Tokenizer()
def predict_move(model, game_sequence, tokenizer, device='cuda', top_k=5):
    """Sample the next move from the model's ``top_k`` highest-scoring tokens.

    Args:
        model: Network mapping a (1, seq_len) LongTensor to logits indexed as
            [batch, position, token]; it is switched to eval mode.
        game_sequence: Move strings played so far.
        tokenizer: Object providing ``tokenize_game`` and ``untokenize_game``.
        device: Device the input tensor is moved to.
        top_k: Number of candidates to sample from; clamped to the vocabulary size.

    Returns:
        ``(sampled_move, top_k_moves, top_k_probs)``. The probabilities are a
        NumPy array obtained by a softmax over the top-k logits only, so they
        sum to 1 across the candidates.
    """
    model.eval()
    token_ids = torch.tensor([tokenizer.tokenize_game(game_sequence)], dtype=torch.long).to(device)
    with torch.no_grad():
        logits = model(token_ids)[0, -1, :]  # Logits for the next move
        # torch.topk raises when k exceeds the size of the dimension.
        top_k = min(top_k, logits.size(-1))
        top_k_logits, top_k_indices = torch.topk(logits, top_k)
        probs = F.softmax(top_k_logits, dim=-1)
        # Draw one candidate according to the renormalized probabilities.
        sampled_index = torch.multinomial(probs, 1).item()
    # Decode all candidates in one call; the sampled move is one of them.
    top_k_moves = tokenizer.untokenize_game(top_k_indices.tolist())
    sampled_move = top_k_moves[sampled_index]
    top_k_probs = probs.cpu().numpy()
    return sampled_move, top_k_moves, top_k_probs
def play_game():
    """Interactive loop: read the user's moves and answer with sampled model moves.

    Moves are exchanged in UCI notation. ``exit`` ends the session and ``undo``
    removes the last user/bot move pair. No legality checking is performed.
    """
    input_game = []
    print("Let's play chess! Enter your moves in UCI format (e.g., 'e2e4'). Type 'exit' to quit or 'undo' to undo the last move.")
    while True:
        user_move = input("Your move: ").strip()
        command = user_move.lower()
        if command == 'exit':
            print("Game over. Thanks for playing!")
            break
        if command == 'undo':
            if len(input_game) >= 2:
                input_game.pop()  # Remove bot's move
                input_game.pop()  # Remove user's move
                print("Last move undone. Current game sequence:", input_game)
            else:
                print("Cannot undo. No moves to undo.")
            continue
        # Reject input the tokenizer does not know (and the special token
        # names) before it enters the history, instead of aborting the game.
        try:
            t.tokenize_move(user_move)
        except KeyError:
            print(f"Unknown move '{user_move}'. Please use UCI notation such as 'e2e4'.")
            continue
        if user_move in ('pad', 'start'):
            print(f"Unknown move '{user_move}'. Please use UCI notation such as 'e2e4'.")
            continue
        input_game.append(user_move)
        print("Current game sequence:", input_game)
        try:
            # Prefix the history with the 'start' token, matching how
            # autoplay_muliproc.py builds the sequence it feeds to the model.
            bot_move, top_moves, top_probs = predict_move(model, ['start'] + input_game, t)
            # Display top moves and their probabilities
            moves_probs_str = ', '.join(f"{move} ({prob:.2%})" for move, prob in zip(top_moves, top_probs))
            print(f"Top {len(top_moves)} moves and probabilities: {moves_probs_str}")
            print(f"Bot's sampled move: {bot_move}")
            if bot_move in ("[pad]", "[start]"):
                # A special token cannot be tokenized again as part of the history.
                print("Bot produced a special token instead of a move. Game over.")
                break
            input_game.append(bot_move)
        except Exception as e:
            print("An error occurred:", e)
            break


if __name__ == "__main__":
    play_game()

tokenizer.py ADDED Viewed

	@@ -0,0 +1,163 @@

class Tokenizer:
    """Maps move strings to integer token ids and back.

    ``move_dict`` maps move string -> id and ``inverse_dict`` maps id -> move
    string; both are built once in the constructor.
    """
    def __init__(self):
        self.move_dict = create_move_dict()
        self.inverse_dict = inverse_move_dict(self.move_dict)
    def tokenize_game(self, moves_list):
        """Return the token id of every move in ``moves_list`` (KeyError if unknown)."""
        return [self.move_dict[move] for move in moves_list]
    def untokenize_game(self, tokenized_moves):
        """Return the move string of every token id.

        Ids 2064 and 2065 are rendered as "[pad]" and "[start]".
        """
        def _decode(token):
            if token == 2064:
                return "[pad]"
            if token == 2065:
                return "[start]"
            return self.inverse_dict[token]
        return [_decode(token) for token in tokenized_moves]
    def tokenize_move(self, move):
        """Return the token id of a single move string (KeyError if unknown)."""
        token_id = self.move_dict[move]
        return token_id
    def get_move(self, tokenized_move):
        """Return the string stored for a single token id (KeyError if unknown)."""
        move = self.inverse_dict[tokenized_move]
        return move
# Convert a square index into algebraic notation (file letter + rank digit)
def square_to_algebraic(square):
    """Return e.g. 'a1' for 0 and 'h8' for 63.

    The file is ``square % 8`` and the rank is ``square // 8``.
    """
    rank_index, file_index = divmod(square, 8)
    return 'abcdefgh'[file_index] + '12345678'[rank_index]
# Enumerate every (from, to) square pair of the move vocabulary for one origin
def chess_moves(starting_square):
    """List the destination squares paired with ``starting_square``.

    The result contains, in this order and without duplicates:
    the squares on the same rank (leftwards, then rightwards), on the same
    file (upwards, then downwards), on the four diagonals (upper-left,
    lower-left, upper-right, lower-right), and finally every square of the
    surrounding 5x5 window that was not listed yet.

    Pawn pushes and captures need no separate handling: each of them is one
    of the rank/file/diagonal moves above.

    The order is significant because callers assign token ids by position.

    Args:
        starting_square: Square index in 0..63 (``rank * 8 + file``).

    Returns:
        List of ``(starting_square, target_square)`` tuples.
    """
    ss = starting_square
    row_start = (ss // 8) * 8  # index of the first square on this rank
    targets = []
    # Same rank: towards the a-file, then towards the h-file.
    targets.extend(range(ss - 1, row_start - 1, -1))
    targets.extend(range(ss + 1, row_start + 8))
    # Same file: upwards, then downwards.
    targets.extend(range(ss + 8, 64, 8))
    targets.extend(range(ss - 8, -1, -8))
    # Diagonals. A ray ends when it leaves the board or wraps around an edge;
    # wrapping shows up as landing on the opposite edge file.
    for step, wrapped_file in ((7, 7), (-9, 7), (9, 0), (-7, 0)):
        square = ss + step
        while 0 <= square < 64 and square % 8 != wrapped_file:
            targets.append(square)
            square += step
    # Surrounding 5x5 window; the rank comparison rejects targets that
    # wrapped around a board edge.
    for rank_offset in range(-2, 3):
        for file_offset in range(-2, 3):
            target = ss + file_offset + rank_offset * 8
            if (0 <= target < 64
                    and target // 8 == ss // 8 + rank_offset
                    and target != ss):
                targets.append(target)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return [(ss, target) for target in dict.fromkeys(targets)]
# Build the move-string -> token-id table, including promotion variants
def create_move_dict():
    """Return a dict mapping move strings to consecutive integer ids.

    Ids are handed out in the order ``chess_moves`` yields moves for squares
    0..63. Every move from rank index 6 to rank index 7, or from rank index 1
    to rank index 0, is immediately followed by its four promotion variants
    (suffixes 'q', 'r', 'b', 'n'). Only the ranks are checked, so every listed
    move between those ranks receives promotion variants. Finally "pad" and
    "start" are stored with the fixed ids 2064 and 2065.
    """
    promotion_pieces = ('q', 'r', 'b', 'n')  # Queen, Rook, Bishop, Knight
    move_dict = {}
    next_id = 0
    for origin in range(64):
        origin_rank = origin // 8
        for source, destination in chess_moves(origin):
            uci = f"{square_to_algebraic(source)}{square_to_algebraic(destination)}"
            move_dict[uci] = next_id
            next_id += 1
            destination_rank = destination // 8
            white_promotion = destination_rank == 7 and origin_rank == 6
            black_promotion = destination_rank == 0 and origin_rank == 1
            if not (white_promotion or black_promotion):
                continue
            for piece in promotion_pieces:
                move_dict[f"{uci}{piece}"] = next_id
                next_id += 1
    move_dict["pad"] = 2064
    move_dict["start"] = 2065
    return move_dict
def inverse_move_dict(move_dict):
    """Return the value -> key inversion of ``move_dict``.

    If several keys share a value, the key that comes last in iteration
    order wins.
    """
    return {token_id: move for move, token_id in move_dict.items()}
def tokenize_game(moves_list):
    """Return the token id of every move in ``moves_list``.

    A fresh move table is built on each call; raises KeyError for a move the
    table does not contain.
    """
    lookup = create_move_dict()
    return [lookup[move] for move in moves_list]


if __name__ == "__main__":
    t = Tokenizer()