Nestor02 committed on
Commit
b922fdb
·
verified ·
1 Parent(s): ba9d7e4

Chess Challenge submission by Nestor02

Browse files
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - chess
5
+ - llm-course
6
+ - chess-challenge
7
+ license: mit
8
+ ---
9
+
10
+ # chess_done
11
+
12
+ Chess model submitted to the LLM Course Chess Challenge.
13
+
14
+ ## Submission Info
15
+
16
+ - **Submitted by**: [Nestor02](https://huggingface.co/Nestor02)
17
+ - **Parameters**: 993,312
18
+ - **Organization**: LLM-course
19
+
20
+ ## Model Details
21
+
22
+ - **Architecture**: Chess Transformer (GPT-style)
23
+ - **Vocab size**: 148
24
+ - **Embedding dim**: 144
25
+ - **Layers**: 7
26
+ - **Heads**: 4
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ChessForCausalLM"
4
+ ],
5
+ "attention_type": "gqa",
6
+ "auto_map": {
7
+ "AutoConfig": "model.ChessConfig",
8
+ "AutoModelForCausalLM": "model.ChessForCausalLM"
9
+ },
10
+ "bos_token_id": 1,
11
+ "dropout": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 2,
14
+ "ffn_type": "swiglu",
15
+ "layer_norm_epsilon": 1e-05,
16
+ "legal_loss_weight": 0.0,
17
+ "model_type": "chess_transformer",
18
+ "n_ctx": 256,
19
+ "n_embd": 144,
20
+ "n_head": 4,
21
+ "n_inner": 256,
22
+ "n_kv_heads": 2,
23
+ "n_layer": 7,
24
+ "pad_token_id": 0,
25
+ "pos_encoding": "rope",
26
+ "rope_theta": 10000.0,
27
+ "tie_weights": true,
28
+ "transformers_version": "4.57.3",
29
+ "vocab_size": 148
30
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5e4fab00a102d89e3735c2df86891c8554d2cdb72ac0e91b7c50191067dd76
3
+ size 3980104
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[BOS]",
3
+ "eos_token": "[EOS]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Decomposed Chess Tokenizer (v2) for the Chess Challenge.
3
+
4
+ This tokenizer factorizes each move into a small set of reusable tokens:
5
+ - One token for (color + piece): e.g. "WP", "BN"
6
+ - One token for the from-square with role suffix: e.g. "e2_f"
7
+ - One token for the to-square with role suffix: e.g. "e4_t"
8
+ - Optional promotion token: "q", "r", "b", "n"
9
+
10
+ It is compatible with the teacher evaluator's supported formats:
11
+ - Standard: "WPe2e4", "BNg8f6", with optional annotations "(x)", "(+)", "(o)/(O)", "(Q)"
12
+ - Decomposed: "WP e2_f e4_t"
13
+ - UCI: "e2e4", "e7e8q"
14
+ - UCI spaced: "e2 e4"
15
+
16
+ The tokenizer parses those inputs and emits the decomposed tokens above.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import os
23
+ import re
24
+ from pathlib import Path
25
+ from typing import Dict, List, Optional
26
+
27
+ from transformers import PreTrainedTokenizer
28
+
29
+
30
class ChessTokenizer(PreTrainedTokenizer):
    """Decomposed chess-move tokenizer (v2) with a fixed 148-token vocabulary.

    Each move is factored into a small set of reusable tokens:

    - one (color + piece) token, e.g. ``"WP"``, ``"BN"``
    - one from-square token with role suffix, e.g. ``"e2_f"``
    - one to-square token with role suffix, e.g. ``"e4_t"``
    - an optional promotion token: ``"q"``, ``"r"``, ``"b"``, ``"n"``

    Accepted input formats (matching the challenge evaluator):

    - standard: ``"WPe2e4"``, optionally annotated with ``"(x)"``, ``"(+)"``,
      ``"(o)"``/``"(O)"``, ``"(Q)"``
    - decomposed: ``"WP e2_f e4_t"`` (optionally followed by a bare
      promotion letter)
    - UCI: ``"e2e4"``, ``"e7e8q"``
    - spaced UCI: ``"e2 e4"``
    """

    model_input_names = ["input_ids", "attention_mask"]
    vocab_files_names = {"vocab_file": "vocab.json"}

    # Fixed special-token literals; they occupy ids 0..3 in the default vocab.
    PAD_TOKEN = "[PAD]"
    BOS_TOKEN = "[BOS]"
    EOS_TOKEN = "[EOS]"
    UNK_TOKEN = "[UNK]"

    # Compiled once at class level so tokenization loops pay no re-compile cost.
    _COLOR_PIECE_RE = re.compile(r"^[WB][PNBRQK]$")
    _SQUARE_RE = re.compile(r"[a-h][1-8]")
    _SQUARE_ROLE_RE = re.compile(r"^([a-h][1-8])_([ft])$", re.IGNORECASE)
    _PLAIN_SQUARE_RE = re.compile(r"^[a-h][1-8]$", re.IGNORECASE)

    def __init__(
        self,
        vocab_file: Optional[str] = None,
        vocab: Optional[Dict[str, int]] = None,
        **kwargs,
    ):
        """Build the tokenizer from an explicit vocab, a vocab file, or defaults.

        Args:
            vocab_file: Path to a ``vocab.json`` mapping token -> id.
            vocab: In-memory token -> id mapping; takes precedence over
                ``vocab_file``.
            **kwargs: Forwarded to ``PreTrainedTokenizer``. Any special-token
                overrides in ``kwargs`` are discarded because this tokenizer
                pins its own [PAD]/[BOS]/[EOS]/[UNK] set.
        """
        self._pad_token = self.PAD_TOKEN
        self._bos_token = self.BOS_TOKEN
        self._eos_token = self.EOS_TOKEN
        self._unk_token = self.UNK_TOKEN

        # Remove duplicate special-token entries passed through kwargs so they
        # cannot collide with the explicit values handed to super().__init__.
        for key in ("pad_token", "bos_token", "eos_token", "unk_token"):
            kwargs.pop(key, None)

        if vocab is not None:
            self._vocab = vocab
        elif vocab_file is not None and os.path.exists(vocab_file):
            with open(vocab_file, "r", encoding="utf-8") as f:
                self._vocab = json.load(f)
        else:
            self._vocab = self._create_default_vocab()

        # Reverse mapping for id -> token lookups during decoding.
        self._ids_to_tokens = {v: k for k, v in self._vocab.items()}

        super().__init__(
            pad_token=self._pad_token,
            bos_token=self._bos_token,
            eos_token=self._eos_token,
            unk_token=self._unk_token,
            **kwargs,
        )

    @classmethod
    def build_vocab_from_dataset(cls, *_, **__) -> "ChessTokenizer":
        """Return a tokenizer with the fixed default vocabulary.

        Kept for API compatibility with ``train.py``. The v2 tokenizer uses a
        fixed vocabulary (colors/pieces/squares/promotions), so dataset
        statistics are not required and all arguments are ignored.
        """
        # NOTE: the original annotation referenced a nonexistent
        # "ChessTokenizer2"; corrected to this class.
        return cls()

    def _create_default_vocab(self) -> Dict[str, int]:
        """Create the canonical 148-token vocabulary in deterministic order.

        Layout: 4 specials, 12 color+piece tokens, 64 from-squares,
        64 to-squares, 4 promotion letters.
        """
        special_tokens = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]

        color_pieces = [
            f"{color}{piece}"
            for color in ("W", "B")
            for piece in ("P", "N", "B", "R", "Q", "K")
        ]

        squares = [f"{file}{rank}" for rank in range(1, 9) for file in "abcdefgh"]
        square_from = [f"{sq}_f" for sq in squares]
        square_to = [f"{sq}_t" for sq in squares]

        promotions = ["q", "r", "b", "n"]

        # Deterministic order for reproducibility (must match vocab.json).
        all_tokens = special_tokens + color_pieces + square_from + square_to + promotions
        return {tok: idx for idx, tok in enumerate(all_tokens)}

    @property
    def vocab_size(self) -> int:
        """Number of tokens in the vocabulary (148 for the default vocab)."""
        return len(self._vocab)

    def get_vocab(self) -> Dict[str, int]:
        """Return a copy of the token -> id mapping."""
        return dict(self._vocab)

    def _tokenize(self, text: str) -> List[str]:
        """Split *text* on whitespace and map each chunk to vocab tokens.

        Unrecognized chunks become ``[UNK]``; chunks that are pure annotation
        (e.g. a stray ``"(x)"``) are silently dropped.
        """
        parts = text.strip().split()
        if not parts:
            return []

        out: List[str] = []
        next_role = "f"  # Role assigned to the next bare square ("e2" style).

        for part in parts:
            if part in {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}:
                out.append(part)
                next_role = "f"
                continue

            # Decomposed color+piece token: "WP", "BN", ...
            if self._COLOR_PIECE_RE.match(part.upper()):
                out.append(part.upper())
                next_role = "f"
                continue

            # Square with explicit role suffix: "e2_f" / "e4_t"
            m_role = self._SQUARE_ROLE_RE.match(part)
            if m_role:
                sq = m_role.group(1).lower()
                role = m_role.group(2).lower()
                out.append(f"{sq}_{role}")
                next_role = "t" if role == "f" else "f"
                continue

            # Plain square: "e2" (role assigned by position).
            if self._PLAIN_SQUARE_RE.match(part):
                sq = part.lower()
                out.append(f"{sq}_{next_role}")
                next_role = "t" if next_role == "f" else "f"
                continue

            # Standalone promotion chunk: "q", "Q", "=Q", "(Q)", ...
            # BUGFIX: bare letters ("q") previously fell through to [UNK]
            # because _extract_promotion only understood "=Q" / "(Q)" forms,
            # contradicting the decomposed format this class documents.
            if self._looks_like_promo_only(part):
                out.append(part.strip().strip("=()").lower())
                continue

            # Standard / UCI move chunk: "WPe2e4(x+)", "e2e4", "e7e8=Q", ...
            move_tokens = self._tokenize_move_chunk(part)
            if move_tokens:
                out.extend(move_tokens)
                next_role = "f"
                continue

            # Skip pure annotation chunks if they appear separated (rare).
            if re.fullmatch(r"[\(\)\+\*xoO=]+", part):
                continue

            out.append(self.UNK_TOKEN)

        return out

    def _looks_like_promo_only(self, part: str) -> bool:
        """True if *part* is nothing but a promotion marker: "q", "=Q", "(Q)"."""
        part_stripped = part.strip()
        return (
            re.fullmatch(r"[qrbnQRBN]|=[qrbnQRBN]|\([qrbnQRBN]\)", part_stripped)
            is not None
        )

    def _extract_promotion(self, text: str) -> Optional[str]:
        """Extract an embedded promotion letter from "(Q)" or "=Q" notation.

        Returns the lowercase letter ("q"/"r"/"b"/"n") or None when *text*
        carries no such marker.
        """
        text_lower = text.lower()
        m = re.search(r"\(([qrbn])\)", text_lower)
        if m:
            return m.group(1)
        m = re.search(r"=([qrbn])", text_lower)
        if m:
            return m.group(1)
        return None

    def _tokenize_move_chunk(self, chunk: str) -> List[str]:
        """Tokenize one whole-move chunk ("WPe2e4(x)", "e2e4", "e7e8=q").

        Returns the decomposed token sequence, or an empty list when *chunk*
        does not contain at least a from-square and a to-square.
        """
        chunk_stripped = chunk.strip()
        if not chunk_stripped:
            return []

        chunk_lower = chunk_stripped.lower()
        squares = re.findall(self._SQUARE_RE, chunk_lower)
        if len(squares) < 2:
            return []

        from_sq, to_sq = squares[0], squares[1]

        # Optional leading color+piece prefix ("WP", "BN", ...).
        color_piece = None
        if len(chunk_stripped) >= 2 and self._COLOR_PIECE_RE.match(chunk_stripped[:2].upper()):
            color_piece = chunk_stripped[:2].upper()

        tokens: List[str] = []
        if color_piece:
            tokens.append(color_piece)

        tokens.append(f"{from_sq}_f")
        tokens.append(f"{to_sq}_t")

        # Promotion in UCI/"=Q" style: look right after the destination square.
        after_to = chunk_lower.find(to_sq)
        if after_to != -1:
            remaining = chunk_lower[after_to + 2 : after_to + 6]
            m = re.search(r"[=]?([qrbn])", remaining)
            if m:
                tokens.append(m.group(1))

        # Also support dataset-style "(Q)" promotions (deduplicated).
        promo = self._extract_promotion(chunk_stripped)
        if promo and promo not in tokens:
            tokens.append(promo)

        return tokens

    def _convert_token_to_id(self, token: str) -> int:
        """Map a token to its id; unknown tokens map to the [UNK] id."""
        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN, 0))

    def _convert_id_to_token(self, index: int) -> str:
        """Map an id back to its token; unknown ids map to [UNK]."""
        return self._ids_to_tokens.get(index, self.UNK_TOKEN)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Join tokens with spaces, dropping the four special tokens."""
        special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
        return " ".join(t for t in tokens if t not in special)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple:
        """Write the vocabulary to ``<prefix->vocab.json`` in *save_directory*.

        Creates the directory if needed and returns a 1-tuple with the path,
        per the ``PreTrainedTokenizer`` contract.
        """
        if not os.path.isdir(save_directory):
            os.makedirs(save_directory, exist_ok=True)

        vocab_file = os.path.join(
            save_directory,
            (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
        )

        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self._vocab, f, ensure_ascii=False, indent=2)

        return (vocab_file,)
tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[BOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "auto_map": {
37
+ "AutoTokenizer": [
38
+ "tokenizer.ChessTokenizer",
39
+ null
40
+ ]
41
+ },
42
+ "bos_token": "[BOS]",
43
+ "clean_up_tokenization_spaces": false,
44
+ "eos_token": "[EOS]",
45
+ "extra_special_tokens": {},
46
+ "model_max_length": 1000000000000000019884624838656,
47
+ "pad_token": "[PAD]",
48
+ "tokenizer_class": "ChessTokenizer",
49
+ "unk_token": "[UNK]"
50
+ }
vocab.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 0,
3
+ "[BOS]": 1,
4
+ "[EOS]": 2,
5
+ "[UNK]": 3,
6
+ "WP": 4,
7
+ "WN": 5,
8
+ "WB": 6,
9
+ "WR": 7,
10
+ "WQ": 8,
11
+ "WK": 9,
12
+ "BP": 10,
13
+ "BN": 11,
14
+ "BB": 12,
15
+ "BR": 13,
16
+ "BQ": 14,
17
+ "BK": 15,
18
+ "a1_f": 16,
19
+ "b1_f": 17,
20
+ "c1_f": 18,
21
+ "d1_f": 19,
22
+ "e1_f": 20,
23
+ "f1_f": 21,
24
+ "g1_f": 22,
25
+ "h1_f": 23,
26
+ "a2_f": 24,
27
+ "b2_f": 25,
28
+ "c2_f": 26,
29
+ "d2_f": 27,
30
+ "e2_f": 28,
31
+ "f2_f": 29,
32
+ "g2_f": 30,
33
+ "h2_f": 31,
34
+ "a3_f": 32,
35
+ "b3_f": 33,
36
+ "c3_f": 34,
37
+ "d3_f": 35,
38
+ "e3_f": 36,
39
+ "f3_f": 37,
40
+ "g3_f": 38,
41
+ "h3_f": 39,
42
+ "a4_f": 40,
43
+ "b4_f": 41,
44
+ "c4_f": 42,
45
+ "d4_f": 43,
46
+ "e4_f": 44,
47
+ "f4_f": 45,
48
+ "g4_f": 46,
49
+ "h4_f": 47,
50
+ "a5_f": 48,
51
+ "b5_f": 49,
52
+ "c5_f": 50,
53
+ "d5_f": 51,
54
+ "e5_f": 52,
55
+ "f5_f": 53,
56
+ "g5_f": 54,
57
+ "h5_f": 55,
58
+ "a6_f": 56,
59
+ "b6_f": 57,
60
+ "c6_f": 58,
61
+ "d6_f": 59,
62
+ "e6_f": 60,
63
+ "f6_f": 61,
64
+ "g6_f": 62,
65
+ "h6_f": 63,
66
+ "a7_f": 64,
67
+ "b7_f": 65,
68
+ "c7_f": 66,
69
+ "d7_f": 67,
70
+ "e7_f": 68,
71
+ "f7_f": 69,
72
+ "g7_f": 70,
73
+ "h7_f": 71,
74
+ "a8_f": 72,
75
+ "b8_f": 73,
76
+ "c8_f": 74,
77
+ "d8_f": 75,
78
+ "e8_f": 76,
79
+ "f8_f": 77,
80
+ "g8_f": 78,
81
+ "h8_f": 79,
82
+ "a1_t": 80,
83
+ "b1_t": 81,
84
+ "c1_t": 82,
85
+ "d1_t": 83,
86
+ "e1_t": 84,
87
+ "f1_t": 85,
88
+ "g1_t": 86,
89
+ "h1_t": 87,
90
+ "a2_t": 88,
91
+ "b2_t": 89,
92
+ "c2_t": 90,
93
+ "d2_t": 91,
94
+ "e2_t": 92,
95
+ "f2_t": 93,
96
+ "g2_t": 94,
97
+ "h2_t": 95,
98
+ "a3_t": 96,
99
+ "b3_t": 97,
100
+ "c3_t": 98,
101
+ "d3_t": 99,
102
+ "e3_t": 100,
103
+ "f3_t": 101,
104
+ "g3_t": 102,
105
+ "h3_t": 103,
106
+ "a4_t": 104,
107
+ "b4_t": 105,
108
+ "c4_t": 106,
109
+ "d4_t": 107,
110
+ "e4_t": 108,
111
+ "f4_t": 109,
112
+ "g4_t": 110,
113
+ "h4_t": 111,
114
+ "a5_t": 112,
115
+ "b5_t": 113,
116
+ "c5_t": 114,
117
+ "d5_t": 115,
118
+ "e5_t": 116,
119
+ "f5_t": 117,
120
+ "g5_t": 118,
121
+ "h5_t": 119,
122
+ "a6_t": 120,
123
+ "b6_t": 121,
124
+ "c6_t": 122,
125
+ "d6_t": 123,
126
+ "e6_t": 124,
127
+ "f6_t": 125,
128
+ "g6_t": 126,
129
+ "h6_t": 127,
130
+ "a7_t": 128,
131
+ "b7_t": 129,
132
+ "c7_t": 130,
133
+ "d7_t": 131,
134
+ "e7_t": 132,
135
+ "f7_t": 133,
136
+ "g7_t": 134,
137
+ "h7_t": 135,
138
+ "a8_t": 136,
139
+ "b8_t": 137,
140
+ "c8_t": 138,
141
+ "d8_t": 139,
142
+ "e8_t": 140,
143
+ "f8_t": 141,
144
+ "g8_t": 142,
145
+ "h8_t": 143,
146
+ "q": 144,
147
+ "r": 145,
148
+ "b": 146,
149
+ "n": 147
150
+ }