corentincaris committed on
Commit 28e5bf6 · verified · 1 Parent(s): abd7e24

Chess Challenge submission by corentincaris

Files changed (5):
  1. README.md +2 -2
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.py +71 -92
  5. vocab.json +68 -647
README.md CHANGED
@@ -14,13 +14,13 @@ Chess model submitted to the LLM Course Chess Challenge.
 ## Submission Info
 
 - **Submitted by**: [corentincaris](https://huggingface.co/corentincaris)
-- **Parameters**: 980,880
+- **Parameters**: 911,400
 - **Organization**: LLM-course
 
 ## Model Details
 
 - **Architecture**: Chess Transformer (GPT-style)
-- **Vocab size**: 651
+- **Vocab size**: 72
 - **Embedding dim**: 120
 - **Layers**: 5
 - **Heads**: 8
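The two changed numbers are mutually consistent: shrinking the vocabulary from 651 to 72 removes exactly (651 - 72) x 120 = 69,480 embedding weights, which is the difference between 980,880 and 911,400. A quick back-of-the-envelope check; the GPT-2-style block layout (4xd feed-forward, biases, LayerNorms) and the 256-position learned positional embedding are assumptions inferred from the totals, not stated anywhere in this diff:

```python
# Hypothetical parameter accounting that reproduces both README totals.
d, n_layers, ctx = 120, 5, 256  # ctx=256 is an inferred assumption

attn = (3 * d * d + 3 * d) + (d * d + d)     # QKV + output projection
mlp = (d * 4 * d + 4 * d) + (4 * d * d + d)  # up- and down-projection
block = attn + mlp + 2 * (2 * d)             # plus two LayerNorms

def total_params(vocab_size: int) -> int:
    # tie_weights=true in config.json: the embedding is counted once;
    # the trailing 2*d is a final LayerNorm.
    return vocab_size * d + ctx * d + n_layers * block + 2 * d

print(total_params(651))  # 980880, the old README value
print(total_params(72))   # 911400, the new README value
```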
config.json CHANGED
@@ -16,5 +16,5 @@
   "pad_token_id": 0,
   "tie_weights": true,
   "transformers_version": "4.57.5",
-  "vocab_size": 651
+  "vocab_size": 72
 }
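Because `tie_weights` is true, `vocab_size` sets both the embedding table and the output head, so it has to stay in sync with the tokenizer's vocabulary file. A minimal consistency check, assuming both committed files sit in the current directory:

```python
import json

with open("config.json") as f:
    config = json.load(f)
with open("vocab.json") as f:
    vocab = json.load(f)

# The model config and the tokenizer vocabulary must agree on the size.
assert config["vocab_size"] == len(vocab) == 72
```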
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb36b137cba1ba135d520ebe357fc4c09f0779faf7b31da82b74d4d277c0e06f
-size 3928944
+oid sha256:e3884f7282fb27a863b10d268ab8d6f2eb0d1c24c8e67461d58e189cd8bd262f
+size 3651024
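The size change tracks the parameter change at 4 bytes per weight, which suggests float32 storage (inferred from the sizes, not stated in the diff); both checkpoints carry an identical 5,424-byte safetensors header:

```python
# Both checkpoint sizes decompose into 4 bytes per parameter plus one header.
print(3928944 - 980880 * 4)  # 5424 -> header bytes in the old checkpoint
print(3651024 - 911400 * 4)  # 5424 -> header bytes in the new checkpoint
```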
tokenizer.py CHANGED
@@ -1,32 +1,30 @@
 """
 Custom Chess Tokenizer for the Chess Challenge.
 
-This tokenizer splits moves into 3 parts:
-1. Piece (e.g., WP)
-2. From Square (e.g., e2)
-3. To Square + Suffix (e.g., e4 or e4(x))
+This tokenizer reduces each move of the extended UCI notation used by the
+Lichess dataset (e.g., WPe2e4, BNg8f6) to square and promotion tokens.
+
+The dataset format uses:
+- W/B prefix for White/Black
+- Piece letter: P=Pawn, N=Knight, B=Bishop, R=Rook, Q=Queen, K=King
+- Source and destination squares (e.g., e2e4)
+- Special suffixes: (x)=capture, (+)=check, (+*)=checkmate, (o)/(O)=castling
 """
 
 from __future__ import annotations
 
 import json
 import os
+from pathlib import Path
 from typing import Dict, List, Optional
+import re
 
 from transformers import PreTrainedTokenizer
 
 
 class ChessTokenizer(PreTrainedTokenizer):
-    """
-    A custom tokenizer for chess moves using a 3-part split.
-
-    Splits "WPe2e4(x)" into ["WP", "e2", "e4(x)"].
-    """
-
     model_input_names = ["input_ids", "attention_mask"]
-    vocab_files_names = {"vocab_file": "vocab.json"}
 
-    # Special tokens
     PAD_TOKEN = "[PAD]"
     BOS_TOKEN = "[BOS]"
     EOS_TOKEN = "[EOS]"
@@ -38,15 +36,18 @@ class ChessTokenizer(PreTrainedTokenizer):
         vocab: Optional[Dict[str, int]] = None,
         **kwargs,
     ):
-        # Clean kwargs to avoid conflicts
+        self._pad_token = self.PAD_TOKEN
+        self._bos_token = self.BOS_TOKEN
+        self._eos_token = self.EOS_TOKEN
+        self._unk_token = self.UNK_TOKEN
+
         kwargs.pop("pad_token", None)
         kwargs.pop("bos_token", None)
         kwargs.pop("eos_token", None)
         kwargs.pop("unk_token", None)
-
-        self.vocab_file = vocab_file
 
-        # Load vocab
+        self.token_pattern = re.compile(r'[a-h][1-8]|[qrbn]')
+
         if vocab is not None:
            self._vocab = vocab
         elif vocab_file is not None and os.path.exists(vocab_file):
@@ -54,113 +55,91 @@
                 self._vocab = json.load(f)
         else:
             self._vocab = self._create_default_vocab()
-
+
         self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
 
         super().__init__(
-            pad_token=self.PAD_TOKEN,
-            bos_token=self.BOS_TOKEN,
-            eos_token=self.EOS_TOKEN,
-            unk_token=self.UNK_TOKEN,
+            pad_token=self._pad_token,
+            bos_token=self._bos_token,
+            eos_token=self._eos_token,
+            unk_token=self._unk_token,
            **kwargs,
         )
 
     def _create_default_vocab(self) -> Dict[str, int]:
-        """Create a minimal default vocabulary with just special tokens."""
         special_tokens = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]
         vocab = {token: idx for idx, token in enumerate(special_tokens)}
+        idx = len(vocab)
+
+        # Squares (4-67)
+        for f in 'abcdefgh':
+            for r in '12345678':
+                vocab[f"{f}{r}"] = idx
+                idx += 1
+
+        # Promotions (68-71)
+        for p in ['q', 'r', 'b', 'n']:
+            vocab[p] = idx
+            idx += 1
         return vocab
 
-    @property
-    def vocab_size(self) -> int:
-        return len(self._vocab)
-
-    def get_vocab(self) -> Dict[str, int]:
-        return dict(self._vocab)
-
     def _tokenize(self, text: str) -> List[str]:
         """
-        Tokenize a string of moves into 3 components per move.
+        Tokenizes text by first normalizing specific chess patterns
+        and then extracting squares/promotions.
         """
-        tokens = []
-        raw_moves = text.strip().split()
+        text = (text.replace("(Q)", "q")
+                    .replace("(R)", "r")
+                    .replace("(B)", "b")
+                    .replace("(N)", "n"))
 
-        for move in raw_moves:
-            if len(move) >= 6:
-                # 1. Piece (WP)
-                tokens.append(move[:2])
-                # 2. From (e2)
-                tokens.append(move[2:4])
-                # 3. To (e4 or e4(x)) - grab the rest
-                tokens.append(move[4:])
-            else:
-                tokens.append(self.UNK_TOKEN)
-        return tokens
+        return self.token_pattern.findall(text)
 
     def _convert_token_to_id(self, token: str) -> int:
-        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN))
+        """Convert a token to its ID."""
+        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN, 0))
 
     def _convert_id_to_token(self, index: int) -> str:
+        """Convert an ID to its token."""
         return self._ids_to_tokens.get(index, self.UNK_TOKEN)
 
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
-        # Filter specials
-        filtered = [t for t in tokens if t not in [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]]
-        # Join with space. Result: "WP e2 e4 BN g8 f6"
-        return " ".join(filtered)
+        """Reconstructs a standard UCI string (e.g. "e2e4 a7a8q")."""
+        special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
+        clean_tokens = [t for t in tokens if t not in special]
+
+        output = []
+        for token in clean_tokens:
+            if token in ['q', 'r', 'b', 'n'] and output:
+                output[-1] += token
+            elif output and len(output[-1]) == 2 and output[-1][0] in 'abcdefgh':
+                output[-1] += token
+            else:
+                output.append(token)
+
+        return " ".join(output)
 
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple:
         if not os.path.isdir(save_directory):
             os.makedirs(save_directory, exist_ok=True)
-
         vocab_file = os.path.join(
-            save_directory,
-            (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
+            save_directory, (filename_prefix + "-" if filename_prefix else "") + "vocab.json"
         )
-
         with open(vocab_file, "w", encoding="utf-8") as f:
             json.dump(self._vocab, f, ensure_ascii=False, indent=2)
-
         return (vocab_file,)
-
+
     @classmethod
-    def build_vocab_from_dataset(
-        cls,
-        dataset_name: str = "dlouapre/lichess_2025-01_1M",
-        split: str = "train",
-        column: str = "text",
-        min_frequency: int = 100,
-        max_samples: Optional[int] = 100000,
-    ) -> "ChessTokenizer":
-        from datasets import load_dataset
-
-        print(f"Loading dataset {dataset_name} to build vocabulary...")
-        dataset = load_dataset(dataset_name, split=split, streaming=True)
-
-        unique_tokens = set()
-
-        print("Building vocabulary...")
-        count = 0
-        for example in dataset:
-            moves = example[column].strip().split()
-            for move in moves:
-                if len(move) >= 6:
-                    unique_tokens.add(move[:2])  # Piece
-                    unique_tokens.add(move[2:4])  # From
-                    unique_tokens.add(move[4:])  # To (includes suffix like (x))
-            count += 1
-            if max_samples is not None and count >= max_samples:
-                break
-
-        special = [cls.PAD_TOKEN, cls.BOS_TOKEN, cls.EOS_TOKEN, cls.UNK_TOKEN]
-        # Sort tokens to ensure deterministic IDs
-        all_tokens = special + sorted(list(unique_tokens))
+    def build_vocab_from_iterator(cls, iterator, min_frequency=1):
+        return cls()
 
-        vocab = {token: idx for idx, token in enumerate(all_tokens)}
-        print(f"Built vocabulary with {len(vocab)} tokens")
-        return cls(vocab=vocab)
-
+    @classmethod
+    def build_vocab_from_dataset(cls, **kwargs):
+        return cls()
 
-# Kept for compatibility if other scripts import it
-def count_vocab_from_dataset(*args, **kwargs):
-    return {}
+    @property
+    def vocab_size(self) -> int:
+        return len(self._vocab)
+
+    def get_vocab(self) -> Dict[str, int]:
+        return dict(self._vocab)
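The net effect of the rewrite: a move is no longer looked up as a whole piece/from/to(+suffix) triple but reduced to board squares plus an optional promotion letter, which is what shrinks the vocabulary from 651 to 72 entries. A minimal round-trip sketch, assuming the committed tokenizer.py and vocab.json sit in the working directory (the sample game string is illustrative):

```python
from tokenizer import ChessTokenizer

tok = ChessTokenizer(vocab_file="vocab.json")

# Extended UCI input: piece prefixes and (x)/(+) suffixes are discarded;
# only squares and promotion letters survive tokenization.
game = "WPe2e4 BPd7d5 WPe4d5(x) BQd8d5(x) WPa7a8(Q)"
tokens = tok.tokenize(game)
# ['e2', 'e4', 'd7', 'd5', 'e4', 'd5', 'd8', 'd5', 'a7', 'a8', 'q']

# Decoding regroups square pairs (plus a trailing promotion) into plain UCI.
print(tok.convert_tokens_to_string(tokens))
# e2e4 d7d5 e4d5 d8d5 a7a8q
```

One caveat visible in the `_tokenize` replace chain: only the bare promotion suffixes `(Q)`/`(R)`/`(B)`/`(N)` are normalized, so promotions fused with capture or check markers (such as the `(xQ)` and `(+Q)` forms in the old vocabulary) lose their promotion letter.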
vocab.json CHANGED
@@ -3,651 +3,72 @@
   "[BOS]": 1,
   "[EOS]": 2,
   "[UNK]": 3,
-  "BB": 4,
-  "BK": 5,
-  "BN": 6,
-  "BP": 7,
-  "BQ": 8,
-  "BR": 9,
-  "WB": 10,
-  "WK": 11,
-  "WN": 12,
-  "WP": 13,
-  "WQ": 14,
-  "WR": 15,
-  "a1": 16,
-  "a1(+)": 17,
-  "a1(+*)": 18,
-  "a1(+*Q)": 19,
-  "a1(+*R)": 20,
-  "a1(+N)": 21,
-  "a1(+Q)": 22,
-  "a1(+R)": 23,
-  "a1(B)": 24,
-  "a1(N)": 25,
-  "a1(Q)": 26,
-  "a1(R)": 27,
-  "a1(x)": 28,
-  "a1(x+)": 29,
-  "a1(x+*)": 30,
-  "a1(x+*Q)": 31,
-  "a1(x+Q)": 32,
-  "a1(xQ)": 33,
-  "a2": 34,
-  "a2(+)": 35,
-  "a2(+*)": 36,
-  "a2(x)": 37,
-  "a2(x+)": 38,
-  "a2(x+*)": 39,
-  "a3": 40,
-  "a3(+)": 41,
-  "a3(+*)": 42,
-  "a3(x)": 43,
-  "a3(x+)": 44,
-  "a3(x+*)": 45,
-  "a3(xE)": 46,
-  "a3(xE+)": 47,
-  "a4": 48,
-  "a4(+)": 49,
-  "a4(+*)": 50,
-  "a4(x)": 51,
-  "a4(x+)": 52,
-  "a4(x+*)": 53,
-  "a5": 54,
-  "a5(+)": 55,
-  "a5(+*)": 56,
-  "a5(x)": 57,
-  "a5(x+)": 58,
-  "a5(x+*)": 59,
-  "a6": 60,
-  "a6(+)": 61,
-  "a6(+*)": 62,
-  "a6(x)": 63,
-  "a6(x+)": 64,
-  "a6(x+*)": 65,
-  "a6(xE)": 66,
-  "a6(xE+)": 67,
-  "a7": 68,
-  "a7(+)": 69,
-  "a7(+*)": 70,
-  "a7(x)": 71,
-  "a7(x+)": 72,
-  "a7(x+*)": 73,
-  "a8": 74,
-  "a8(+)": 75,
-  "a8(+*)": 76,
-  "a8(+*Q)": 77,
-  "a8(+*R)": 78,
-  "a8(+N)": 79,
-  "a8(+Q)": 80,
-  "a8(B)": 81,
-  "a8(N)": 82,
-  "a8(Q)": 83,
-  "a8(R)": 84,
-  "a8(x)": 85,
-  "a8(x+)": 86,
-  "a8(x+*)": 87,
-  "a8(x+*Q)": 88,
-  "a8(x+Q)": 89,
-  "a8(xQ)": 90,
-  "b1": 91,
-  "b1(+)": 92,
-  "b1(+*)": 93,
-  "b1(+*Q)": 94,
-  "b1(+*R)": 95,
-  "b1(+B)": 96,
-  "b1(+N)": 97,
-  "b1(+Q)": 98,
-  "b1(B)": 99,
-  "b1(N)": 100,
-  "b1(Q)": 101,
-  "b1(R)": 102,
-  "b1(x)": 103,
-  "b1(x+)": 104,
-  "b1(x+*)": 105,
-  "b1(x+*Q)": 106,
-  "b1(x+Q)": 107,
-  "b1(xN)": 108,
-  "b1(xQ)": 109,
-  "b1(xR)": 110,
-  "b2": 111,
-  "b2(+)": 112,
-  "b2(+*)": 113,
-  "b2(x)": 114,
-  "b2(x+)": 115,
-  "b2(x+*)": 116,
-  "b3": 117,
-  "b3(+)": 118,
-  "b3(+*)": 119,
-  "b3(x)": 120,
-  "b3(x+)": 121,
-  "b3(x+*)": 122,
-  "b3(xE)": 123,
-  "b3(xE+)": 124,
-  "b4": 125,
-  "b4(+)": 126,
-  "b4(+*)": 127,
-  "b4(x)": 128,
-  "b4(x+)": 129,
-  "b4(x+*)": 130,
-  "b5": 131,
-  "b5(+)": 132,
-  "b5(+*)": 133,
-  "b5(x)": 134,
-  "b5(x+)": 135,
-  "b5(x+*)": 136,
-  "b6": 137,
-  "b6(+)": 138,
-  "b6(+*)": 139,
-  "b6(x)": 140,
-  "b6(x+)": 141,
-  "b6(x+*)": 142,
-  "b6(xE)": 143,
-  "b6(xE+)": 144,
-  "b7": 145,
-  "b7(+)": 146,
-  "b7(+*)": 147,
-  "b7(x)": 148,
-  "b7(x+)": 149,
-  "b7(x+*)": 150,
-  "b8": 151,
-  "b8(+)": 152,
-  "b8(+*)": 153,
-  "b8(+*Q)": 154,
-  "b8(+*R)": 155,
-  "b8(+Q)": 156,
-  "b8(+R)": 157,
-  "b8(B)": 158,
-  "b8(N)": 159,
-  "b8(Q)": 160,
-  "b8(R)": 161,
-  "b8(x)": 162,
-  "b8(x+)": 163,
-  "b8(x+*)": 164,
-  "b8(x+*Q)": 165,
-  "b8(x+Q)": 166,
-  "b8(xN)": 167,
-  "b8(xQ)": 168,
-  "c1": 169,
-  "c1(+)": 170,
-  "c1(+*)": 171,
-  "c1(+*Q)": 172,
-  "c1(+B)": 173,
-  "c1(+N)": 174,
-  "c1(+Q)": 175,
-  "c1(+R)": 176,
-  "c1(N)": 177,
-  "c1(O)": 178,
-  "c1(O+)": 179,
-  "c1(O+*)": 180,
-  "c1(Q)": 181,
-  "c1(R)": 182,
-  "c1(x)": 183,
-  "c1(x+)": 184,
-  "c1(x+*)": 185,
-  "c1(x+*Q)": 186,
-  "c1(x+Q)": 187,
-  "c1(x+R)": 188,
-  "c1(xN)": 189,
-  "c1(xQ)": 190,
-  "c2": 191,
-  "c2(+)": 192,
-  "c2(+*)": 193,
-  "c2(x)": 194,
-  "c2(x+)": 195,
-  "c2(x+*)": 196,
-  "c3": 197,
-  "c3(+)": 198,
-  "c3(+*)": 199,
-  "c3(x)": 200,
-  "c3(x+)": 201,
-  "c3(x+*)": 202,
-  "c3(xE)": 203,
-  "c3(xE+)": 204,
-  "c4": 205,
-  "c4(+)": 206,
-  "c4(+*)": 207,
-  "c4(x)": 208,
-  "c4(x+)": 209,
-  "c4(x+*)": 210,
-  "c5": 211,
-  "c5(+)": 212,
-  "c5(+*)": 213,
-  "c5(x)": 214,
-  "c5(x+)": 215,
-  "c5(x+*)": 216,
-  "c6": 217,
-  "c6(+)": 218,
-  "c6(+*)": 219,
-  "c6(x)": 220,
-  "c6(x+)": 221,
-  "c6(x+*)": 222,
-  "c6(xE)": 223,
-  "c6(xE+)": 224,
-  "c7": 225,
-  "c7(+)": 226,
-  "c7(+*)": 227,
-  "c7(x)": 228,
-  "c7(x+)": 229,
-  "c7(x+*)": 230,
-  "c8": 231,
-  "c8(+)": 232,
-  "c8(+*)": 233,
-  "c8(+*Q)": 234,
-  "c8(+B)": 235,
-  "c8(+N)": 236,
-  "c8(+Q)": 237,
-  "c8(+R)": 238,
-  "c8(B)": 239,
-  "c8(N)": 240,
-  "c8(O)": 241,
-  "c8(O+)": 242,
-  "c8(Q)": 243,
-  "c8(R)": 244,
-  "c8(x)": 245,
-  "c8(x+)": 246,
-  "c8(x+*)": 247,
-  "c8(x+*Q)": 248,
-  "c8(x+N)": 249,
-  "c8(x+Q)": 250,
-  "c8(xQ)": 251,
-  "d1": 252,
-  "d1(+)": 253,
-  "d1(+*)": 254,
-  "d1(+*Q)": 255,
-  "d1(+*R)": 256,
-  "d1(+N)": 257,
-  "d1(+Q)": 258,
-  "d1(+R)": 259,
-  "d1(N)": 260,
-  "d1(Q)": 261,
-  "d1(R)": 262,
-  "d1(x)": 263,
-  "d1(x+)": 264,
-  "d1(x+*)": 265,
-  "d1(x+*Q)": 266,
-  "d1(x+B)": 267,
-  "d1(x+Q)": 268,
-  "d1(xN)": 269,
-  "d1(xQ)": 270,
-  "d2": 271,
-  "d2(+)": 272,
-  "d2(+*)": 273,
-  "d2(x)": 274,
-  "d2(x+)": 275,
-  "d2(x+*)": 276,
-  "d3": 277,
-  "d3(+)": 278,
-  "d3(+*)": 279,
-  "d3(x)": 280,
-  "d3(x+)": 281,
-  "d3(x+*)": 282,
-  "d3(xE)": 283,
-  "d3(xE+)": 284,
-  "d4": 285,
-  "d4(+)": 286,
-  "d4(+*)": 287,
-  "d4(x)": 288,
-  "d4(x+)": 289,
-  "d4(x+*)": 290,
-  "d5": 291,
-  "d5(+)": 292,
-  "d5(+*)": 293,
-  "d5(x)": 294,
-  "d5(x+)": 295,
-  "d5(x+*)": 296,
-  "d6": 297,
-  "d6(+)": 298,
-  "d6(+*)": 299,
-  "d6(x)": 300,
-  "d6(x+)": 301,
-  "d6(x+*)": 302,
-  "d6(xE)": 303,
-  "d6(xE+)": 304,
-  "d7": 305,
-  "d7(+)": 306,
-  "d7(+*)": 307,
-  "d7(x)": 308,
-  "d7(x+)": 309,
-  "d7(x+*)": 310,
-  "d8": 311,
-  "d8(+)": 312,
-  "d8(+*)": 313,
-  "d8(+*Q)": 314,
-  "d8(+B)": 315,
-  "d8(+N)": 316,
-  "d8(+Q)": 317,
-  "d8(+R)": 318,
-  "d8(B)": 319,
-  "d8(N)": 320,
-  "d8(Q)": 321,
-  "d8(R)": 322,
-  "d8(x)": 323,
-  "d8(x+)": 324,
-  "d8(x+*)": 325,
-  "d8(x+*Q)": 326,
-  "d8(x+Q)": 327,
-  "d8(xQ)": 328,
-  "e1": 329,
-  "e1(+)": 330,
-  "e1(+*)": 331,
-  "e1(+*Q)": 332,
-  "e1(+*R)": 333,
-  "e1(+B)": 334,
-  "e1(+N)": 335,
-  "e1(+Q)": 336,
-  "e1(+R)": 337,
-  "e1(N)": 338,
-  "e1(Q)": 339,
-  "e1(R)": 340,
-  "e1(x)": 341,
-  "e1(x+)": 342,
-  "e1(x+*)": 343,
-  "e1(x+*Q)": 344,
-  "e1(x+Q)": 345,
-  "e1(xQ)": 346,
-  "e2": 347,
-  "e2(+)": 348,
-  "e2(+*)": 349,
-  "e2(x)": 350,
-  "e2(x+)": 351,
-  "e2(x+*)": 352,
-  "e3": 353,
-  "e3(+)": 354,
-  "e3(+*)": 355,
-  "e3(x)": 356,
-  "e3(x+)": 357,
-  "e3(x+*)": 358,
-  "e3(xE)": 359,
-  "e3(xE+)": 360,
-  "e4": 361,
-  "e4(+)": 362,
-  "e4(+*)": 363,
-  "e4(x)": 364,
-  "e4(x+)": 365,
-  "e4(x+*)": 366,
-  "e5": 367,
-  "e5(+)": 368,
-  "e5(+*)": 369,
-  "e5(x)": 370,
-  "e5(x+)": 371,
-  "e5(x+*)": 372,
-  "e6": 373,
-  "e6(+)": 374,
-  "e6(+*)": 375,
-  "e6(x)": 376,
-  "e6(x+)": 377,
-  "e6(x+*)": 378,
-  "e6(xE)": 379,
-  "e6(xE+)": 380,
-  "e7": 381,
-  "e7(+)": 382,
-  "e7(+*)": 383,
-  "e7(x)": 384,
-  "e7(x+)": 385,
-  "e7(x+*)": 386,
-  "e8": 387,
-  "e8(+)": 388,
-  "e8(+*)": 389,
-  "e8(+*Q)": 390,
-  "e8(+*R)": 391,
-  "e8(+N)": 392,
-  "e8(+Q)": 393,
-  "e8(+R)": 394,
-  "e8(B)": 395,
-  "e8(N)": 396,
-  "e8(Q)": 397,
-  "e8(R)": 398,
-  "e8(x)": 399,
-  "e8(x+)": 400,
-  "e8(x+*)": 401,
-  "e8(x+*Q)": 402,
-  "e8(x+*R)": 403,
-  "e8(x+Q)": 404,
-  "e8(x+R)": 405,
-  "e8(xN)": 406,
-  "e8(xQ)": 407,
-  "e8(xR)": 408,
-  "f1": 409,
-  "f1(+)": 410,
-  "f1(+*)": 411,
-  "f1(+*Q)": 412,
-  "f1(+*R)": 413,
-  "f1(+B)": 414,
-  "f1(+N)": 415,
-  "f1(+Q)": 416,
-  "f1(+R)": 417,
-  "f1(B)": 418,
-  "f1(N)": 419,
-  "f1(Q)": 420,
-  "f1(R)": 421,
-  "f1(x)": 422,
-  "f1(x+)": 423,
-  "f1(x+*)": 424,
-  "f1(x+*Q)": 425,
-  "f1(x+*R)": 426,
-  "f1(x+N)": 427,
-  "f1(x+Q)": 428,
-  "f1(x+R)": 429,
-  "f1(xQ)": 430,
-  "f2": 431,
-  "f2(+)": 432,
-  "f2(+*)": 433,
-  "f2(x)": 434,
-  "f2(x+)": 435,
-  "f2(x+*)": 436,
-  "f3": 437,
-  "f3(+)": 438,
-  "f3(+*)": 439,
-  "f3(x)": 440,
-  "f3(x+)": 441,
-  "f3(x+*)": 442,
-  "f3(xE)": 443,
-  "f3(xE+)": 444,
-  "f4": 445,
-  "f4(+)": 446,
-  "f4(+*)": 447,
-  "f4(x)": 448,
-  "f4(x+)": 449,
-  "f4(x+*)": 450,
-  "f5": 451,
-  "f5(+)": 452,
-  "f5(+*)": 453,
-  "f5(x)": 454,
-  "f5(x+)": 455,
-  "f5(x+*)": 456,
-  "f6": 457,
-  "f6(+)": 458,
-  "f6(+*)": 459,
-  "f6(x)": 460,
-  "f6(x+)": 461,
-  "f6(x+*)": 462,
-  "f6(xE)": 463,
-  "f6(xE+)": 464,
-  "f7": 465,
-  "f7(+)": 466,
-  "f7(+*)": 467,
-  "f7(x)": 468,
-  "f7(x+)": 469,
-  "f7(x+*)": 470,
-  "f8": 471,
-  "f8(+)": 472,
-  "f8(+*)": 473,
-  "f8(+*Q)": 474,
-  "f8(+*R)": 475,
-  "f8(+N)": 476,
-  "f8(+Q)": 477,
-  "f8(+R)": 478,
-  "f8(B)": 479,
-  "f8(N)": 480,
-  "f8(Q)": 481,
-  "f8(R)": 482,
-  "f8(x)": 483,
-  "f8(x+)": 484,
-  "f8(x+*)": 485,
-  "f8(x+*Q)": 486,
-  "f8(x+Q)": 487,
-  "f8(x+R)": 488,
-  "f8(xN)": 489,
-  "f8(xQ)": 490,
-  "g1": 491,
-  "g1(+)": 492,
-  "g1(+*)": 493,
-  "g1(+*Q)": 494,
-  "g1(+*R)": 495,
-  "g1(+B)": 496,
-  "g1(+N)": 497,
-  "g1(+Q)": 498,
-  "g1(+R)": 499,
-  "g1(B)": 500,
-  "g1(N)": 501,
-  "g1(Q)": 502,
-  "g1(R)": 503,
-  "g1(o)": 504,
-  "g1(o+)": 505,
-  "g1(o+*)": 506,
-  "g1(x)": 507,
-  "g1(x+)": 508,
-  "g1(x+*)": 509,
-  "g1(x+*Q)": 510,
-  "g1(x+*R)": 511,
-  "g1(x+N)": 512,
-  "g1(x+Q)": 513,
-  "g1(xB)": 514,
-  "g1(xQ)": 515,
-  "g2": 516,
-  "g2(+)": 517,
-  "g2(+*)": 518,
-  "g2(x)": 519,
-  "g2(x+)": 520,
-  "g2(x+*)": 521,
-  "g3": 522,
-  "g3(+)": 523,
-  "g3(+*)": 524,
-  "g3(x)": 525,
-  "g3(x+)": 526,
-  "g3(x+*)": 527,
-  "g3(xE)": 528,
-  "g3(xE+)": 529,
-  "g4": 530,
-  "g4(+)": 531,
-  "g4(+*)": 532,
-  "g4(x)": 533,
-  "g4(x+)": 534,
-  "g4(x+*)": 535,
-  "g5": 536,
-  "g5(+)": 537,
-  "g5(+*)": 538,
-  "g5(x)": 539,
-  "g5(x+)": 540,
-  "g5(x+*)": 541,
-  "g6": 542,
-  "g6(+)": 543,
-  "g6(+*)": 544,
-  "g6(x)": 545,
-  "g6(x+)": 546,
-  "g6(x+*)": 547,
-  "g6(xE)": 548,
-  "g6(xE+)": 549,
-  "g7": 550,
-  "g7(+)": 551,
-  "g7(+*)": 552,
-  "g7(x)": 553,
-  "g7(x+)": 554,
-  "g7(x+*)": 555,
-  "g8": 556,
-  "g8(+)": 557,
-  "g8(+*)": 558,
-  "g8(+*Q)": 559,
-  "g8(+*R)": 560,
-  "g8(+B)": 561,
-  "g8(+N)": 562,
-  "g8(+Q)": 563,
-  "g8(+R)": 564,
-  "g8(B)": 565,
-  "g8(N)": 566,
-  "g8(Q)": 567,
-  "g8(R)": 568,
-  "g8(o)": 569,
-  "g8(o+)": 570,
-  "g8(x)": 571,
-  "g8(x+)": 572,
-  "g8(x+*)": 573,
-  "g8(x+*Q)": 574,
-  "g8(x+Q)": 575,
-  "g8(xQ)": 576,
-  "h1": 577,
-  "h1(+)": 578,
-  "h1(+*)": 579,
-  "h1(+*Q)": 580,
-  "h1(+*R)": 581,
-  "h1(+N)": 582,
-  "h1(+Q)": 583,
-  "h1(B)": 584,
-  "h1(N)": 585,
-  "h1(Q)": 586,
-  "h1(R)": 587,
-  "h1(x)": 588,
-  "h1(x+)": 589,
-  "h1(x+*)": 590,
-  "h1(x+N)": 591,
-  "h1(x+Q)": 592,
-  "h1(xQ)": 593,
-  "h2": 594,
-  "h2(+)": 595,
-  "h2(+*)": 596,
-  "h2(x)": 597,
-  "h2(x+)": 598,
-  "h2(x+*)": 599,
-  "h3": 600,
-  "h3(+)": 601,
-  "h3(+*)": 602,
-  "h3(x)": 603,
-  "h3(x+)": 604,
-  "h3(x+*)": 605,
-  "h3(xE)": 606,
-  "h3(xE+)": 607,
-  "h4": 608,
-  "h4(+)": 609,
-  "h4(+*)": 610,
-  "h4(x)": 611,
-  "h4(x+)": 612,
-  "h4(x+*)": 613,
-  "h5": 614,
-  "h5(+)": 615,
-  "h5(+*)": 616,
-  "h5(x)": 617,
-  "h5(x+)": 618,
-  "h5(x+*)": 619,
-  "h6": 620,
-  "h6(+)": 621,
-  "h6(+*)": 622,
-  "h6(x)": 623,
-  "h6(x+)": 624,
-  "h6(x+*)": 625,
-  "h6(xE)": 626,
-  "h6(xE+)": 627,
-  "h7": 628,
-  "h7(+)": 629,
-  "h7(+*)": 630,
-  "h7(x)": 631,
-  "h7(x+)": 632,
-  "h7(x+*)": 633,
-  "h8": 634,
-  "h8(+)": 635,
-  "h8(+*)": 636,
-  "h8(+*Q)": 637,
-  "h8(+*R)": 638,
-  "h8(+N)": 639,
-  "h8(+Q)": 640,
-  "h8(+R)": 641,
-  "h8(N)": 642,
-  "h8(Q)": 643,
-  "h8(R)": 644,
-  "h8(x)": 645,
-  "h8(x+)": 646,
-  "h8(x+*)": 647,
-  "h8(x+*Q)": 648,
-  "h8(x+Q)": 649,
-  "h8(xQ)": 650
+  "a1": 4,
+  "a2": 5,
+  "a3": 6,
+  "a4": 7,
+  "a5": 8,
+  "a6": 9,
+  "a7": 10,
+  "a8": 11,
+  "b1": 12,
+  "b2": 13,
+  "b3": 14,
+  "b4": 15,
+  "b5": 16,
+  "b6": 17,
+  "b7": 18,
+  "b8": 19,
+  "c1": 20,
+  "c2": 21,
+  "c3": 22,
+  "c4": 23,
+  "c5": 24,
+  "c6": 25,
+  "c7": 26,
+  "c8": 27,
+  "d1": 28,
+  "d2": 29,
+  "d3": 30,
+  "d4": 31,
+  "d5": 32,
+  "d6": 33,
+  "d7": 34,
+  "d8": 35,
+  "e1": 36,
+  "e2": 37,
+  "e3": 38,
+  "e4": 39,
+  "e5": 40,
+  "e6": 41,
+  "e7": 42,
+  "e8": 43,
+  "f1": 44,
+  "f2": 45,
+  "f3": 46,
+  "f4": 47,
+  "f5": 48,
+  "f6": 49,
+  "f7": 50,
+  "f8": 51,
+  "g1": 52,
+  "g2": 53,
+  "g3": 54,
+  "g4": 55,
+  "g5": 56,
+  "g6": 57,
+  "g7": 58,
+  "g8": 59,
+  "h1": 60,
+  "h2": 61,
+  "h3": 62,
+  "h4": 63,
+  "h5": 64,
+  "h6": 65,
+  "h7": 66,
+  "h8": 67,
+  "q": 68,
+  "r": 69,
+  "b": 70,
+  "n": 71
 }
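The new vocabulary is fully regular: 4 special tokens, the 64 squares in file-major order, then the 4 promotion letters, exactly the layout `_create_default_vocab` generates. A small sketch verifying it (file name as in this commit):

```python
import json

with open("vocab.json") as f:
    vocab = json.load(f)

squares = [fl + r for fl in "abcdefgh" for r in "12345678"]
assert len(vocab) == 4 + 64 + 4                                 # 72 entries
assert all(vocab[sq] == 4 + i for i, sq in enumerate(squares))  # square IDs 4-67
assert [vocab[p] for p in "qrbn"] == [68, 69, 70, 71]           # promotion IDs
```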