shubhasanket committed
Commit 937866b · verified · 1 Parent(s): 6e05a30

Chess Challenge submission by shubhasanket

Files changed (7)
  1. README.md +26 -0
  2. config.json +20 -0
  3. model.safetensors +3 -0
  4. special_tokens_map.json +6 -0
  5. tokenizer.py +446 -0
  6. tokenizer_config.json +50 -0
  7. vocab.json +145 -0
README.md ADDED
@@ -0,0 +1,26 @@
+ ---
+ library_name: transformers
+ tags:
+ - chess
+ - llm-course
+ - chess-challenge
+ license: mit
+ ---
+
+ # shubha-chess-1m-structured-v2
+
+ Chess model submitted to the LLM Course Chess Challenge.
+
+ ## Submission Info
+
+ - **Submitted by**: [shubhasanket](https://huggingface.co/shubhasanket)
+ - **Parameters**: 986,576
+ - **Organization**: LLM-course
+
+ ## Model Details
+
+ - **Architecture**: Chess Transformer (GPT-style)
+ - **Vocab size**: 143
+ - **Embedding dim**: 144
+ - **Layers**: 4
+ - **Heads**: 8
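
Usage sketch (editor's illustration, not part of the committed README): because `tokenizer_config.json` maps `AutoTokenizer` to `tokenizer.ChessTokenizer`, the tokenizer should load with `trust_remote_code=True`. The repo id below is an assumption based on the organization and model name above, and the move-string format is assumed from `tokenizer.py`; the custom `ChessForCausalLM` model class itself is not included in this commit.

```python
# Hedged sketch: load the custom tokenizer published by this commit.
# The repo id is an assumption ("LLM-course" org + model name above); adjust if it differs.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "LLM-course/shubha-chess-1m-structured-v2",
    trust_remote_code=True,  # ChessTokenizer lives in tokenizer.py via auto_map
)

# Structured decomposition of a move string (notation assumed from tokenizer.py):
print(tok.tokenize("Pe2e4 Ng1f3"))
# -> ['P', 'FROM_e2', 'TO_e4', 'N', 'FROM_g1', 'TO_f3']
```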
config.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "architectures": [
+     "ChessForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "dropout": 0.1,
+   "dtype": "float32",
+   "eos_token_id": 2,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "chess_transformer",
+   "n_ctx": 256,
+   "n_embd": 144,
+   "n_head": 8,
+   "n_inner": 512,
+   "n_layer": 4,
+   "pad_token_id": 0,
+   "tie_weights": true,
+   "transformers_version": "4.57.6",
+   "vocab_size": 143
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2115203092db9b17ff723b1c4f00bd4a534189fda6d931849f733ae1ccf350fa
+ size 3950720
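
The LFS pointer above records only the hash and the size (3,950,720 bytes, consistent with roughly one million float32 parameters). A hedged sketch for checking the parameter count locally, assuming the actual weights have been fetched from Git LFS; with weight tying the total may differ slightly from the README figure.

```python
# Hedged sketch: compare the checkpoint's tensor sizes with the README parameter count.
from safetensors.torch import load_file

state_dict = load_file("model.safetensors")          # assumes the LFS blob is present locally
total = sum(t.numel() for t in state_dict.values())
print(total)  # should be close to 986,576 (see README); float32 per config.json
```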
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "[BOS]",
+   "eos_token": "[EOS]",
+   "pad_token": "[PAD]",
+   "unk_token": "[UNK]"
+ }
tokenizer.py ADDED
@@ -0,0 +1,446 @@
+ # """
+ # Improved Chess Tokenizer (Structured) for the Chess Challenge.
+
+ # Key idea:
+ # - Decompose each move into sub-tokens:
+ #     SIDE_W / SIDE_B
+ #     piece (P,N,B,R,Q,K)
+ #     from-square (e2)
+ #     to-square (e4)
+ #     optional flags: CAPTURE, CHECK, MATE, CASTLE_SHORT, CASTLE_LONG
+
+ # Important implementation detail:
+ # - We MUST avoid token-string collisions. In particular, "B" is both:
+ #     - Black side ("B")
+ #     - Bishop piece ("B")
+ #   If we used raw "W"/"B" for side, the vocab dict would overwrite one of them,
+ #   creating non-contiguous IDs and leading to embedding "index out of range".
+ # """
+
+ # from __future__ import annotations
+
+ # import json
+ # import os
+ # import re
+ # from typing import Dict, List, Optional
+
+ # from transformers import PreTrainedTokenizer
+
+
+ # class ChessTokenizer(PreTrainedTokenizer):
+ #     model_input_names = ["input_ids", "attention_mask"]
+ #     vocab_files_names = {"vocab_file": "vocab.json"}
+
+ #     # Special tokens
+ #     PAD_TOKEN = "[PAD]"
+ #     BOS_TOKEN = "[BOS]"
+ #     EOS_TOKEN = "[EOS]"
+ #     UNK_TOKEN = "[UNK]"
+
+ #     # Side tokens (avoid collision with piece "B" for Bishop)
+ #     SIDE_W = "SIDE_W"
+ #     SIDE_B = "SIDE_B"
+
+ #     def __init__(
+ #         self,
+ #         vocab_file: Optional[str] = None,
+ #         vocab: Optional[Dict[str, int]] = None,
+ #         **kwargs,
+ #     ):
+ #         self._pad_token = self.PAD_TOKEN
+ #         self._bos_token = self.BOS_TOKEN
+ #         self._eos_token = self.EOS_TOKEN
+ #         self._unk_token = self.UNK_TOKEN
+
+ #         # Avoid duplicate kwargs when HF loads from disk
+ #         kwargs.pop("pad_token", None)
+ #         kwargs.pop("bos_token", None)
+ #         kwargs.pop("eos_token", None)
+ #         kwargs.pop("unk_token", None)
+
+ #         if vocab is not None:
+ #             self._vocab = {str(k): int(v) for k, v in vocab.items()}
+ #         elif vocab_file is not None and os.path.exists(vocab_file):
+ #             with open(vocab_file, "r", encoding="utf-8") as f:
+ #                 loaded = json.load(f)
+ #             self._vocab = {str(k): int(v) for k, v in loaded.items()}
+ #         else:
+ #             self._vocab = self._create_default_vocab()
+
+ #         # Ensure IDs are contiguous 0..(len-1) (robust to any old saved vocabs)
+ #         self._vocab = self._normalize_vocab_ids(self._vocab)
+ #         self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
+
+ #         super().__init__(
+ #             pad_token=self._pad_token,
+ #             bos_token=self._bos_token,
+ #             eos_token=self._eos_token,
+ #             unk_token=self._unk_token,
+ #             **kwargs,
+ #         )
+
+ #     @staticmethod
+ #     def _normalize_vocab_ids(vocab: Dict[str, int]) -> Dict[str, int]:
+ #         """
+ #         Re-map token IDs to be contiguous and deterministic.
+ #         Sort by old id then by token string.
+ #         """
+ #         items = sorted(vocab.items(), key=lambda kv: (kv[1], kv[0]))
+ #         return {tok: new_id for new_id, (tok, _) in enumerate(items)}
+
+ #     # ------------------------------------------------------------------
+ #     # REQUIRED compatibility method (train.py expects this to exist)
+ #     # ------------------------------------------------------------------
+ #     @classmethod
+ #     def build_vocab_from_dataset(
+ #         cls,
+ #         dataset_name: str = "dlouapre/lichess_2025-01_1M",
+ #         split: str = "train",
+ #         column: str = "text",
+ #         min_frequency: int = 1,
+ #         max_samples: Optional[int] = None,
+ #     ) -> "ChessTokenizer":
+ #         """
+ #         Compatibility hook.
+
+ #         For the structured tokenizer, the vocabulary is fixed and does not
+ #         depend on dataset statistics. We keep this method so src/train.py
+ #         (template code) does not need to change.
+ #         """
+ #         return cls()
+
+ #     # ------------------------------------------------------------------
+ #     # Vocabulary construction
+ #     # ------------------------------------------------------------------
+ #     def _create_default_vocab(self) -> Dict[str, int]:
+ #         special = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]
+
+ #         sides = [self.SIDE_W, self.SIDE_B]  # no collision with piece tokens
+ #         pieces = ["P", "N", "B", "R", "Q", "K"]
+
+ #         files = list("abcdefgh")
+ #         ranks = list("12345678")
+ #         squares = [f + r for f in files for r in ranks]  # 64 tokens
+
+ #         flags = ["CAPTURE", "CHECK", "MATE", "CASTLE_SHORT", "CASTLE_LONG"]
+
+ #         tokens = special + sides + pieces + squares + flags
+ #         return {tok: i for i, tok in enumerate(tokens)}  # contiguous by construction
+
+ #     @property
+ #     def vocab_size(self) -> int:
+ #         return len(self._vocab)
+
+ #     def get_vocab(self) -> Dict[str, int]:
+ #         return dict(self._vocab)
+
+ #     # ------------------------------------------------------------------
+ #     # Tokenization logic
+ #     # ------------------------------------------------------------------
+ #     MOVE_REGEX = re.compile(
+ #         r"""
+ #         (?P<side>[WB])
+ #         (?P<piece>[PNBRQK])
+ #         (?P<from>[a-h][1-8])
+ #         (?P<to>[a-h][1-8])
+ #         (?P<suffix>.*)?
+ #         """,
+ #         re.VERBOSE,
+ #     )
+
+ #     def _tokenize(self, text: str) -> List[str]:
+ #         out: List[str] = []
+ #         for move in text.strip().split():
+ #             out.extend(self._decompose_move(move))
+ #         return out
+
+ #     def _decompose_move(self, move: str) -> List[str]:
+ #         m = self.MOVE_REGEX.match(move)
+ #         if not m:
+ #             return [self.UNK_TOKEN]
+
+ #         side_raw = m.group("side")
+ #         side_tok = self.SIDE_W if side_raw == "W" else self.SIDE_B
+
+ #         tokens = [
+ #             side_tok,
+ #             m.group("piece"),
+ #             m.group("from"),
+ #             m.group("to"),
+ #         ]
+
+ #         suffix = m.group("suffix") or ""
+
+ #         if "(x)" in suffix:
+ #             tokens.append("CAPTURE")
+ #         if "(+*)" in suffix:
+ #             tokens.append("MATE")
+ #         elif "(+)" in suffix:
+ #             tokens.append("CHECK")
+ #         if "(o)" in suffix:
+ #             tokens.append("CASTLE_SHORT")
+ #         if "(O)" in suffix:
+ #             tokens.append("CASTLE_LONG")
+
+ #         return tokens
+
+ #     # ------------------------------------------------------------------
+ #     # ID conversion
+ #     # ------------------------------------------------------------------
+ #     def _convert_token_to_id(self, token: str) -> int:
+ #         return self._vocab.get(token, self._vocab[self.UNK_TOKEN])
+
+ #     def _convert_id_to_token(self, index: int) -> str:
+ #         return self._ids_to_tokens.get(index, self.UNK_TOKEN)
+
+ #     def convert_tokens_to_string(self, tokens: List[str]) -> str:
+ #         special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
+ #         return " ".join(t for t in tokens if t not in special)
+
+ #     # ------------------------------------------------------------------
+ #     # Saving
+ #     # ------------------------------------------------------------------
+ #     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple:
+ #         os.makedirs(save_directory, exist_ok=True)
+ #         vocab_file = os.path.join(
+ #             save_directory,
+ #             (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
+ #         )
+ #         with open(vocab_file, "w", encoding="utf-8") as f:
+ #             json.dump(self._vocab, f, indent=2)
+ #         return (vocab_file,)
+
+
+ # def count_vocab_from_dataset(
+ #     dataset_name: str = "dlouapre/lichess_2025-01_1M",
+ #     split: str = "train",
+ #     column: str = "text",
+ #     max_samples: Optional[int] = 10000,
+ # ) -> Dict[str, int]:
+ #     """
+ #     Count token frequencies after structured tokenization.
+ #     (Editor warning about 'datasets' can be ignored if terminal run works.)
+ #     """
+ #     from collections import Counter
+ #     from datasets import load_dataset
+
+ #     dataset = load_dataset(dataset_name, split=split)
+ #     if max_samples is not None:
+ #         dataset = dataset.select(range(min(max_samples, len(dataset))))
+
+ #     tok = ChessTokenizer()
+ #     counts = Counter()
+
+ #     for ex in dataset:
+ #         counts.update(tok._tokenize(ex[column]))
+
+ #     return dict(counts)
+
+ """
+ Final Structured Chess Tokenizer for the Chess Challenge.
+
+ Design goals:
+ - Strong legality bias
+ - Fixed, collision-free vocabulary
+ - HF-compatible (Trainer, save/load, Hub)
+ - Evaluator-friendly (square extraction still works)
+
+ Move decomposition:
+     PIECE
+     FROM_<square>
+     TO_<square>
+     optional FLAGS
+
+ Example:
+     P FROM_e2 TO_e4
+     N FROM_g1 TO_f3 CHECK
+     K FROM_e1 TO_g1 CASTLE_SHORT
+ """
+
+ from __future__ import annotations
+
+ import json
+ import os
+ import re
+ from typing import Dict, List, Optional
+
+ from transformers import PreTrainedTokenizer
+
+
+ class ChessTokenizer(PreTrainedTokenizer):
+     model_input_names = ["input_ids", "attention_mask"]
+     vocab_files_names = {"vocab_file": "vocab.json"}
+
+     # Special tokens
+     PAD_TOKEN = "[PAD]"
+     BOS_TOKEN = "[BOS]"
+     EOS_TOKEN = "[EOS]"
+     UNK_TOKEN = "[UNK]"
+
+     # Fixed role prefixes
+     FROM_PREFIX = "FROM_"
+     TO_PREFIX = "TO_"
+
+     def __init__(
+         self,
+         vocab_file: Optional[str] = None,
+         vocab: Optional[Dict[str, int]] = None,
+         **kwargs,
+     ):
+         self._pad_token = self.PAD_TOKEN
+         self._bos_token = self.BOS_TOKEN
+         self._eos_token = self.EOS_TOKEN
+         self._unk_token = self.UNK_TOKEN
+
+         # Avoid duplicate kwargs when loading
+         for k in ("pad_token", "bos_token", "eos_token", "unk_token"):
+             kwargs.pop(k, None)
+
+         if vocab is not None:
+             self._vocab = {str(k): int(v) for k, v in vocab.items()}
+         elif vocab_file and os.path.exists(vocab_file):
+             with open(vocab_file, "r", encoding="utf-8") as f:
+                 self._vocab = {str(k): int(v) for k, v in json.load(f).items()}
+         else:
+             self._vocab = self._create_default_vocab()
+
+         # Ensure contiguous IDs
+         self._vocab = self._normalize_vocab(self._vocab)
+         self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
+
+         super().__init__(
+             pad_token=self._pad_token,
+             bos_token=self._bos_token,
+             eos_token=self._eos_token,
+             unk_token=self._unk_token,
+             **kwargs,
+         )
+
+     @staticmethod
+     def _normalize_vocab(vocab: Dict[str, int]) -> Dict[str, int]:
+         items = sorted(vocab.items(), key=lambda kv: (kv[1], kv[0]))
+         return {tok: i for i, (tok, _) in enumerate(items)}
+
+     # ------------------------------------------------------------
+     # Required by train.py (kept for compatibility)
+     # ------------------------------------------------------------
+     @classmethod
+     def build_vocab_from_dataset(
+         cls,
+         *args,
+         **kwargs,
+     ) -> "ChessTokenizer":
+         return cls()
+
+     # ------------------------------------------------------------
+     # Vocabulary
+     # ------------------------------------------------------------
+     def _create_default_vocab(self) -> Dict[str, int]:
+         special = [
+             self.PAD_TOKEN,
+             self.BOS_TOKEN,
+             self.EOS_TOKEN,
+             self.UNK_TOKEN,
+         ]
+
+         pieces = ["P", "N", "B", "R", "Q", "K"]
+
+         files = "abcdefgh"
+         ranks = "12345678"
+         squares = [f + r for f in files for r in ranks]
+
+         from_tokens = [self.FROM_PREFIX + sq for sq in squares]
+         to_tokens = [self.TO_PREFIX + sq for sq in squares]
+
+         flags = [
+             "CAPTURE",
+             "CHECK",
+             "MATE",
+             "CASTLE_SHORT",
+             "CASTLE_LONG",
+         ]
+
+         tokens = special + pieces + from_tokens + to_tokens + flags
+         return {tok: i for i, tok in enumerate(tokens)}
+
+     @property
+     def vocab_size(self) -> int:
+         return len(self._vocab)
+
+     def get_vocab(self) -> Dict[str, int]:
+         return dict(self._vocab)
+
+     # ------------------------------------------------------------
+     # Tokenization
+     # ------------------------------------------------------------
+     MOVE_REGEX = re.compile(
+         r"""
+         (?P<piece>[PNBRQK])
+         (?P<from>[a-h][1-8])
+         (?P<to>[a-h][1-8])
+         (?P<suffix>.*)?
+         """,
+         re.VERBOSE,
+     )
+
+     def _tokenize(self, text: str) -> List[str]:
+         out: List[str] = []
+         for move in text.strip().split():
+             out.extend(self._decompose_move(move))
+         return out
+
+     def _decompose_move(self, move: str) -> List[str]:
+         m = self.MOVE_REGEX.search(move)
+         if not m:
+             return [self.UNK_TOKEN]
+
+         tokens = [
+             m.group("piece"),
+             self.FROM_PREFIX + m.group("from"),
+             self.TO_PREFIX + m.group("to"),
+         ]
+
+         suffix = m.group("suffix") or ""
+
+         if "(x)" in suffix:
+             tokens.append("CAPTURE")
+         if "(+*)" in suffix:
+             tokens.append("MATE")
+         elif "(+)" in suffix:
+             tokens.append("CHECK")
+         if "(o)" in suffix:
+             tokens.append("CASTLE_SHORT")
+         if "(O)" in suffix:
+             tokens.append("CASTLE_LONG")
+
+         return tokens
+
+     # ------------------------------------------------------------
+     # ID conversion
+     # ------------------------------------------------------------
+     def _convert_token_to_id(self, token: str) -> int:
+         return self._vocab.get(token, self._vocab[self.UNK_TOKEN])
+
+     def _convert_id_to_token(self, index: int) -> str:
+         return self._ids_to_tokens.get(index, self.UNK_TOKEN)
+
+     def convert_tokens_to_string(self, tokens: List[str]) -> str:
+         special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
+         return " ".join(t for t in tokens if t not in special)
+
+     # ------------------------------------------------------------
+     # Saving
+     # ------------------------------------------------------------
+     def save_vocabulary(
+         self,
+         save_directory: str,
+         filename_prefix: Optional[str] = None,
+     ) -> tuple:
+         os.makedirs(save_directory, exist_ok=True)
+         path = os.path.join(
+             save_directory,
+             (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
+         )
+         with open(path, "w", encoding="utf-8") as f:
+             json.dump(self._vocab, f, indent=2)
+         return (path,)
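
A minimal local usage sketch of the class above (editor's illustration, not part of the committed file): it builds the default 143-token vocabulary, decomposes two moves carrying suffix flags, and round-trips tokens through ids. The exact move notation in the training data is assumed to match the regex and suffix markers defined in the class.

```python
# Illustrative check of ChessTokenizer (assumes tokenizer.py is on the import path).
from tokenizer import ChessTokenizer

tok = ChessTokenizer()   # fixed default vocabulary
print(tok.vocab_size)    # 143 = 4 special + 6 pieces + 64 FROM + 64 TO + 5 flags

tokens = tok.tokenize("Ng1f3(+) Ke1g1(o)")
print(tokens)
# ['N', 'FROM_g1', 'TO_f3', 'CHECK', 'K', 'FROM_e1', 'TO_g1', 'CASTLE_SHORT']

ids = tok.convert_tokens_to_ids(tokens)                          # contiguous ids from vocab.json
print(tok.convert_tokens_to_string(tok.convert_ids_to_tokens(ids)))
# 'N FROM_g1 TO_f3 CHECK K FROM_e1 TO_g1 CASTLE_SHORT'
```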
tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[BOS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[EOS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "auto_map": {
+     "AutoTokenizer": [
+       "tokenizer.ChessTokenizer",
+       null
+     ]
+   },
+   "bos_token": "[BOS]",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "[EOS]",
+   "extra_special_tokens": {},
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "tokenizer_class": "ChessTokenizer",
+   "unk_token": "[UNK]"
+ }
vocab.json ADDED
@@ -0,0 +1,145 @@
+ {
+   "[PAD]": 0,
+   "[BOS]": 1,
+   "[EOS]": 2,
+   "[UNK]": 3,
+   "P": 4,
+   "N": 5,
+   "B": 6,
+   "R": 7,
+   "Q": 8,
+   "K": 9,
+   "FROM_a1": 10,
+   "FROM_a2": 11,
+   "FROM_a3": 12,
+   "FROM_a4": 13,
+   "FROM_a5": 14,
+   "FROM_a6": 15,
+   "FROM_a7": 16,
+   "FROM_a8": 17,
+   "FROM_b1": 18,
+   "FROM_b2": 19,
+   "FROM_b3": 20,
+   "FROM_b4": 21,
+   "FROM_b5": 22,
+   "FROM_b6": 23,
+   "FROM_b7": 24,
+   "FROM_b8": 25,
+   "FROM_c1": 26,
+   "FROM_c2": 27,
+   "FROM_c3": 28,
+   "FROM_c4": 29,
+   "FROM_c5": 30,
+   "FROM_c6": 31,
+   "FROM_c7": 32,
+   "FROM_c8": 33,
+   "FROM_d1": 34,
+   "FROM_d2": 35,
+   "FROM_d3": 36,
+   "FROM_d4": 37,
+   "FROM_d5": 38,
+   "FROM_d6": 39,
+   "FROM_d7": 40,
+   "FROM_d8": 41,
+   "FROM_e1": 42,
+   "FROM_e2": 43,
+   "FROM_e3": 44,
+   "FROM_e4": 45,
+   "FROM_e5": 46,
+   "FROM_e6": 47,
+   "FROM_e7": 48,
+   "FROM_e8": 49,
+   "FROM_f1": 50,
+   "FROM_f2": 51,
+   "FROM_f3": 52,
+   "FROM_f4": 53,
+   "FROM_f5": 54,
+   "FROM_f6": 55,
+   "FROM_f7": 56,
+   "FROM_f8": 57,
+   "FROM_g1": 58,
+   "FROM_g2": 59,
+   "FROM_g3": 60,
+   "FROM_g4": 61,
+   "FROM_g5": 62,
+   "FROM_g6": 63,
+   "FROM_g7": 64,
+   "FROM_g8": 65,
+   "FROM_h1": 66,
+   "FROM_h2": 67,
+   "FROM_h3": 68,
+   "FROM_h4": 69,
+   "FROM_h5": 70,
+   "FROM_h6": 71,
+   "FROM_h7": 72,
+   "FROM_h8": 73,
+   "TO_a1": 74,
+   "TO_a2": 75,
+   "TO_a3": 76,
+   "TO_a4": 77,
+   "TO_a5": 78,
+   "TO_a6": 79,
+   "TO_a7": 80,
+   "TO_a8": 81,
+   "TO_b1": 82,
+   "TO_b2": 83,
+   "TO_b3": 84,
+   "TO_b4": 85,
+   "TO_b5": 86,
+   "TO_b6": 87,
+   "TO_b7": 88,
+   "TO_b8": 89,
+   "TO_c1": 90,
+   "TO_c2": 91,
+   "TO_c3": 92,
+   "TO_c4": 93,
+   "TO_c5": 94,
+   "TO_c6": 95,
+   "TO_c7": 96,
+   "TO_c8": 97,
+   "TO_d1": 98,
+   "TO_d2": 99,
+   "TO_d3": 100,
+   "TO_d4": 101,
+   "TO_d5": 102,
+   "TO_d6": 103,
+   "TO_d7": 104,
+   "TO_d8": 105,
+   "TO_e1": 106,
+   "TO_e2": 107,
+   "TO_e3": 108,
+   "TO_e4": 109,
+   "TO_e5": 110,
+   "TO_e6": 111,
+   "TO_e7": 112,
+   "TO_e8": 113,
+   "TO_f1": 114,
+   "TO_f2": 115,
+   "TO_f3": 116,
+   "TO_f4": 117,
+   "TO_f5": 118,
+   "TO_f6": 119,
+   "TO_f7": 120,
+   "TO_f8": 121,
+   "TO_g1": 122,
+   "TO_g2": 123,
+   "TO_g3": 124,
+   "TO_g4": 125,
+   "TO_g5": 126,
+   "TO_g6": 127,
+   "TO_g7": 128,
+   "TO_g8": 129,
+   "TO_h1": 130,
+   "TO_h2": 131,
+   "TO_h3": 132,
+   "TO_h4": 133,
+   "TO_h5": 134,
+   "TO_h6": 135,
+   "TO_h7": 136,
+   "TO_h8": 137,
+   "CAPTURE": 138,
+   "CHECK": 139,
+   "MATE": 140,
+   "CASTLE_SHORT": 141,
+   "CASTLE_LONG": 142
+ }
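
A small consistency-check sketch (editor's illustration, assuming the files above are saved locally): the vocabulary is 4 special tokens + 6 pieces + 64 FROM_ squares + 64 TO_ squares + 5 flags = 143 entries with contiguous ids 0..142, matching `vocab_size` in config.json.

```python
# Verify vocab.json is contiguous and consistent with config.json (local paths assumed).
import json

with open("vocab.json", encoding="utf-8") as f:
    vocab = json.load(f)

assert len(vocab) == 4 + 6 + 64 + 64 + 5             # 143 tokens in total
assert sorted(vocab.values()) == list(range(143))     # ids 0..142, no gaps

with open("config.json", encoding="utf-8") as f:
    config = json.load(f)

assert config["vocab_size"] == len(vocab)             # embedding table size matches tokenizer
```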