Nestor02 committed on
Commit
b922fdb
·
verified ·
1 Parent(s): ba9d7e4

Chess Challenge submission by Nestor02

Browse files
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - chess
5
+ - llm-course
6
+ - chess-challenge
7
+ license: mit
8
+ ---
9
+
10
+ # chess_done
11
+
12
+ Chess model submitted to the LLM Course Chess Challenge.
13
+
14
+ ## Submission Info
15
+
16
+ - **Submitted by**: [Nestor02](https://huggingface.co/Nestor02)
17
+ - **Parameters**: 993,312
18
+ - **Organization**: LLM-course
19
+
20
+ ## Model Details
21
+
22
+ - **Architecture**: Chess Transformer (GPT-style)
23
+ - **Vocab size**: 148
24
+ - **Embedding dim**: 144
25
+ - **Layers**: 7
26
+ - **Heads**: 4
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ChessForCausalLM"
4
+ ],
5
+ "attention_type": "gqa",
6
+ "auto_map": {
7
+ "AutoConfig": "model.ChessConfig",
8
+ "AutoModelForCausalLM": "model.ChessForCausalLM"
9
+ },
10
+ "bos_token_id": 1,
11
+ "dropout": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 2,
14
+ "ffn_type": "swiglu",
15
+ "layer_norm_epsilon": 1e-05,
16
+ "legal_loss_weight": 0.0,
17
+ "model_type": "chess_transformer",
18
+ "n_ctx": 256,
19
+ "n_embd": 144,
20
+ "n_head": 4,
21
+ "n_inner": 256,
22
+ "n_kv_heads": 2,
23
+ "n_layer": 7,
24
+ "pad_token_id": 0,
25
+ "pos_encoding": "rope",
26
+ "rope_theta": 10000.0,
27
+ "tie_weights": true,
28
+ "transformers_version": "4.57.3",
29
+ "vocab_size": 148
30
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 2
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5e4fab00a102d89e3735c2df86891c8554d2cdb72ac0e91b7c50191067dd76
3
+ size 3980104
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[BOS]",
3
+ "eos_token": "[EOS]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Decomposed Chess Tokenizer (v2) for the Chess Challenge.
3
+
4
+ This tokenizer factorizes each move into a small set of reusable tokens:
5
+ - One token for (color + piece): e.g. "WP", "BN"
6
+ - One token for the from-square with role suffix: e.g. "e2_f"
7
+ - One token for the to-square with role suffix: e.g. "e4_t"
8
+ - Optional promotion token: "q", "r", "b", "n"
9
+
10
+ It is compatible with the teacher evaluator's supported formats:
11
+ - Standard: "WPe2e4", "BNg8f6", with optional annotations "(x)", "(+)", "(o)/(O)", "(Q)"
12
+ - Decomposed: "WP e2_f e4_t"
13
+ - UCI: "e2e4", "e7e8q"
14
+ - UCI spaced: "e2 e4"
15
+
16
+ The tokenizer parses those inputs and emits the decomposed tokens above.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import os
23
+ import re
24
+ from pathlib import Path
25
+ from typing import Dict, List, Optional
26
+
27
+ from transformers import PreTrainedTokenizer
28
+
29
+
30
class ChessTokenizer(PreTrainedTokenizer):
    """Decomposed chess-move tokenizer (v2) with a fixed 148-token vocabulary.

    Each move is factored into a small set of reusable tokens:

    - one (color + piece) token, e.g. ``"WP"``, ``"BN"``
    - one from-square token with role suffix, e.g. ``"e2_f"``
    - one to-square token with role suffix, e.g. ``"e4_t"``
    - an optional promotion token: ``"q"``, ``"r"``, ``"b"``, ``"n"``

    Accepted input formats (matching the challenge evaluator):

    - standard: ``"WPe2e4"``, optionally annotated with ``"(x)"``, ``"(+)"``,
      ``"(o)"``/``"(O)"``, ``"(Q)"``
    - decomposed: ``"WP e2_f e4_t"`` (optionally followed by a bare
      promotion letter)
    - UCI: ``"e2e4"``, ``"e7e8q"``
    - spaced UCI: ``"e2 e4"``
    """

    model_input_names = ["input_ids", "attention_mask"]
    vocab_files_names = {"vocab_file": "vocab.json"}

    # Fixed special-token literals; they occupy ids 0..3 in the default vocab.
    PAD_TOKEN = "[PAD]"
    BOS_TOKEN = "[BOS]"
    EOS_TOKEN = "[EOS]"
    UNK_TOKEN = "[UNK]"

    # Compiled once at class level so tokenization loops pay no re-compile cost.
    _COLOR_PIECE_RE = re.compile(r"^[WB][PNBRQK]$")
    _SQUARE_RE = re.compile(r"[a-h][1-8]")
    _SQUARE_ROLE_RE = re.compile(r"^([a-h][1-8])_([ft])$", re.IGNORECASE)
    _PLAIN_SQUARE_RE = re.compile(r"^[a-h][1-8]$", re.IGNORECASE)

    def __init__(
        self,
        vocab_file: Optional[str] = None,
        vocab: Optional[Dict[str, int]] = None,
        **kwargs,
    ):
        """Build the tokenizer from an explicit vocab, a vocab file, or defaults.

        Args:
            vocab_file: Path to a ``vocab.json`` mapping token -> id.
            vocab: In-memory token -> id mapping; takes precedence over
                ``vocab_file``.
            **kwargs: Forwarded to ``PreTrainedTokenizer``. Any special-token
                overrides in ``kwargs`` are discarded because this tokenizer
                pins its own [PAD]/[BOS]/[EOS]/[UNK] set.
        """
        self._pad_token = self.PAD_TOKEN
        self._bos_token = self.BOS_TOKEN
        self._eos_token = self.EOS_TOKEN
        self._unk_token = self.UNK_TOKEN

        # Remove duplicate special-token entries passed through kwargs so they
        # cannot collide with the explicit values handed to super().__init__.
        for key in ("pad_token", "bos_token", "eos_token", "unk_token"):
            kwargs.pop(key, None)

        if vocab is not None:
            self._vocab = vocab
        elif vocab_file is not None and os.path.exists(vocab_file):
            with open(vocab_file, "r", encoding="utf-8") as f:
                self._vocab = json.load(f)
        else:
            self._vocab = self._create_default_vocab()

        # Reverse mapping for id -> token lookups during decoding.
        self._ids_to_tokens = {v: k for k, v in self._vocab.items()}

        super().__init__(
            pad_token=self._pad_token,
            bos_token=self._bos_token,
            eos_token=self._eos_token,
            unk_token=self._unk_token,
            **kwargs,
        )

    @classmethod
    def build_vocab_from_dataset(cls, *_, **__) -> "ChessTokenizer":
        """Return a tokenizer with the fixed default vocabulary.

        Kept for API compatibility with ``train.py``. The v2 tokenizer uses a
        fixed vocabulary (colors/pieces/squares/promotions), so dataset
        statistics are not required and all arguments are ignored.
        """
        # NOTE: the original annotation referenced a nonexistent
        # "ChessTokenizer2"; corrected to this class.
        return cls()

    def _create_default_vocab(self) -> Dict[str, int]:
        """Create the canonical 148-token vocabulary in deterministic order.

        Layout: 4 specials, 12 color+piece tokens, 64 from-squares,
        64 to-squares, 4 promotion letters.
        """
        special_tokens = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]

        color_pieces = [
            f"{color}{piece}"
            for color in ("W", "B")
            for piece in ("P", "N", "B", "R", "Q", "K")
        ]

        squares = [f"{file}{rank}" for rank in range(1, 9) for file in "abcdefgh"]
        square_from = [f"{sq}_f" for sq in squares]
        square_to = [f"{sq}_t" for sq in squares]

        promotions = ["q", "r", "b", "n"]

        # Deterministic order for reproducibility (must match vocab.json).
        all_tokens = special_tokens + color_pieces + square_from + square_to + promotions
        return {tok: idx for idx, tok in enumerate(all_tokens)}

    @property
    def vocab_size(self) -> int:
        """Number of tokens in the vocabulary (148 for the default vocab)."""
        return len(self._vocab)

    def get_vocab(self) -> Dict[str, int]:
        """Return a copy of the token -> id mapping."""
        return dict(self._vocab)

    def _tokenize(self, text: str) -> List[str]:
        """Split *text* on whitespace and map each chunk to vocab tokens.

        Unrecognized chunks become ``[UNK]``; chunks that are pure annotation
        (e.g. a stray ``"(x)"``) are silently dropped.
        """
        parts = text.strip().split()
        if not parts:
            return []

        out: List[str] = []
        next_role = "f"  # Role assigned to the next bare square ("e2" style).

        for part in parts:
            if part in {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}:
                out.append(part)
                next_role = "f"
                continue

            # Decomposed color+piece token: "WP", "BN", ...
            if self._COLOR_PIECE_RE.match(part.upper()):
                out.append(part.upper())
                next_role = "f"
                continue

            # Square with explicit role suffix: "e2_f" / "e4_t"
            m_role = self._SQUARE_ROLE_RE.match(part)
            if m_role:
                sq = m_role.group(1).lower()
                role = m_role.group(2).lower()
                out.append(f"{sq}_{role}")
                next_role = "t" if role == "f" else "f"
                continue

            # Plain square: "e2" (role assigned by position).
            if self._PLAIN_SQUARE_RE.match(part):
                sq = part.lower()
                out.append(f"{sq}_{next_role}")
                next_role = "t" if next_role == "f" else "f"
                continue

            # Standalone promotion chunk: "q", "Q", "=Q", "(Q)", ...
            # BUGFIX: bare letters ("q") previously fell through to [UNK]
            # because _extract_promotion only understood "=Q" / "(Q)" forms,
            # contradicting the decomposed format this class documents.
            if self._looks_like_promo_only(part):
                out.append(part.strip().strip("=()").lower())
                continue

            # Standard / UCI move chunk: "WPe2e4(x+)", "e2e4", "e7e8=Q", ...
            move_tokens = self._tokenize_move_chunk(part)
            if move_tokens:
                out.extend(move_tokens)
                next_role = "f"
                continue

            # Skip pure annotation chunks if they appear separated (rare).
            if re.fullmatch(r"[\(\)\+\*xoO=]+", part):
                continue

            out.append(self.UNK_TOKEN)

        return out

    def _looks_like_promo_only(self, part: str) -> bool:
        """True if *part* is nothing but a promotion marker: "q", "=Q", "(Q)"."""
        part_stripped = part.strip()
        return (
            re.fullmatch(r"[qrbnQRBN]|=[qrbnQRBN]|\([qrbnQRBN]\)", part_stripped)
            is not None
        )

    def _extract_promotion(self, text: str) -> Optional[str]:
        """Extract an embedded promotion letter from "(Q)" or "=Q" notation.

        Returns the lowercase letter ("q"/"r"/"b"/"n") or None when *text*
        carries no such marker.
        """
        text_lower = text.lower()
        m = re.search(r"\(([qrbn])\)", text_lower)
        if m:
            return m.group(1)
        m = re.search(r"=([qrbn])", text_lower)
        if m:
            return m.group(1)
        return None

    def _tokenize_move_chunk(self, chunk: str) -> List[str]:
        """Tokenize one whole-move chunk ("WPe2e4(x)", "e2e4", "e7e8=q").

        Returns the decomposed token sequence, or an empty list when *chunk*
        does not contain at least a from-square and a to-square.
        """
        chunk_stripped = chunk.strip()
        if not chunk_stripped:
            return []

        chunk_lower = chunk_stripped.lower()
        squares = re.findall(self._SQUARE_RE, chunk_lower)
        if len(squares) < 2:
            return []

        from_sq, to_sq = squares[0], squares[1]

        # Optional leading color+piece prefix ("WP", "BN", ...).
        color_piece = None
        if len(chunk_stripped) >= 2 and self._COLOR_PIECE_RE.match(chunk_stripped[:2].upper()):
            color_piece = chunk_stripped[:2].upper()

        tokens: List[str] = []
        if color_piece:
            tokens.append(color_piece)

        tokens.append(f"{from_sq}_f")
        tokens.append(f"{to_sq}_t")

        # Promotion in UCI/"=Q" style: look right after the destination square.
        after_to = chunk_lower.find(to_sq)
        if after_to != -1:
            remaining = chunk_lower[after_to + 2 : after_to + 6]
            m = re.search(r"[=]?([qrbn])", remaining)
            if m:
                tokens.append(m.group(1))

        # Also support dataset-style "(Q)" promotions (deduplicated).
        promo = self._extract_promotion(chunk_stripped)
        if promo and promo not in tokens:
            tokens.append(promo)

        return tokens

    def _convert_token_to_id(self, token: str) -> int:
        """Map a token to its id; unknown tokens map to the [UNK] id."""
        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN, 0))

    def _convert_id_to_token(self, index: int) -> str:
        """Map an id back to its token; unknown ids map to [UNK]."""
        return self._ids_to_tokens.get(index, self.UNK_TOKEN)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Join tokens with spaces, dropping the four special tokens."""
        special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
        return " ".join(t for t in tokens if t not in special)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple:
        """Write the vocabulary to ``<prefix->vocab.json`` in *save_directory*.

        Creates the directory if needed and returns a 1-tuple with the path,
        per the ``PreTrainedTokenizer`` contract.
        """
        if not os.path.isdir(save_directory):
            os.makedirs(save_directory, exist_ok=True)

        vocab_file = os.path.join(
            save_directory,
            (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
        )

        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self._vocab, f, ensure_ascii=False, indent=2)

        return (vocab_file,)
tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[BOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "auto_map": {
37
+ "AutoTokenizer": [
38
+ "tokenizer.ChessTokenizer",
39
+ null
40
+ ]
41
+ },
42
+ "bos_token": "[BOS]",
43
+ "clean_up_tokenization_spaces": false,
44
+ "eos_token": "[EOS]",
45
+ "extra_special_tokens": {},
46
+ "model_max_length": 1000000000000000019884624838656,
47
+ "pad_token": "[PAD]",
48
+ "tokenizer_class": "ChessTokenizer",
49
+ "unk_token": "[UNK]"
50
+ }
vocab.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 0,
3
+ "[BOS]": 1,
4
+ "[EOS]": 2,
5
+ "[UNK]": 3,
6
+ "WP": 4,
7
+ "WN": 5,
8
+ "WB": 6,
9
+ "WR": 7,
10
+ "WQ": 8,
11
+ "WK": 9,
12
+ "BP": 10,
13
+ "BN": 11,
14
+ "BB": 12,
15
+ "BR": 13,
16
+ "BQ": 14,
17
+ "BK": 15,
18
+ "a1_f": 16,
19
+ "b1_f": 17,
20
+ "c1_f": 18,
21
+ "d1_f": 19,
22
+ "e1_f": 20,
23
+ "f1_f": 21,
24
+ "g1_f": 22,
25
+ "h1_f": 23,
26
+ "a2_f": 24,
27
+ "b2_f": 25,
28
+ "c2_f": 26,
29
+ "d2_f": 27,
30
+ "e2_f": 28,
31
+ "f2_f": 29,
32
+ "g2_f": 30,
33
+ "h2_f": 31,
34
+ "a3_f": 32,
35
+ "b3_f": 33,
36
+ "c3_f": 34,
37
+ "d3_f": 35,
38
+ "e3_f": 36,
39
+ "f3_f": 37,
40
+ "g3_f": 38,
41
+ "h3_f": 39,
42
+ "a4_f": 40,
43
+ "b4_f": 41,
44
+ "c4_f": 42,
45
+ "d4_f": 43,
46
+ "e4_f": 44,
47
+ "f4_f": 45,
48
+ "g4_f": 46,
49
+ "h4_f": 47,
50
+ "a5_f": 48,
51
+ "b5_f": 49,
52
+ "c5_f": 50,
53
+ "d5_f": 51,
54
+ "e5_f": 52,
55
+ "f5_f": 53,
56
+ "g5_f": 54,
57
+ "h5_f": 55,
58
+ "a6_f": 56,
59
+ "b6_f": 57,
60
+ "c6_f": 58,
61
+ "d6_f": 59,
62
+ "e6_f": 60,
63
+ "f6_f": 61,
64
+ "g6_f": 62,
65
+ "h6_f": 63,
66
+ "a7_f": 64,
67
+ "b7_f": 65,
68
+ "c7_f": 66,
69
+ "d7_f": 67,
70
+ "e7_f": 68,
71
+ "f7_f": 69,
72
+ "g7_f": 70,
73
+ "h7_f": 71,
74
+ "a8_f": 72,
75
+ "b8_f": 73,
76
+ "c8_f": 74,
77
+ "d8_f": 75,
78
+ "e8_f": 76,
79
+ "f8_f": 77,
80
+ "g8_f": 78,
81
+ "h8_f": 79,
82
+ "a1_t": 80,
83
+ "b1_t": 81,
84
+ "c1_t": 82,
85
+ "d1_t": 83,
86
+ "e1_t": 84,
87
+ "f1_t": 85,
88
+ "g1_t": 86,
89
+ "h1_t": 87,
90
+ "a2_t": 88,
91
+ "b2_t": 89,
92
+ "c2_t": 90,
93
+ "d2_t": 91,
94
+ "e2_t": 92,
95
+ "f2_t": 93,
96
+ "g2_t": 94,
97
+ "h2_t": 95,
98
+ "a3_t": 96,
99
+ "b3_t": 97,
100
+ "c3_t": 98,
101
+ "d3_t": 99,
102
+ "e3_t": 100,
103
+ "f3_t": 101,
104
+ "g3_t": 102,
105
+ "h3_t": 103,
106
+ "a4_t": 104,
107
+ "b4_t": 105,
108
+ "c4_t": 106,
109
+ "d4_t": 107,
110
+ "e4_t": 108,
111
+ "f4_t": 109,
112
+ "g4_t": 110,
113
+ "h4_t": 111,
114
+ "a5_t": 112,
115
+ "b5_t": 113,
116
+ "c5_t": 114,
117
+ "d5_t": 115,
118
+ "e5_t": 116,
119
+ "f5_t": 117,
120
+ "g5_t": 118,
121
+ "h5_t": 119,
122
+ "a6_t": 120,
123
+ "b6_t": 121,
124
+ "c6_t": 122,
125
+ "d6_t": 123,
126
+ "e6_t": 124,
127
+ "f6_t": 125,
128
+ "g6_t": 126,
129
+ "h6_t": 127,
130
+ "a7_t": 128,
131
+ "b7_t": 129,
132
+ "c7_t": 130,
133
+ "d7_t": 131,
134
+ "e7_t": 132,
135
+ "f7_t": 133,
136
+ "g7_t": 134,
137
+ "h7_t": 135,
138
+ "a8_t": 136,
139
+ "b8_t": 137,
140
+ "c8_t": 138,
141
+ "d8_t": 139,
142
+ "e8_t": 140,
143
+ "f8_t": 141,
144
+ "g8_t": 142,
145
+ "h8_t": 143,
146
+ "q": 144,
147
+ "r": 145,
148
+ "b": 146,
149
+ "n": 147
150
+ }