corentincaris committed on
Commit 28e5bf6 · verified · 1 Parent(s): abd7e24

Chess Challenge submission by corentincaris

Files changed (5):
  1. README.md +2 -2
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.py +71 -92
  5. vocab.json +68 -647
README.md CHANGED
@@ -14,13 +14,13 @@ Chess model submitted to the LLM Course Chess Challenge.
 ## Submission Info
 
 - **Submitted by**: [corentincaris](https://huggingface.co/corentincaris)
-- **Parameters**: 980,880
+- **Parameters**: 911,400
 - **Organization**: LLM-course
 
 ## Model Details
 
 - **Architecture**: Chess Transformer (GPT-style)
-- **Vocab size**: 651
+- **Vocab size**: 72
 - **Embedding dim**: 120
 - **Layers**: 5
 - **Heads**: 8
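The two changed numbers are mutually consistent: shrinking the vocabulary from 651 to 72 removes exactly (651 - 72) x 120 = 69,480 embedding weights, which is the difference between 980,880 and 911,400. A quick back-of-the-envelope check; the GPT-2-style block layout (4xd feed-forward, biases, LayerNorms) and the 256-position learned positional embedding are assumptions inferred from the totals, not stated anywhere in this diff:

```python
# Hypothetical parameter accounting that reproduces both README totals.
d, n_layers, ctx = 120, 5, 256  # ctx=256 is an inferred assumption

attn = (3 * d * d + 3 * d) + (d * d + d)     # QKV + output projection
mlp = (d * 4 * d + 4 * d) + (4 * d * d + d)  # up- and down-projection
block = attn + mlp + 2 * (2 * d)             # plus two LayerNorms

def total_params(vocab_size: int) -> int:
    # tie_weights=true in config.json: the embedding is counted once;
    # the trailing 2*d is a final LayerNorm.
    return vocab_size * d + ctx * d + n_layers * block + 2 * d

print(total_params(651))  # 980880, the old README value
print(total_params(72))   # 911400, the new README value
```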
config.json CHANGED
@@ -16,5 +16,5 @@
   "pad_token_id": 0,
   "tie_weights": true,
   "transformers_version": "4.57.5",
-  "vocab_size": 651
+  "vocab_size": 72
 }
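Because `tie_weights` is true, `vocab_size` sets both the embedding table and the output head, so it has to stay in sync with the tokenizer's vocabulary file. A minimal consistency check, assuming both committed files sit in the current directory:

```python
import json

with open("config.json") as f:
    config = json.load(f)
with open("vocab.json") as f:
    vocab = json.load(f)

# The model config and the tokenizer vocabulary must agree on the size.
assert config["vocab_size"] == len(vocab) == 72
```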
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb36b137cba1ba135d520ebe357fc4c09f0779faf7b31da82b74d4d277c0e06f
-size 3928944
+oid sha256:e3884f7282fb27a863b10d268ab8d6f2eb0d1c24c8e67461d58e189cd8bd262f
+size 3651024
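The size change tracks the parameter change at 4 bytes per weight, which suggests float32 storage (inferred from the sizes, not stated in the diff); both checkpoints carry an identical 5,424-byte safetensors header:

```python
# Both checkpoint sizes decompose into 4 bytes per parameter plus one header.
print(3928944 - 980880 * 4)  # 5424 -> header bytes in the old checkpoint
print(3651024 - 911400 * 4)  # 5424 -> header bytes in the new checkpoint
```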
tokenizer.py CHANGED
@@ -1,32 +1,30 @@
 """
 Custom Chess Tokenizer for the Chess Challenge.
 
-This tokenizer splits moves into 3 parts:
-1. Piece (e.g., WP)
-2. From Square (e.g., e2)
-3. To Square + Suffix (e.g., e4 or e4(x))
+This tokenizer reduces each move of the extended UCI notation used by the
+Lichess dataset (e.g., WPe2e4, BNg8f6) to square and promotion tokens.
+
+The dataset format uses:
+- W/B prefix for White/Black
+- Piece letter: P=Pawn, N=Knight, B=Bishop, R=Rook, Q=Queen, K=King
+- Source and destination squares (e.g., e2e4)
+- Special suffixes: (x)=capture, (+)=check, (+*)=checkmate, (o)/(O)=castling
 """
 
 from __future__ import annotations
 
 import json
 import os
+from pathlib import Path
 from typing import Dict, List, Optional
+import re
 
 from transformers import PreTrainedTokenizer
 
 
 class ChessTokenizer(PreTrainedTokenizer):
-    """
-    A custom tokenizer for chess moves using a 3-part split.
-
-    Splits "WPe2e4(x)" into ["WP", "e2", "e4(x)"].
-    """
-
     model_input_names = ["input_ids", "attention_mask"]
-    vocab_files_names = {"vocab_file": "vocab.json"}
 
-    # Special tokens
     PAD_TOKEN = "[PAD]"
     BOS_TOKEN = "[BOS]"
     EOS_TOKEN = "[EOS]"
@@ -38,15 +36,18 @@ class ChessTokenizer(PreTrainedTokenizer):
         vocab: Optional[Dict[str, int]] = None,
         **kwargs,
     ):
-        # Clean kwargs to avoid conflicts
+        self._pad_token = self.PAD_TOKEN
+        self._bos_token = self.BOS_TOKEN
+        self._eos_token = self.EOS_TOKEN
+        self._unk_token = self.UNK_TOKEN
+
         kwargs.pop("pad_token", None)
         kwargs.pop("bos_token", None)
         kwargs.pop("eos_token", None)
         kwargs.pop("unk_token", None)
-
-        self.vocab_file = vocab_file
 
-        # Load vocab
+        self.token_pattern = re.compile(r'[a-h][1-8]|[qrbn]')
+
         if vocab is not None:
            self._vocab = vocab
         elif vocab_file is not None and os.path.exists(vocab_file):
@@ -54,113 +55,91 @@
                 self._vocab = json.load(f)
         else:
             self._vocab = self._create_default_vocab()
-
+
         self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
 
         super().__init__(
-            pad_token=self.PAD_TOKEN,
-            bos_token=self.BOS_TOKEN,
-            eos_token=self.EOS_TOKEN,
-            unk_token=self.UNK_TOKEN,
+            pad_token=self._pad_token,
+            bos_token=self._bos_token,
+            eos_token=self._eos_token,
+            unk_token=self._unk_token,
            **kwargs,
         )
 
     def _create_default_vocab(self) -> Dict[str, int]:
-        """Create a minimal default vocabulary with just special tokens."""
         special_tokens = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]
         vocab = {token: idx for idx, token in enumerate(special_tokens)}
+        idx = len(vocab)
+
+        # Squares (4-67)
+        for f in 'abcdefgh':
+            for r in '12345678':
+                vocab[f"{f}{r}"] = idx
+                idx += 1
+
+        # Promotions (68-71)
+        for p in ['q', 'r', 'b', 'n']:
+            vocab[p] = idx
+            idx += 1
         return vocab
 
-    @property
-    def vocab_size(self) -> int:
-        return len(self._vocab)
-
-    def get_vocab(self) -> Dict[str, int]:
-        return dict(self._vocab)
-
     def _tokenize(self, text: str) -> List[str]:
         """
-        Tokenize a string of moves into 3 components per move.
+        Tokenizes text by first normalizing specific chess patterns
+        and then extracting squares/promotions.
         """
-        tokens = []
-        raw_moves = text.strip().split()
+        text = (text.replace("(Q)", "q")
+                    .replace("(R)", "r")
+                    .replace("(B)", "b")
+                    .replace("(N)", "n"))
 
-        for move in raw_moves:
-            if len(move) >= 6:
-                # 1. Piece (WP)
-                tokens.append(move[:2])
-                # 2. From (e2)
-                tokens.append(move[2:4])
-                # 3. To (e4 or e4(x)) - grab the rest
-                tokens.append(move[4:])
-            else:
-                tokens.append(self.UNK_TOKEN)
-        return tokens
+        return self.token_pattern.findall(text)
 
     def _convert_token_to_id(self, token: str) -> int:
-        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN))
+        """Convert a token to its ID."""
+        return self._vocab.get(token, self._vocab.get(self.UNK_TOKEN, 0))
 
     def _convert_id_to_token(self, index: int) -> str:
+        """Convert an ID to its token."""
         return self._ids_to_tokens.get(index, self.UNK_TOKEN)
 
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
-        # Filter specials
-        filtered = [t for t in tokens if t not in [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]]
-        # Join with space. Result: "WP e2 e4 BN g8 f6"
-        return " ".join(filtered)
+        """Reconstructs a standard UCI string (e.g. "e2e4 a7a8q")."""
+        special = {self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN}
+        clean_tokens = [t for t in tokens if t not in special]
+
+        output = []
+        for token in clean_tokens:
+            if token in ['q', 'r', 'b', 'n'] and output:
+                output[-1] += token
+            elif output and len(output[-1]) == 2 and output[-1][0] in 'abcdefgh':
+                output[-1] += token
+            else:
+                output.append(token)
+
+        return " ".join(output)
 
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple:
         if not os.path.isdir(save_directory):
             os.makedirs(save_directory, exist_ok=True)
-
         vocab_file = os.path.join(
-            save_directory,
-            (filename_prefix + "-" if filename_prefix else "") + "vocab.json",
+            save_directory, (filename_prefix + "-" if filename_prefix else "") + "vocab.json"
         )
-
         with open(vocab_file, "w", encoding="utf-8") as f:
             json.dump(self._vocab, f, ensure_ascii=False, indent=2)
-
         return (vocab_file,)
-
+
     @classmethod
-    def build_vocab_from_dataset(
-        cls,
-        dataset_name: str = "dlouapre/lichess_2025-01_1M",
-        split: str = "train",
-        column: str = "text",
-        min_frequency: int = 100,
-        max_samples: Optional[int] = 100000,
-    ) -> "ChessTokenizer":
-        from datasets import load_dataset
-
-        print(f"Loading dataset {dataset_name} to build vocabulary...")
-        dataset = load_dataset(dataset_name, split=split, streaming=True)
-
-        unique_tokens = set()
-
-        print("Building vocabulary...")
-        count = 0
-        for example in dataset:
-            moves = example[column].strip().split()
-            for move in moves:
-                if len(move) >= 6:
-                    unique_tokens.add(move[:2])  # Piece
-                    unique_tokens.add(move[2:4])  # From
-                    unique_tokens.add(move[4:])  # To (includes suffix like (x))
-            count += 1
-            if max_samples is not None and count >= max_samples:
-                break
-
-        special = [cls.PAD_TOKEN, cls.BOS_TOKEN, cls.EOS_TOKEN, cls.UNK_TOKEN]
-        # Sort tokens to ensure deterministic IDs
-        all_tokens = special + sorted(list(unique_tokens))
+    def build_vocab_from_iterator(cls, iterator, min_frequency=1):
+        return cls()
 
-        vocab = {token: idx for idx, token in enumerate(all_tokens)}
-        print(f"Built vocabulary with {len(vocab)} tokens")
-        return cls(vocab=vocab)
-
+    @classmethod
+    def build_vocab_from_dataset(cls, **kwargs):
+        return cls()
 
-# Kept for compatibility if other scripts import it
-def count_vocab_from_dataset(*args, **kwargs):
-    return {}
+    @property
+    def vocab_size(self) -> int:
+        return len(self._vocab)
+
+    def get_vocab(self) -> Dict[str, int]:
+        return dict(self._vocab)
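The net effect of the rewrite: a move is no longer looked up as a whole piece/from/to(+suffix) triple but reduced to board squares plus an optional promotion letter, which is what shrinks the vocabulary from 651 to 72 entries. A minimal round-trip sketch, assuming the committed tokenizer.py and vocab.json sit in the working directory (the sample game string is illustrative):

```python
from tokenizer import ChessTokenizer

tok = ChessTokenizer(vocab_file="vocab.json")

# Extended UCI input: piece prefixes and (x)/(+) suffixes are discarded;
# only squares and promotion letters survive tokenization.
game = "WPe2e4 BPd7d5 WPe4d5(x) BQd8d5(x) WPa7a8(Q)"
tokens = tok.tokenize(game)
# ['e2', 'e4', 'd7', 'd5', 'e4', 'd5', 'd8', 'd5', 'a7', 'a8', 'q']

# Decoding regroups square pairs (plus a trailing promotion) into plain UCI.
print(tok.convert_tokens_to_string(tokens))
# e2e4 d7d5 e4d5 d8d5 a7a8q
```

One caveat visible in the `_tokenize` replace chain: only the bare promotion suffixes `(Q)`/`(R)`/`(B)`/`(N)` are normalized, so promotions fused with capture or check markers (such as the `(xQ)` and `(+Q)` forms in the old vocabulary) lose their promotion letter.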
vocab.json CHANGED
@@ -3,651 +3,72 @@
   "[BOS]": 1,
   "[EOS]": 2,
   "[UNK]": 3,
-  "BB": 4,
-  "BK": 5,
-  "BN": 6,
-  "BP": 7,
-  "BQ": 8,
-  "BR": 9,
-  "WB": 10,
-  "WK": 11,
-  "WN": 12,
-  "WP": 13,
-  "WQ": 14,
-  "WR": 15,
-  "a1": 16,
-  "a1(+)": 17,
-  "a1(+*)": 18,
-  "a1(+*Q)": 19,
-  "a1(+*R)": 20,
-  "a1(+N)": 21,
-  "a1(+Q)": 22,
-  "a1(+R)": 23,
-  "a1(B)": 24,
-  "a1(N)": 25,
-  "a1(Q)": 26,
-  "a1(R)": 27,
-  "a1(x)": 28,
-  "a1(x+)": 29,
-  "a1(x+*)": 30,
-  "a1(x+*Q)": 31,
-  "a1(x+Q)": 32,
-  "a1(xQ)": 33,
-  "a2": 34,
-  "a2(+)": 35,
-  "a2(+*)": 36,
-  "a2(x)": 37,
-  "a2(x+)": 38,
-  "a2(x+*)": 39,
-  "a3": 40,
-  "a3(+)": 41,
-  "a3(+*)": 42,
-  "a3(x)": 43,
-  "a3(x+)": 44,
-  "a3(x+*)": 45,
-  "a3(xE)": 46,
-  "a3(xE+)": 47,
-  "a4": 48,
-  "a4(+)": 49,
-  "a4(+*)": 50,
-  "a4(x)": 51,
-  "a4(x+)": 52,
-  "a4(x+*)": 53,
-  "a5": 54,
-  "a5(+)": 55,
-  "a5(+*)": 56,
-  "a5(x)": 57,
-  "a5(x+)": 58,
-  "a5(x+*)": 59,
-  "a6": 60,
-  "a6(+)": 61,
-  "a6(+*)": 62,
-  "a6(x)": 63,
-  "a6(x+)": 64,
-  "a6(x+*)": 65,
-  "a6(xE)": 66,
-  "a6(xE+)": 67,
-  "a7": 68,
-  "a7(+)": 69,
-  "a7(+*)": 70,
-  "a7(x)": 71,
-  "a7(x+)": 72,
-  "a7(x+*)": 73,
-  "a8": 74,
-  "a8(+)": 75,
-  "a8(+*)": 76,
-  "a8(+*Q)": 77,
-  "a8(+*R)": 78,
-  "a8(+N)": 79,
-  "a8(+Q)": 80,
-  "a8(B)": 81,
-  "a8(N)": 82,
-  "a8(Q)": 83,
-  "a8(R)": 84,
-  "a8(x)": 85,
-  "a8(x+)": 86,
-  "a8(x+*)": 87,
-  "a8(x+*Q)": 88,
-  "a8(x+Q)": 89,
-  "a8(xQ)": 90,
-  "b1": 91,
-  "b1(+)": 92,
-  "b1(+*)": 93,
-  "b1(+*Q)": 94,
-  "b1(+*R)": 95,
-  "b1(+B)": 96,
-  "b1(+N)": 97,
-  "b1(+Q)": 98,
-  "b1(B)": 99,
-  "b1(N)": 100,
-  "b1(Q)": 101,
-  "b1(R)": 102,
-  "b1(x)": 103,
-  "b1(x+)": 104,
-  "b1(x+*)": 105,
-  "b1(x+*Q)": 106,
-  "b1(x+Q)": 107,
-  "b1(xN)": 108,
-  "b1(xQ)": 109,
-  "b1(xR)": 110,
-  "b2": 111,
-  "b2(+)": 112,
-  "b2(+*)": 113,
-  "b2(x)": 114,
-  "b2(x+)": 115,
-  "b2(x+*)": 116,
-  "b3": 117,
-  "b3(+)": 118,
-  "b3(+*)": 119,
-  "b3(x)": 120,
-  "b3(x+)": 121,
-  "b3(x+*)": 122,
-  "b3(xE)": 123,
-  "b3(xE+)": 124,
-  "b4": 125,
-  "b4(+)": 126,
-  "b4(+*)": 127,
-  "b4(x)": 128,
-  "b4(x+)": 129,
-  "b4(x+*)": 130,
-  "b5": 131,
-  "b5(+)": 132,
-  "b5(+*)": 133,
-  "b5(x)": 134,
-  "b5(x+)": 135,
-  "b5(x+*)": 136,
-  "b6": 137,
-  "b6(+)": 138,
-  "b6(+*)": 139,
-  "b6(x)": 140,
-  "b6(x+)": 141,
-  "b6(x+*)": 142,
-  "b6(xE)": 143,
-  "b6(xE+)": 144,
-  "b7": 145,
-  "b7(+)": 146,
-  "b7(+*)": 147,
-  "b7(x)": 148,
-  "b7(x+)": 149,
-  "b7(x+*)": 150,
-  "b8": 151,
-  "b8(+)": 152,
-  "b8(+*)": 153,
-  "b8(+*Q)": 154,
-  "b8(+*R)": 155,
-  "b8(+Q)": 156,
-  "b8(+R)": 157,
-  "b8(B)": 158,
-  "b8(N)": 159,
-  "b8(Q)": 160,
-  "b8(R)": 161,
-  "b8(x)": 162,
-  "b8(x+)": 163,
-  "b8(x+*)": 164,
-  "b8(x+*Q)": 165,
-  "b8(x+Q)": 166,
-  "b8(xN)": 167,
-  "b8(xQ)": 168,
-  "c1": 169,
-  "c1(+)": 170,
-  "c1(+*)": 171,
-  "c1(+*Q)": 172,
-  "c1(+B)": 173,
-  "c1(+N)": 174,
-  "c1(+Q)": 175,
-  "c1(+R)": 176,
-  "c1(N)": 177,
-  "c1(O)": 178,
-  "c1(O+)": 179,
-  "c1(O+*)": 180,
-  "c1(Q)": 181,
-  "c1(R)": 182,
-  "c1(x)": 183,
-  "c1(x+)": 184,
-  "c1(x+*)": 185,
-  "c1(x+*Q)": 186,
-  "c1(x+Q)": 187,
-  "c1(x+R)": 188,
-  "c1(xN)": 189,
-  "c1(xQ)": 190,
-  "c2": 191,
-  "c2(+)": 192,
-  "c2(+*)": 193,
-  "c2(x)": 194,
-  "c2(x+)": 195,
-  "c2(x+*)": 196,
-  "c3": 197,
-  "c3(+)": 198,
-  "c3(+*)": 199,
-  "c3(x)": 200,
-  "c3(x+)": 201,
-  "c3(x+*)": 202,
-  "c3(xE)": 203,
-  "c3(xE+)": 204,
-  "c4": 205,
-  "c4(+)": 206,
-  "c4(+*)": 207,
-  "c4(x)": 208,
-  "c4(x+)": 209,
-  "c4(x+*)": 210,
-  "c5": 211,
-  "c5(+)": 212,
-  "c5(+*)": 213,
-  "c5(x)": 214,
-  "c5(x+)": 215,
-  "c5(x+*)": 216,
-  "c6": 217,
-  "c6(+)": 218,
-  "c6(+*)": 219,
-  "c6(x)": 220,
-  "c6(x+)": 221,
-  "c6(x+*)": 222,
-  "c6(xE)": 223,
-  "c6(xE+)": 224,
-  "c7": 225,
-  "c7(+)": 226,
-  "c7(+*)": 227,
-  "c7(x)": 228,
-  "c7(x+)": 229,
-  "c7(x+*)": 230,
-  "c8": 231,
-  "c8(+)": 232,
-  "c8(+*)": 233,
-  "c8(+*Q)": 234,
-  "c8(+B)": 235,
-  "c8(+N)": 236,
-  "c8(+Q)": 237,
-  "c8(+R)": 238,
-  "c8(B)": 239,
-  "c8(N)": 240,
-  "c8(O)": 241,
-  "c8(O+)": 242,
-  "c8(Q)": 243,
-  "c8(R)": 244,
-  "c8(x)": 245,
-  "c8(x+)": 246,
-  "c8(x+*)": 247,
-  "c8(x+*Q)": 248,
-  "c8(x+N)": 249,
-  "c8(x+Q)": 250,
-  "c8(xQ)": 251,
-  "d1": 252,
-  "d1(+)": 253,
-  "d1(+*)": 254,
-  "d1(+*Q)": 255,
-  "d1(+*R)": 256,
-  "d1(+N)": 257,
-  "d1(+Q)": 258,
-  "d1(+R)": 259,
-  "d1(N)": 260,
-  "d1(Q)": 261,
-  "d1(R)": 262,
-  "d1(x)": 263,
-  "d1(x+)": 264,
-  "d1(x+*)": 265,
-  "d1(x+*Q)": 266,
-  "d1(x+B)": 267,
-  "d1(x+Q)": 268,
-  "d1(xN)": 269,
-  "d1(xQ)": 270,
-  "d2": 271,
-  "d2(+)": 272,
-  "d2(+*)": 273,
-  "d2(x)": 274,
-  "d2(x+)": 275,
-  "d2(x+*)": 276,
-  "d3": 277,
-  "d3(+)": 278,
-  "d3(+*)": 279,
-  "d3(x)": 280,
-  "d3(x+)": 281,
-  "d3(x+*)": 282,
-  "d3(xE)": 283,
-  "d3(xE+)": 284,
-  "d4": 285,
-  "d4(+)": 286,
-  "d4(+*)": 287,
-  "d4(x)": 288,
-  "d4(x+)": 289,
-  "d4(x+*)": 290,
-  "d5": 291,
-  "d5(+)": 292,
-  "d5(+*)": 293,
-  "d5(x)": 294,
-  "d5(x+)": 295,
-  "d5(x+*)": 296,
-  "d6": 297,
-  "d6(+)": 298,
-  "d6(+*)": 299,
-  "d6(x)": 300,
-  "d6(x+)": 301,
-  "d6(x+*)": 302,
-  "d6(xE)": 303,
-  "d6(xE+)": 304,
-  "d7": 305,
-  "d7(+)": 306,
-  "d7(+*)": 307,
-  "d7(x)": 308,
-  "d7(x+)": 309,
-  "d7(x+*)": 310,
-  "d8": 311,
-  "d8(+)": 312,
-  "d8(+*)": 313,
-  "d8(+*Q)": 314,
-  "d8(+B)": 315,
-  "d8(+N)": 316,
-  "d8(+Q)": 317,
-  "d8(+R)": 318,
-  "d8(B)": 319,
-  "d8(N)": 320,
-  "d8(Q)": 321,
-  "d8(R)": 322,
-  "d8(x)": 323,
-  "d8(x+)": 324,
-  "d8(x+*)": 325,
-  "d8(x+*Q)": 326,
-  "d8(x+Q)": 327,
-  "d8(xQ)": 328,
-  "e1": 329,
-  "e1(+)": 330,
-  "e1(+*)": 331,
-  "e1(+*Q)": 332,
-  "e1(+*R)": 333,
-  "e1(+B)": 334,
-  "e1(+N)": 335,
-  "e1(+Q)": 336,
-  "e1(+R)": 337,
-  "e1(N)": 338,
-  "e1(Q)": 339,
-  "e1(R)": 340,
-  "e1(x)": 341,
-  "e1(x+)": 342,
-  "e1(x+*)": 343,
-  "e1(x+*Q)": 344,
-  "e1(x+Q)": 345,
-  "e1(xQ)": 346,
-  "e2": 347,
-  "e2(+)": 348,
-  "e2(+*)": 349,
-  "e2(x)": 350,
-  "e2(x+)": 351,
-  "e2(x+*)": 352,
-  "e3": 353,
-  "e3(+)": 354,
-  "e3(+*)": 355,
-  "e3(x)": 356,
-  "e3(x+)": 357,
-  "e3(x+*)": 358,
-  "e3(xE)": 359,
-  "e3(xE+)": 360,
-  "e4": 361,
-  "e4(+)": 362,
-  "e4(+*)": 363,
-  "e4(x)": 364,
-  "e4(x+)": 365,
-  "e4(x+*)": 366,
-  "e5": 367,
-  "e5(+)": 368,
-  "e5(+*)": 369,
-  "e5(x)": 370,
-  "e5(x+)": 371,
-  "e5(x+*)": 372,
-  "e6": 373,
-  "e6(+)": 374,
-  "e6(+*)": 375,
-  "e6(x)": 376,
-  "e6(x+)": 377,
-  "e6(x+*)": 378,
-  "e6(xE)": 379,
-  "e6(xE+)": 380,
-  "e7": 381,
-  "e7(+)": 382,
-  "e7(+*)": 383,
-  "e7(x)": 384,
-  "e7(x+)": 385,
-  "e7(x+*)": 386,
-  "e8": 387,
-  "e8(+)": 388,
-  "e8(+*)": 389,
-  "e8(+*Q)": 390,
-  "e8(+*R)": 391,
-  "e8(+N)": 392,
-  "e8(+Q)": 393,
-  "e8(+R)": 394,
-  "e8(B)": 395,
-  "e8(N)": 396,
-  "e8(Q)": 397,
-  "e8(R)": 398,
-  "e8(x)": 399,
-  "e8(x+)": 400,
-  "e8(x+*)": 401,
-  "e8(x+*Q)": 402,
-  "e8(x+*R)": 403,
-  "e8(x+Q)": 404,
-  "e8(x+R)": 405,
-  "e8(xN)": 406,
-  "e8(xQ)": 407,
-  "e8(xR)": 408,
-  "f1": 409,
-  "f1(+)": 410,
-  "f1(+*)": 411,
-  "f1(+*Q)": 412,
-  "f1(+*R)": 413,
-  "f1(+B)": 414,
-  "f1(+N)": 415,
-  "f1(+Q)": 416,
-  "f1(+R)": 417,
-  "f1(B)": 418,
-  "f1(N)": 419,
-  "f1(Q)": 420,
-  "f1(R)": 421,
-  "f1(x)": 422,
-  "f1(x+)": 423,
-  "f1(x+*)": 424,
-  "f1(x+*Q)": 425,
-  "f1(x+*R)": 426,
-  "f1(x+N)": 427,
-  "f1(x+Q)": 428,
-  "f1(x+R)": 429,
-  "f1(xQ)": 430,
-  "f2": 431,
-  "f2(+)": 432,
-  "f2(+*)": 433,
-  "f2(x)": 434,
-  "f2(x+)": 435,
-  "f2(x+*)": 436,
-  "f3": 437,
-  "f3(+)": 438,
-  "f3(+*)": 439,
-  "f3(x)": 440,
-  "f3(x+)": 441,
-  "f3(x+*)": 442,
-  "f3(xE)": 443,
-  "f3(xE+)": 444,
-  "f4": 445,
-  "f4(+)": 446,
-  "f4(+*)": 447,
-  "f4(x)": 448,
-  "f4(x+)": 449,
-  "f4(x+*)": 450,
-  "f5": 451,
-  "f5(+)": 452,
-  "f5(+*)": 453,
-  "f5(x)": 454,
-  "f5(x+)": 455,
-  "f5(x+*)": 456,
-  "f6": 457,
-  "f6(+)": 458,
-  "f6(+*)": 459,
-  "f6(x)": 460,
-  "f6(x+)": 461,
-  "f6(x+*)": 462,
-  "f6(xE)": 463,
-  "f6(xE+)": 464,
-  "f7": 465,
-  "f7(+)": 466,
-  "f7(+*)": 467,
-  "f7(x)": 468,
-  "f7(x+)": 469,
-  "f7(x+*)": 470,
-  "f8": 471,
-  "f8(+)": 472,
-  "f8(+*)": 473,
-  "f8(+*Q)": 474,
-  "f8(+*R)": 475,
-  "f8(+N)": 476,
-  "f8(+Q)": 477,
-  "f8(+R)": 478,
-  "f8(B)": 479,
-  "f8(N)": 480,
-  "f8(Q)": 481,
-  "f8(R)": 482,
-  "f8(x)": 483,
-  "f8(x+)": 484,
-  "f8(x+*)": 485,
-  "f8(x+*Q)": 486,
-  "f8(x+Q)": 487,
-  "f8(x+R)": 488,
-  "f8(xN)": 489,
-  "f8(xQ)": 490,
-  "g1": 491,
-  "g1(+)": 492,
-  "g1(+*)": 493,
-  "g1(+*Q)": 494,
-  "g1(+*R)": 495,
-  "g1(+B)": 496,
-  "g1(+N)": 497,
-  "g1(+Q)": 498,
-  "g1(+R)": 499,
-  "g1(B)": 500,
-  "g1(N)": 501,
-  "g1(Q)": 502,
-  "g1(R)": 503,
-  "g1(o)": 504,
-  "g1(o+)": 505,
-  "g1(o+*)": 506,
-  "g1(x)": 507,
-  "g1(x+)": 508,
-  "g1(x+*)": 509,
-  "g1(x+*Q)": 510,
-  "g1(x+*R)": 511,
-  "g1(x+N)": 512,
-  "g1(x+Q)": 513,
-  "g1(xB)": 514,
-  "g1(xQ)": 515,
-  "g2": 516,
-  "g2(+)": 517,
-  "g2(+*)": 518,
-  "g2(x)": 519,
-  "g2(x+)": 520,
-  "g2(x+*)": 521,
-  "g3": 522,
-  "g3(+)": 523,
-  "g3(+*)": 524,
-  "g3(x)": 525,
-  "g3(x+)": 526,
-  "g3(x+*)": 527,
-  "g3(xE)": 528,
-  "g3(xE+)": 529,
-  "g4": 530,
-  "g4(+)": 531,
-  "g4(+*)": 532,
-  "g4(x)": 533,
-  "g4(x+)": 534,
-  "g4(x+*)": 535,
-  "g5": 536,
-  "g5(+)": 537,
-  "g5(+*)": 538,
-  "g5(x)": 539,
-  "g5(x+)": 540,
-  "g5(x+*)": 541,
-  "g6": 542,
-  "g6(+)": 543,
-  "g6(+*)": 544,
-  "g6(x)": 545,
-  "g6(x+)": 546,
-  "g6(x+*)": 547,
-  "g6(xE)": 548,
-  "g6(xE+)": 549,
-  "g7": 550,
-  "g7(+)": 551,
-  "g7(+*)": 552,
-  "g7(x)": 553,
-  "g7(x+)": 554,
-  "g7(x+*)": 555,
-  "g8": 556,
-  "g8(+)": 557,
-  "g8(+*)": 558,
-  "g8(+*Q)": 559,
-  "g8(+*R)": 560,
-  "g8(+B)": 561,
-  "g8(+N)": 562,
-  "g8(+Q)": 563,
-  "g8(+R)": 564,
-  "g8(B)": 565,
-  "g8(N)": 566,
-  "g8(Q)": 567,
-  "g8(R)": 568,
-  "g8(o)": 569,
-  "g8(o+)": 570,
-  "g8(x)": 571,
-  "g8(x+)": 572,
-  "g8(x+*)": 573,
-  "g8(x+*Q)": 574,
-  "g8(x+Q)": 575,
-  "g8(xQ)": 576,
-  "h1": 577,
-  "h1(+)": 578,
-  "h1(+*)": 579,
-  "h1(+*Q)": 580,
-  "h1(+*R)": 581,
-  "h1(+N)": 582,
-  "h1(+Q)": 583,
-  "h1(B)": 584,
-  "h1(N)": 585,
-  "h1(Q)": 586,
-  "h1(R)": 587,
-  "h1(x)": 588,
-  "h1(x+)": 589,
-  "h1(x+*)": 590,
-  "h1(x+N)": 591,
-  "h1(x+Q)": 592,
-  "h1(xQ)": 593,
-  "h2": 594,
-  "h2(+)": 595,
-  "h2(+*)": 596,
-  "h2(x)": 597,
-  "h2(x+)": 598,
-  "h2(x+*)": 599,
-  "h3": 600,
-  "h3(+)": 601,
-  "h3(+*)": 602,
-  "h3(x)": 603,
-  "h3(x+)": 604,
-  "h3(x+*)": 605,
-  "h3(xE)": 606,
-  "h3(xE+)": 607,
-  "h4": 608,
-  "h4(+)": 609,
-  "h4(+*)": 610,
-  "h4(x)": 611,
-  "h4(x+)": 612,
-  "h4(x+*)": 613,
-  "h5": 614,
-  "h5(+)": 615,
-  "h5(+*)": 616,
-  "h5(x)": 617,
-  "h5(x+)": 618,
-  "h5(x+*)": 619,
-  "h6": 620,
-  "h6(+)": 621,
-  "h6(+*)": 622,
-  "h6(x)": 623,
-  "h6(x+)": 624,
-  "h6(x+*)": 625,
-  "h6(xE)": 626,
-  "h6(xE+)": 627,
-  "h7": 628,
-  "h7(+)": 629,
-  "h7(+*)": 630,
-  "h7(x)": 631,
-  "h7(x+)": 632,
-  "h7(x+*)": 633,
-  "h8": 634,
-  "h8(+)": 635,
-  "h8(+*)": 636,
-  "h8(+*Q)": 637,
-  "h8(+*R)": 638,
-  "h8(+N)": 639,
-  "h8(+Q)": 640,
-  "h8(+R)": 641,
-  "h8(N)": 642,
-  "h8(Q)": 643,
-  "h8(R)": 644,
-  "h8(x)": 645,
-  "h8(x+)": 646,
-  "h8(x+*)": 647,
-  "h8(x+*Q)": 648,
-  "h8(x+Q)": 649,
-  "h8(xQ)": 650
+  "a1": 4,
+  "a2": 5,
+  "a3": 6,
+  "a4": 7,
+  "a5": 8,
+  "a6": 9,
+  "a7": 10,
+  "a8": 11,
+  "b1": 12,
+  "b2": 13,
+  "b3": 14,
+  "b4": 15,
+  "b5": 16,
+  "b6": 17,
+  "b7": 18,
+  "b8": 19,
+  "c1": 20,
+  "c2": 21,
+  "c3": 22,
+  "c4": 23,
+  "c5": 24,
+  "c6": 25,
+  "c7": 26,
+  "c8": 27,
+  "d1": 28,
+  "d2": 29,
+  "d3": 30,
+  "d4": 31,
+  "d5": 32,
+  "d6": 33,
+  "d7": 34,
+  "d8": 35,
+  "e1": 36,
+  "e2": 37,
+  "e3": 38,
+  "e4": 39,
+  "e5": 40,
+  "e6": 41,
+  "e7": 42,
+  "e8": 43,
+  "f1": 44,
+  "f2": 45,
+  "f3": 46,
+  "f4": 47,
+  "f5": 48,
+  "f6": 49,
+  "f7": 50,
+  "f8": 51,
+  "g1": 52,
+  "g2": 53,
+  "g3": 54,
+  "g4": 55,
+  "g5": 56,
+  "g6": 57,
+  "g7": 58,
+  "g8": 59,
+  "h1": 60,
+  "h2": 61,
+  "h3": 62,
+  "h4": 63,
+  "h5": 64,
+  "h6": 65,
+  "h7": 66,
+  "h8": 67,
+  "q": 68,
+  "r": 69,
+  "b": 70,
+  "n": 71
 }
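The new vocabulary is fully regular: 4 special tokens, the 64 squares in file-major order, then the 4 promotion letters, exactly the layout `_create_default_vocab` generates. A small sketch verifying it (file name as in this commit):

```python
import json

with open("vocab.json") as f:
    vocab = json.load(f)

squares = [fl + r for fl in "abcdefgh" for r in "12345678"]
assert len(vocab) == 4 + 64 + 4                                 # 72 entries
assert all(vocab[sq] == 4 + i for i, sq in enumerate(squares))  # square IDs 4-67
assert [vocab[p] for p in "qrbn"] == [68, 69, 70, 71]           # promotion IDs
```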