graug committed on
Commit
8e4b914
·
verified ·
1 Parent(s): 3f29d57

Chess Challenge submission by graug

Browse files
Files changed (2) hide show
  1. tokenizer.py +3 -168
  2. tokenizer_config.json +50 -46
tokenizer.py CHANGED
@@ -21,172 +21,6 @@ from typing import Dict, List, Optional
21
  from transformers import PreTrainedTokenizer
22
 
23
 
24
def all_moves():
    """Generate the full vocabulary of chess moves in an extended UCI-like format.

    Each base token is ``<color><piece><from><to>`` (e.g. ``WPe2e4``), where
    color is ``W``/``B`` and piece is one of ``P N B R Q K``.  The base moves
    are then extended with:
      * capture variants   ``...(x)``  for every non-pawn move,
      * check variants     ``...(+)``  for every base move,
      * checkmate variants ``...(+*)`` for every base move,
      * four castling tokens: kingside marked ``(o)``, queenside ``(O)``.

    Returns:
        list[str]: all generated move tokens, in a deterministic order.
    """
    files = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    ranks = ['1', '2', '3', '4', '5', '6', '7', '8']

    all_extended_moves = []

    # --- pawns -----------------------------------------------------------
    # Pawns never stand on rank 1 or 8, hence ranks[1:-1].
    for file in files:
        for rank in ranks[1:-1]:
            # White pawns: double push from rank 2, otherwise single push.
            if rank == '2':
                all_extended_moves.append(f'WP{file}2{file}3')
                all_extended_moves.append(f'WP{file}2{file}4')
            else:
                all_extended_moves.append(f'WP{file}{rank}{file}{int(rank) + 1}')

            # White pawn captures (one step diagonally up).
            if file != 'a':
                all_extended_moves.append(
                    f'WP{file}{rank}{chr(ord(file) - 1)}{int(rank) + 1}')
            if file != 'h':
                all_extended_moves.append(
                    f'WP{file}{rank}{chr(ord(file) + 1)}{int(rank) + 1}')

            # Black pawns: double push from rank 7, otherwise single push.
            if rank == '7':
                all_extended_moves.append(f'BP{file}7{file}6')
                all_extended_moves.append(f'BP{file}7{file}5')
            else:
                all_extended_moves.append(f'BP{file}{rank}{file}{int(rank) - 1}')

            # Black pawn captures (one step diagonally down).
            if file != 'a':
                all_extended_moves.append(
                    f'BP{file}{rank}{chr(ord(file) - 1)}{int(rank) - 1}')
            if file != 'h':
                all_extended_moves.append(
                    f'BP{file}{rank}{chr(ord(file) + 1)}{int(rank) - 1}')

    # Everything appended after this index is a non-pawn move; used below to
    # build '(x)' capture variants for pieces only.
    nb_pawn_moves = len(all_extended_moves)

    # --- pieces ----------------------------------------------------------
    def _piece_moves(letter, is_legal):
        # Append every geometrically legal move for one piece type, for both
        # colors, scanning squares in file/rank order (same order as before).
        for file_from in files:
            for rank_from in ranks:
                for file_to in files:
                    for rank_to in ranks:
                        file_diff = abs(ord(file_from) - ord(file_to))
                        rank_diff = abs(int(rank_from) - int(rank_to))
                        if is_legal(file_diff, rank_diff):
                            square = f'{file_from}{rank_from}{file_to}{rank_to}'
                            all_extended_moves.append(f'W{letter}{square}')
                            all_extended_moves.append(f'B{letter}{square}')

    _piece_moves('N', lambda fd, rd: (fd == 2 and rd == 1) or (fd == 1 and rd == 2))
    _piece_moves('B', lambda fd, rd: fd == rd and fd != 0)
    # Rook: exactly one of the two deltas is zero.
    _piece_moves('R', lambda fd, rd: (fd == 0) != (rd == 0))
    # Queen: bishop-like or rook-like move.
    _piece_moves('Q', lambda fd, rd: (fd == rd and fd != 0) or ((fd == 0) != (rd == 0)))
    # King: at most one square in each direction, but not a null move.
    _piece_moves('K', lambda fd, rd: max(fd, rd) == 1)

    print(f"Total number of moves before extensions: {len(all_extended_moves)}")

    # --- extensions ------------------------------------------------------
    # Capture variants: pawn captures are already distinct base moves, so
    # only piece moves get an explicit '(x)' marker.
    taking_moves = [move + '(x)' for move in all_extended_moves[nb_pawn_moves:]]

    # Check and checkmate variants are built from the base moves only
    # (before the '(x)' list is merged in, so no '(x)(+)' combinations).
    moves_with_check = [move + '(+)' for move in all_extended_moves]
    moves_with_mate = [move + '(+*)' for move in all_extended_moves]

    all_extended_moves.extend(taking_moves)
    all_extended_moves.extend(moves_with_check)
    all_extended_moves.extend(moves_with_mate)

    # --- castling --------------------------------------------------------
    all_extended_moves.append('WKe1g1(o)')  # White kingside castling
    all_extended_moves.append('WKe1c1(O)')  # White queenside castling
    all_extended_moves.append('BKe8g8(o)')  # Black kingside castling
    # Bug fix: this token was 'BKe8c8(0)' with a digit zero, inconsistent
    # with the white queenside marker '(O)' — it would have created a
    # distinct, never-matching vocabulary entry.
    all_extended_moves.append('BKe8c8(O)')  # Black queenside castling

    return all_extended_moves
190
 
191
  class ChessTokenizer(PreTrainedTokenizer):
192
  """
@@ -439,12 +273,13 @@ class ChessTokenizer(PreTrainedTokenizer):
439
  grouped_tokens = ["".join(tokens[i:i+3]) for i in range(0, len(tokens), 3)]
440
 
441
  # mettre une fois sur deux B avant les coups noirs sinon W
442
- final_tokens = []
 
443
  for i, token in enumerate(grouped_tokens):
444
  if i % 2 == 0:
445
  final_tokens.append('W' + token)
446
  else:
447
- final_tokens.append('B' + token)
448
 
449
  return " ".join(final_tokens)
450
 
 
21
  from transformers import PreTrainedTokenizer
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  class ChessTokenizer(PreTrainedTokenizer):
26
  """
 
273
  grouped_tokens = ["".join(tokens[i:i+3]) for i in range(0, len(tokens), 3)]
274
 
275
  # mettre une fois sur deux B avant les coups noirs sinon W
276
+ final_tokens = grouped_tokens.copy()
277
+ """
278
  for i, token in enumerate(grouped_tokens):
279
  if i % 2 == 0:
280
  final_tokens.append('W' + token)
281
  else:
282
+ final_tokens.append('B' + token)"""
283
 
284
  return " ".join(final_tokens)
285
 
tokenizer_config.json CHANGED
@@ -1,46 +1,50 @@
1
- {
2
- "auto_map": {
3
- "AutoTokenizer": ["tokenizer.ChessTokenizer", null]},
4
- "added_tokens_decoder": {
5
- "0": {
6
- "content": "[PAD]",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "1": {
14
- "content": "[BOS]",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "[EOS]",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "3": {
30
- "content": "[UNK]",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- }
37
- },
38
- "bos_token": "[BOS]",
39
- "clean_up_tokenization_spaces": false,
40
- "eos_token": "[EOS]",
41
- "extra_special_tokens": {},
42
- "model_max_length": 1000000000000000019884624838656,
43
- "pad_token": "[PAD]",
44
- "tokenizer_class": "ChessTokenizer",
45
- "unk_token": "[UNK]"
46
- }
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[BOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "[BOS]",
37
+ "clean_up_tokenization_spaces": false,
38
+ "eos_token": "[EOS]",
39
+ "extra_special_tokens": {},
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "[PAD]",
42
+ "tokenizer_class": "ChessTokenizer",
43
+ "unk_token": "[UNK]",
44
+ "auto_map": {
45
+ "AutoTokenizer": [
46
+ "tokenizer.ChessTokenizer",
47
+ null
48
+ ]
49
+ }
50
+ }