DarshanScripts committed on
Commit
a8fffd2
·
verified ·
1 Parent(s): 82dd7b3

Upload stratego/env/StrategoDuel/env.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. stratego/env/StrategoDuel/env.py +484 -0
stratego/env/StrategoDuel/env.py ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re, random
2
+ from typing import Optional, Dict, Tuple, List, Any
3
+
4
+ import textarena as ta
5
+
6
class StrategoDuelEnv(ta.Env):
    """A two-player implementation of the board game Stratego.

    Each player fields the reduced army described by ``piece_counts`` on a
    10x10 board. A board cell holds ``None`` (empty), the string ``"~"``
    (lake) or a ``{'rank': str, 'player': int}`` dict. Rows are addressed by
    the letters A-J and columns by the digits 0-9, e.g. ``[A0 B0]``.
    """

    _BOARD_SIZE = 10
    # Up, down, left, right -- the order fixes the order of the listed moves.
    _DIRECTIONS = ((-1, 0), (1, 0), (0, -1), (0, 1))
    _IMMOVABLE_RANKS = ('Bomb', 'Flag')
    _ACTION_PATTERN = re.compile(r"\[([A-J])([0-9]) ([A-J])([0-9])\]", re.IGNORECASE)

    def __init__(self):
        """Initialize the piece tables, the lake squares and an empty board."""
        ## set up the board items
        self.piece_counts = {
            'Flag': 1, 'Bomb': 2, 'Spy': 1, 'Scout': 2, 'Miner': 2,
            'General': 1, 'Marshal': 1
        }
        self.piece_ranks = {
            'Flag': 0, 'Bomb': 11, 'Spy': 1, 'Scout': 2, 'Miner': 3,
            'General': 9, 'Marshal': 10
        }
        self.lakes = [(4, 2), (4, 3), (5, 2), (5, 3), (4, 6), (4, 7), (5, 6), (5, 7)]
        # Per-player list of (row, col) squares currently holding that player's pieces.
        self.player_pieces = {0: [], 1: []}
        self.board = [[None for _ in range(self._BOARD_SIZE)] for _ in range(self._BOARD_SIZE)]

    @property
    def terminal_render_keys(self):
        """Keys of ``game_state`` that should be rendered in the terminal."""
        return ["rendered_board"]

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Reset the environment to start a new game.

        Args:
            num_players (int): Number of players, forwarded to ``ta.TwoPlayerState``.
            seed (Optional[int]): Seed forwarded to ``ta.TwoPlayerState``.
        """
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        ## populate the board (this also clears any previous game)
        self.board = self._populate_board()

        ## initialise the game state
        rendered_board = self._render_board(player_id=None, full_board=True)
        game_state = {"board": self.board, "player_pieces": self.player_pieces, "rendered_board": rendered_board}
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
        self._observe_current_state()

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]):
        """
        Generates the player prompt for the current player.

        Args:
            player_id (int): The ID of the current player.
            game_state (Dict[str, Any]): The current game state (not used by the prompt text).

        Returns:
            str: The rules and instructions shown to the player.
        """
        prompt = (
            f"You are Player {player_id} in Stratego.\n"
            "Your goal is to capture your opponent's Flag or eliminate all of their movable pieces.\n"
            "Your army has been placed for you on the board, including your Flag, Bombs, and other pieces of varying ranks.\n"
            "\n"
            "### Gameplay Instructions\n"
            "1. **Movement Rules:**\n"
            " - On your turn, you can move one piece by one step to an adjacent square (up, down, left, or right) that is not already occupied by one of your own pieces.\n"
            " - Example: A piece can move from A1 to B1 or A1 to A2 if B1 and A2 are not placed with the player's own pieces.\n"
            " - If the selected piece is a Bomb or a Flag, it cannot be moved.\n"
            " - **Scout Movement:** Scouts, on the other hand, can move multiple steps in a straight line (horizontally or vertically), but strictly only on one condition.\n"
            " - The condition is that Scouts cannot jump over any piece (your own or your opponent's).\n"
            " - Example: If there is a piece between the Scout and its destination, the Scout cannot move to the destination.\n"
            "2. **Battles:**\n"
            " - If you move onto a square occupied by an opponent's piece, then a battle will occur:\n"
            " - The piece with the higher rank wins and eliminates the opponent's piece.\n"
            " - If the ranks are equal, both pieces are removed from the board.\n"
            " - **Special Cases:**\n"
            " - Bombs eliminate most attacking pieces except Miners, which defuse Bombs.\n"
            " - Spies can defeat the Marshal if the Spy attacks first but lose to all other pieces.\n"
            "3. **Strategic Goals:**\n"
            " - Identify your opponent's pieces through their movements and battles.\n"
            " - Protect your Flag while attempting to capture your opponent's Flag.\n"
            " - Use Scouts strategically to gain information about your opponent's pieces and attack weak ones.\n"
            "\n"
            "### How to Make a Move:\n"
            "1. Specify the coordinates of the piece you want to move and its destination.\n"
            "2. Use the format: [A0 B0], where A0 is the source position, and B0 is the destination.\n"
            " - Example: To move a piece from row 0, column 0 to row 1, column 0, input [A0 B0].\n"
            "3. Ensure the destination is valid according to the movement rules above.\n"
            "\n"
            "### Important Notes:\n"
            "- The board will show your pieces and their positions, e.g. MN, MS.\n"
            "- The board will also show known positions of your opponent's pieces without revealing their ranks, e.g. ?.\n"
            "- Grids with ~ are lakes and cannot be moved onto.\n"
            "- As a suggestion, start your game by moving your pieces that are on the front lines to gain information about your opponent's pieces. Player 0 and player 1's frontlines are row D and G respectively.\n"
            "\n"
            "Here is the current board state:\n"
        )
        return prompt

    @classmethod
    def _in_bounds(cls, row, col):
        """Return True when (row, col) lies on the board."""
        return 0 <= row < cls._BOARD_SIZE and 0 <= col < cls._BOARD_SIZE

    @staticmethod
    def _format_move(src_row, src_col, dest_row, dest_col):
        """Format a move as ``[A0 B0]`` (row letter followed by column digit)."""
        return f"[{chr(src_row + 65)}{src_col} {chr(dest_row + 65)}{dest_col}]"

    def _observe_current_state(self):
        """
        Observe the current state of the game and update the state with the rendered board
        and gives the available moves for the current player.
        """
        player_id = self.state.current_player_id
        available_moves = []

        for row in range(self._BOARD_SIZE):
            for col in range(self._BOARD_SIZE):
                piece = self.board[row][col]
                if not isinstance(piece, dict) or piece['player'] != player_id:
                    continue
                # Skip immovable pieces
                if piece['rank'] in self._IMMOVABLE_RANKS:
                    continue

                # Scouts may slide any distance; every other piece moves one square.
                max_steps = self._BOARD_SIZE if piece['rank'] == 'Scout' else 1

                for dr, dc in self._DIRECTIONS:
                    for distance in range(1, max_steps + 1):
                        new_row = row + dr * distance
                        new_col = col + dc * distance
                        if not self._in_bounds(new_row, new_col):
                            break

                        target = self.board[new_row][new_col]
                        if target is None:
                            # Empty square - legal, and a scout may keep sliding
                            available_moves.append(self._format_move(row, col, new_row, new_col))
                            continue
                        if isinstance(target, dict) and target['player'] != player_id:
                            # Enemy piece - can be attacked but not passed
                            available_moves.append(self._format_move(row, col, new_row, new_col))
                        # Enemy piece, own piece or lake: nothing beyond is reachable
                        break

        self.state.add_observation(
            message=f"Current Board:\n\n{self._render_board(player_id=player_id, full_board=False)}\nAvailable Moves: " + ", ".join(available_moves),
            observation_type=ta.ObservationType.GAME_BOARD
        )

    def _place_randomly(self, rank, player, rows):
        """Put one piece of ``rank`` on a random free, non-lake square within ``rows``.

        Returns:
            Tuple[int, int]: The (row, col) square the piece was placed on.
        """
        while True:
            row = random.choice(rows)
            col = random.randint(0, self._BOARD_SIZE - 1)
            if self.board[row][col] is None and (row, col) not in self.lakes:
                self.board[row][col] = {'rank': rank, 'player': player}
                self.player_pieces[player].append((row, col))
                return (row, col)

    def _populate_board(self):
        """
        Populates the board with pieces for each player strategically.

        The board and the per-player piece lists are rebuilt from scratch so
        that calling ``reset`` again does not stack a new army on top of the
        previous game's pieces.

        Returns:
            List[List[Any]]: The freshly populated board (also stored on ``self.board``).
        """
        self.board = [[None for _ in range(self._BOARD_SIZE)] for _ in range(self._BOARD_SIZE)]
        self.player_pieces = {0: [], 1: []}

        for player in range(2):
            # Define rows for each player (player 1 mirrors player 0)
            back_rows = range(0, 2) if player == 0 else range(8, 10)
            front_rows = range(2, 4) if player == 0 else range(6, 8)
            all_rows = range(0, 4) if player == 0 else range(6, 10)

            # Place the Flag in the back rows
            flag_row, flag_col = self._place_randomly('Flag', player, back_rows)

            # Place Bombs around the Flag if possible
            bombs_to_place = self.piece_counts['Bomb']
            for dr, dc in self._DIRECTIONS:
                pos = (flag_row + dr, flag_col + dc)
                if bombs_to_place <= 0 or not self._in_bounds(*pos):
                    continue
                if self.board[pos[0]][pos[1]] is None and pos not in self.lakes:
                    self.board[pos[0]][pos[1]] = {'rank': 'Bomb', 'player': player}
                    self.player_pieces[player].append(pos)
                    bombs_to_place -= 1

            # Place remaining Bombs at the frontline
            for _ in range(bombs_to_place):
                self._place_randomly('Bomb', player, front_rows)

            # Place other pieces randomly
            for piece, count in self.piece_counts.items():
                if piece in self._IMMOVABLE_RANKS:
                    continue  # Skip already placed pieces
                for _ in range(count):
                    self._place_randomly(piece, player, all_rows)

        # Place the lakes
        for row, col in self.lakes:
            self.board[row][col] = "~"

        return self.board

    def _render_board(self, player_id, full_board: bool = False):
        """
        Renders the board state with fixed-width formatting for uniform alignment.

        Args:
            player_id (int): The player viewing the board.
            full_board (bool): Whether to render the full board or just the visible pieces.
                In the full view player 0's pieces are lower-case and player 1's upper-case;
                otherwise the opponent's pieces are shown as ``?``.

        Returns:
            str: The rendered board, one line per row, preceded by a column header line.
        """
        # Define abbreviations for each piece
        piece_abbreviations = {
            'Flag': 'FL', 'Bomb': 'BM', 'Spy': 'SP', 'Scout': 'SC', 'Miner': 'MN',
            'Sergeant': 'SG', 'Lieutenant': 'LT', 'Captain': 'CP', 'Major': 'MJ',
            'Colonel': 'CL', 'General': 'GN', 'Marshal': 'MS'
        }

        column_headers = " " + " ".join([f"{i:>3}" for i in range(self._BOARD_SIZE)])  # Align column numbers
        res = [column_headers + "\n"]

        for row in range(self._BOARD_SIZE):
            row_label = chr(row + 65)  # Convert row index to a letter (A, B, C, ...)
            row_render = [f"{row_label:<3}"]  # Add row label with fixed width
            for col in range(self._BOARD_SIZE):
                piece = self.board[row][col]
                if (row, col) in self.lakes:
                    cell = " ~ "  # Lakes
                elif piece is None:
                    cell = " . "  # Empty space
                else:
                    abbreviation = piece_abbreviations[piece['rank']]
                    if full_board:
                        cell = f" {abbreviation.lower() if piece['player'] == 0 else abbreviation.upper()} "  # Full board view
                    elif piece['player'] == player_id:
                        cell = f" {abbreviation.upper()} "
                    else:
                        cell = " ? "  # Hidden opponent piece
                row_render.append(cell)

            res.append("".join(row_render) + "\n")

        return "".join(res)

    def _notify_both(self, player_id, mover_message, opponent_message):
        """Send separate move descriptions to the moving player and to the opponent."""
        self.state.add_observation(from_id=-1, to_id=player_id, message=mover_message, observation_type=ta.ObservationType.GAME_ACTION_DESCRIPTION)
        self.state.add_observation(from_id=-1, to_id=1 - player_id, message=opponent_message, observation_type=ta.ObservationType.GAME_ACTION_DESCRIPTION)

    def _remove_piece(self, owner_id, pos):
        """Clear the square ``pos`` and drop it from ``owner_id``'s piece list."""
        self.board[pos[0]][pos[1]] = None
        self.player_pieces[owner_id].remove(pos)

    def _relocate_piece(self, player_id, src, dest, captures=False):
        """Move ``player_id``'s piece from ``src`` to ``dest``.

        When ``captures`` is true the opponent's piece standing on ``dest`` is
        also dropped from the opponent's piece list, so that list never points
        at a square now held by the other player.
        """
        if captures:
            self.player_pieces[1 - player_id].remove(dest)
        self.board[dest[0]][dest[1]] = self.board[src[0]][src[1]]
        self.board[src[0]][src[1]] = None
        self.player_pieces[player_id].remove(src)
        self.player_pieces[player_id].append(dest)

    def _apply_move(self, player_id, src, dest, source, dest_label):
        """Carry out an already validated move, resolving a battle if needed.

        Args:
            player_id (int): The moving player.
            src (Tuple[int, int]): Source square as (row, col).
            dest (Tuple[int, int]): Destination square as (row, col).
            source (str): Source square as text, e.g. ``"A0"``.
            dest_label (str): Destination square as text, e.g. ``"B0"``.

        Returns:
            bool: True when the move captured the opponent's Flag (winner already set).
        """
        attacking_piece = self.board[src[0]][src[1]]
        target_piece = self.board[dest[0]][dest[1]]
        mover_message = f"You have moved your piece from {source} to {dest_label}."
        opponent_message = f"Player {player_id} has moved a piece from {source} to {dest_label}."

        if target_piece is None:
            ## move to an empty square
            self._relocate_piece(player_id, src, dest)
            self._notify_both(player_id, mover_message, opponent_message)
            return False

        if target_piece['rank'] == 'Flag':
            ## game over
            self._relocate_piece(player_id, src, dest, captures=True)
            self.state.set_winner(player_id=player_id, reason=f"Player {player_id} has captured the opponent's flag!")
            return True

        ## battle
        battle = f" The attacking piece was {attacking_piece['rank']} and the destination piece was {target_piece['rank']}."
        attacking_rank = self.piece_ranks[attacking_piece['rank']]
        target_rank = self.piece_ranks[target_piece['rank']]

        if attacking_rank == target_rank:
            ## both pieces are removed
            self._remove_piece(player_id, src)
            self._remove_piece(1 - player_id, dest)
            outcome = " As the ranks are the same, both pieces lost."
            self._notify_both(player_id, mover_message + battle + outcome, opponent_message + battle + outcome)
            return False

        if target_piece['rank'] == 'Bomb':
            ## only a Miner survives attacking a Bomb
            attacker_wins = attacking_piece['rank'] == 'Miner'
            explanation = "miners can defuse bombs" if attacker_wins else "the attacker is not a miner"
        elif attacking_piece['rank'] == 'Spy' and target_piece['rank'] == 'Marshal':
            ## Spy beats Marshal only if spy attacks first
            attacker_wins = True
            explanation = "the attacker is a spy and the destination is a marshall"
        elif attacking_rank > target_rank:
            attacker_wins = True
            explanation = "the attacker is a higher rank than the destination"
        else:
            attacker_wins = False
            explanation = "the attacker is a lower rank than the destination"

        if attacker_wins:
            self._relocate_piece(player_id, src, dest, captures=True)
        else:
            ## attacking piece is destroyed, the defender stays in place
            self._remove_piece(player_id, src)

        mover_result, opponent_result = ("won", "lost") if attacker_wins else ("lost", "won")
        self._notify_both(
            player_id,
            f"{mover_message}{battle} As {explanation}, you {mover_result} the battle.",
            f"{opponent_message}{battle} As {explanation}, you {opponent_result} the battle.",
        )
        return False

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Execute an action in the environment.

        Args:
            action (str): Text containing a move in the form ``[A0 B0]``.

        Returns:
            Tuple[bool, ta.Info]: Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id

        ## update the observation
        self.state.add_observation(from_id=player_id, to_id=player_id, message=action, observation_type=ta.ObservationType.PLAYER_ACTION)

        match = self._ACTION_PATTERN.search(action)
        flag_captured = False

        if match is None:
            reason = f"Invalid action format. Player {player_id} did not input a move in the format [A0 B0]."
            self.state.set_invalid_move(reason=reason)
        else:
            src_letter, src_digit, dest_letter, dest_digit = match.groups()
            src_letter, dest_letter = src_letter.upper(), dest_letter.upper()
            source = f"{src_letter}{src_digit}"
            dest = f"{dest_letter}{dest_digit}"
            src_row, src_col = ord(src_letter) - 65, int(src_digit)
            dest_row, dest_col = ord(dest_letter) - 65, int(dest_digit)

            ## check if the source and destination are valid
            if self._validate_move(player_id=player_id, src_row=src_row, src_col=src_col, dest_row=dest_row, dest_col=dest_col):
                flag_captured = self._apply_move(player_id, (src_row, src_col), (dest_row, dest_col), source, dest)

        ## check if the game is over; player 0 is a valid winner, so compare with None
        if not flag_captured:
            winner = self._check_winner()
            if winner is not None:
                reason = f"Player {winner} wins! Player {1 - winner} has no more movable pieces."
                self.state.set_winner(player_id=winner, reason=reason)

        ## update the rendered board
        self.state.game_state["rendered_board"] = self._render_board(player_id=player_id, full_board=True)

        result = self.state.step()
        self._observe_current_state()
        return result

    def _reject_move(self, reason):
        """Flag the current move as invalid with ``reason`` and return False."""
        self.state.set_invalid_move(reason=reason)
        return False

    def _validate_move(self, player_id, src_row, src_col, dest_row, dest_col):
        """
        Validates the move based on the game rules.

        Args:
            player_id (int): The ID of the player making the move.
            src_row (int): The row of the source position.
            src_col (int): The column of the source position.
            dest_row (int): The row of the destination position.
            dest_col (int): The column of the destination position.

        Returns:
            bool: True when the move is legal; otherwise False, after the move
            has been reported through ``self.state.set_invalid_move``.
        """
        if not (self._in_bounds(src_row, src_col) and self._in_bounds(dest_row, dest_col)):
            return self._reject_move(f"Invalid action format. Player {player_id} did not input valid coordinates.")

        # Lakes are stored as the string "~", so only dict cells can be pieces.
        piece = self.board[src_row][src_col]
        if not isinstance(piece, dict) or piece['player'] != player_id:
            return self._reject_move(f"Invalid action format. Player {player_id} must move one of their own pieces.")

        if abs(src_row - dest_row) + abs(src_col - dest_col) != 1:
            if piece['rank'] != 'Scout':
                return self._reject_move("Invalid action format. Pieces, apart from scouts, can only move one square at a time.")

            ## check if there's a piece (or lake) in between the source and destination
            if src_row == dest_row:
                between = [(src_row, col) for col in range(min(src_col, dest_col) + 1, max(src_col, dest_col))]
            elif src_col == dest_col:
                between = [(row, src_col) for row in range(min(src_row, dest_row) + 1, max(src_row, dest_row))]
            else:
                return self._reject_move(f"Invalid action format. Player {player_id} cannot move a scout diagonally.")

            if any(self.board[row][col] is not None for row, col in between):
                return self._reject_move(f"Invalid action format. Player {player_id} cannot move a scout through other pieces.")

        target = self.board[dest_row][dest_col]
        if target is not None:
            if (dest_row, dest_col) in self.lakes:
                return self._reject_move(f"Invalid action format. Player {player_id} cannot move into the lake.")
            if target['player'] == player_id:
                return self._reject_move(f"Invalid action format. Player {player_id} cannot move onto their own piece.")

        if piece['rank'] in self._IMMOVABLE_RANKS:
            return self._reject_move(f"Invalid action format. Player {player_id} cannot move a bomb or flag.")

        return True

    def _check_winner(self):
        """
        determine which player has no more pieces that are not bombs or flags.

        Returns:
            Optional[int]: The ID of the winning player, or None when both
            players still own at least one movable piece. Player 0 is checked
            first, so if neither player can move, player 1 is returned.
        """
        for player in range(2):
            if all(self.board[row][col]['rank'] in self._IMMOVABLE_RANKS for row, col in self.player_pieces[player]):
                return 1 - player
        return None
484
+