DarshanScripts committed on
Commit
fb0f7f4
·
verified ·
1 Parent(s): 6ef39ba

Upload stratego\env\backup\edited_env\Stratego\env.py with huggingface_hub

Browse files
stratego//env//backup//edited_env//Stratego//env.py ADDED
@@ -0,0 +1,612 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re, random
2
+ from typing import Optional, Dict, Tuple, List, Any
3
+
4
+ import textarena as ta
5
+
6
+ class StrategoEnv(ta.Env):
7
+ """ A two-player implementation of the board game Stratego """
8
def __init__(self):
    """
    Create the environment with its static tables and an empty board.

    Attributes set here:
        piece_counts: how many copies of each piece a player starts with.
        piece_ranks: numeric rank of each piece.
        lakes: coordinates of the lake squares.
        player_pieces: per-player list of occupied (row, col) squares.
        board: 10x10 grid, every square initially None.
        turn_count: number of turns taken so far, starts at 0.
    """
    # Number of copies of every piece in one player's army.
    self.piece_counts = dict(
        Flag=1, Bomb=6, Spy=1, Scout=8, Miner=5,
        Sergeant=4, Lieutenant=4, Captain=4, Major=3,
        Colonel=2, General=1, Marshal=1,
    )
    # Numeric rank of every piece.
    self.piece_ranks = dict(
        Flag=0, Bomb=11, Spy=1, Scout=2, Miner=3,
        Sergeant=4, Lieutenant=5, Captain=6, Major=7,
        Colonel=8, General=9, Marshal=10,
    )
    # Two 2x2 lakes in rows 4-5, starting at columns 2 and 6.
    self.lakes = [
        (row, col)
        for left_col in (2, 6)
        for row in (4, 5)
        for col in (left_col, left_col + 1)
    ]
    self.player_pieces = {player: [] for player in (0, 1)}
    self.board = [[None] * 10 for _ in range(10)]
    # Turn counter; starts at zero.
    self.turn_count = 0
28
+
29
@property
def terminal_render_keys(self):
    """Return the list of game-state keys to render; currently only ``rendered_board``."""
    keys = ["rendered_board"]
    return keys
32
+
33
def reset(self, num_players: int, seed: Optional[int]=None):
    """
    Reset the environment to start a new game.

    Creates a new two-player state, zeroes the turn counter, discards any
    pieces left over from a previous game, populates the board and emits the
    first board observation.

    Args:
        num_players (int): Forwarded to ``ta.TwoPlayerState``.
        seed (Optional[int]): Forwarded to ``ta.TwoPlayerState``.
    """
    self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)
    self.turn_count = 0

    # Start from an empty board and empty piece lists. Without this, a second
    # reset() would try to place a new army on squares that are still
    # occupied, and the random placement would never find a free square.
    self.player_pieces = {0: [], 1: []}
    self.board = [[None for _ in range(10)] for _ in range(10)]

    ## populate the board
    self.board = self._populate_board()

    ## initialise the game state
    rendered_board = self._render_board(player_id=None, full_board=True)
    game_state = {
        "board": self.board,
        "player_pieces": self.player_pieces,
        "rendered_board": rendered_board,
    }
    self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
    self._observe_current_state()
47
+
48
def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]):
    """
    Build the instruction text shown to a player at the start of the game.

    Args:
        player_id (int): The ID of the player the prompt is addressed to.
        game_state (Dict[str, Any]): The current game state (not used by the
            prompt text itself).

    Returns:
        str: The rules and move-format instructions, ending with the line that
        introduces the board.
    """
    prompt = (
        f"You are Player {player_id} in Stratego.\n"
        "Your goal is to capture your opponent's Flag or eliminate all of their movable pieces.\n"
        "Your army has been placed for you on the board, including your Flag, Bombs, and other pieces of varying ranks.\n"
        "\n"
        "### Gameplay Instructions\n"
        "1. **Movement Rules:**\n"
        # The destination must NOT hold one of the player's own pieces; the
        # previous wording ("that is already occupied") stated the opposite.
        " - On your turn, you can move one piece by one step to an adjacent square (up, down, left, or right) that is not already occupied with your pieces.\n"
        " - Example: A piece can move from A1 to B1 or A1 to A2 if B1 and A2 are not placed with the player's own pieces.\n"
        " - If the selected piece is a Bomb or a Flag, it cannot be moved.\n"
        # NOTE: the Scout movement text below is intentionally left disabled, exactly as in the original.
        # " - **Scout Movement:** Scouts, on the other hand, can move multiple steps in a straight line (horizontally or vertically), but strictly only on one condition.\n"
        # " - The condition is that Scouts cannot jump over any piece (your own or your opponent's).\n"
        # " - Example: If there is a piece between the Scout and its destination, the Scout cannot move to the destination.\n"
        # " - This will be indicated as an invalid move which makes you lose the game.\n"
        "2. **Battles:**\n"
        " - If you move onto a square occupied by an opponent's piece, then a battle will occur:\n"
        " - The piece with the higher rank wins and eliminates the opponent's piece.\n"
        " - If the ranks are equal, both pieces are removed from the board.\n"
        " - **Special Cases:**\n"
        " - Bombs eliminate most attacking pieces except Miners, which defuse Bombs.\n"
        " - Spies can defeat the Marshal if the Spy attacks first but lose to all other pieces.\n"
        "3. **Strategic Goals:**\n"
        " - Identify your opponent's pieces through their movements and battles.\n"
        " - Protect your Flag while attempting to capture your opponent's Flag.\n"
        " - Use Scouts strategically to gain information about your opponent's pieces and attack weak ones.\n"
        "\n"
        "### How to Make a Move:\n"
        "1. Specify the coordinates of the piece you want to move and its destination.\n"
        "2. Use the format: [A0 B0], where A0 is the source position, and B0 is the destination.\n"
        " - Example: To move a piece from row 0, column 0 to row 1, column 0, input [A0 B0].\n"
        "3. Ensure the destination is valid according to the movement rules above.\n"
        "\n"
        "### Important Notes:\n"
        "- The board will show your pieces and their positions, e.g. MN, MS.\n"
        "- The board will also show known positions of your opponent's pieces without revealing their ranks, e.g. ?.\n"
        "- Grids with ~ are lakes and cannot be moved onto.\n"
        "- As a suggestion, start your game by moving your pieces that are on the front lines to gain information about your opponent's pieces. Player 0 and player 1's frontlines are row D and G respectively.\n"
        "\n"
        "Here is the current board state:\n"
    )
    return prompt
97
+
98
def _observe_current_state(self):
    """
    Send the current player their view of the board and their legal moves.

    Walks the board in row-major order, collects every move available to the
    current player (Scouts slide along a line until blocked, all other movable
    pieces step one square), stores the number of moves found in the game
    state under ``available_moves_p<id>`` and adds a GAME_BOARD observation.
    """
    me = self.state.current_player_id
    moves = []

    for row in range(10):
        for col in range(10):
            piece = self.board[row][col]
            # Only the current player's own pieces can generate moves.
            if not isinstance(piece, dict) or piece['player'] != me:
                continue

            kind = piece['rank'].lower()
            if kind in ['bomb', 'flag']:
                continue  # immovable pieces

            # A Scout may travel up to the full board length (9 squares);
            # everything else moves exactly one square.
            reach = 9 if kind == 'scout' else 1
            origin = f"{chr(row + 65)}{col}"

            for d_row, d_col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                for distance in range(1, reach + 1):
                    new_row = row + d_row * distance
                    new_col = col + d_col * distance
                    if not (0 <= new_row < 10 and 0 <= new_col < 10):
                        break  # ran off the board

                    occupant = self.board[new_row][new_col]
                    is_enemy = isinstance(occupant, dict) and occupant['player'] != me
                    if occupant is not None and not is_enemy:
                        break  # own piece or lake: cannot enter or pass

                    moves.append(f"[{origin} {chr(new_row + 65)}{new_col}]")
                    if is_enemy:
                        break  # an attack ends the slide

    # step() and _check_stalemate() read this count to detect a player who
    # has no move left.
    self.state.game_state[f'available_moves_p{me}'] = len(moves)

    board_view = self._render_board(player_id=me, full_board=False)
    self.state.add_observation(
        message=f"Current Board:\n\n{board_view}\nAvailable Moves: " + ", ".join(moves),
        observation_type=ta.ObservationType.GAME_BOARD
    )
163
+
164
def _populate_board(self):
    """
    Build a fresh board and place both armies on it.

    For each player the Flag goes on a random square of the two back rows,
    Bombs are put on the free squares orthogonally adjacent to the Flag, the
    remaining Bombs are scattered over the two front rows, and every other
    piece is dropped on a random free square of the player's four rows.
    Lake squares are marked with "~" at the end.

    Returns:
        The populated 10x10 board (the same object stored in ``self.board``).

    Raises:
        ValueError: If a piece has to be placed but no free square is left in
            the rows it is allowed to occupy.
    """
    # Always start from a clean slate so the method can be called for every
    # new game; placing a second army on a full board would never finish.
    self.board = [[None for _ in range(10)] for _ in range(10)]
    self.player_pieces = {0: [], 1: []}
    lakes = set(self.lakes)

    def put(rank, player, row, col):
        """Place one piece on a specific square and record its coordinate."""
        self.board[row][col] = {'rank': rank, 'player': player}
        self.player_pieces[player].append((row, col))

    def place(rank, player, rows):
        """Place one piece on a uniformly random free, non-lake square of `rows`."""
        free = [
            (row, col)
            for row in rows
            for col in range(10)
            if self.board[row][col] is None and (row, col) not in lakes
        ]
        if not free:
            raise ValueError(f"No free square left to place {rank} for player {player}.")
        row, col = random.choice(free)
        put(rank, player, row, col)
        return row, col

    for player in range(2):
        # Player 0 owns rows 0-3 (front = rows 2-3); player 1 owns rows 6-9
        # (front = rows 6-7, the rows facing the opponent).
        if player == 0:
            back_rows, front_rows, all_rows = range(0, 2), range(2, 4), range(0, 4)
        else:
            back_rows, front_rows, all_rows = range(8, 10), range(6, 8), range(6, 10)

        # Place the Flag in the back rows
        flag_row, flag_col = place('Flag', player, back_rows)

        # Place Bombs around the Flag if possible
        bombs_to_place = self.piece_counts['Bomb']
        for d_row, d_col in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            row, col = flag_row + d_row, flag_col + d_col
            if (
                bombs_to_place > 0
                and 0 <= row < 10 and 0 <= col < 10
                and self.board[row][col] is None
                and (row, col) not in lakes
            ):
                put('Bomb', player, row, col)
                bombs_to_place -= 1

        # Place remaining Bombs at the frontline
        for _ in range(bombs_to_place):
            place('Bomb', player, front_rows)

        # Place other pieces randomly
        for piece, count in self.piece_counts.items():
            if piece in ('Flag', 'Bomb'):
                continue  # already placed above
            for _ in range(count):
                place(piece, player, all_rows)

    # Place the lakes
    for row, col in self.lakes:
        self.board[row][col] = "~"

    return self.board
226
+
227
+
228
+
229
def _render_board(self, player_id, full_board: bool = False):
    """
    Render the board as text with fixed-width cells so columns line up.

    Every cell is four characters wide: lakes are ``~``, empty squares ``.``,
    hidden opponent pieces ``?`` and visible pieces a two-letter abbreviation.
    Rows are labelled A-J and columns 0-9.

    Args:
        player_id (int): The player viewing the board. Only that player's
            pieces are shown by rank unless ``full_board`` is set.
        full_board (bool): When True every piece is shown; player 0's pieces
            are lower-case and player 1's upper-case.

    Returns:
        str: The header line followed by ten board lines, each ending in a newline.
    """
    # Define abbreviations for each piece
    piece_abbreviations = {
        'Flag': 'FL', 'Bomb': 'BM', 'Spy': 'SP', 'Scout': 'SC', 'Miner': 'MN',
        'Sergeant': 'SG', 'Lieutenant': 'LT', 'Captain': 'CP', 'Major': 'MJ',
        'Colonel': 'CL', 'General': 'GN', 'Marshal': 'MS'
    }
    lakes = set(self.lakes)

    # Three characters of padding match the width of the row label, then one
    # right-aligned 3-wide number plus a separator per column (4 per cell).
    lines = ["   " + " ".join(f"{i:>3}" for i in range(10)) + "\n"]

    for row in range(10):
        rendered = [f"{chr(row + 65):<3}"]  # row index -> letter A, B, C, ...
        for col in range(10):
            piece = self.board[row][col]
            if (row, col) in lakes:
                symbol = "~"
            elif piece is None:
                symbol = "."
            else:
                abbreviation = piece_abbreviations[piece['rank']]
                if full_board:
                    symbol = abbreviation.lower() if piece['player'] == 0 else abbreviation.upper()
                elif piece['player'] == player_id:
                    symbol = abbreviation.upper()
                else:
                    symbol = "?"  # hidden opponent piece
            # Right-align in 3 columns plus one trailing space: every cell is
            # exactly 4 characters wide regardless of the symbol length.
            rendered.append(f"{symbol:>3} ")
        lines.append("".join(rendered) + "\n")

    return "".join(lines)
271
+
272
+
273
+
274
def step(self, action: str) -> Tuple[bool, ta.Info]:
    """
    Execute one action for the current player.

    The action must contain a move of the form ``[A0 B0]`` (source square,
    then destination square). A malformed or illegal move ends the game in
    favour of the opponent. A legal move is applied to the board, any battle
    is resolved, both players are told what happened, and the end-of-game
    conditions (flag capture, elimination, stalemate, turn limit) are checked.

    Args:
        action (str): Raw text produced by the current player.

    Returns:
        The value of ``self.state.step()``; its first element is used here as
        the "game is over" flag.
    """
    max_turns = 1000  # draw once more than this many turns have been played

    self.turn_count += 1
    player_id = self.state.current_player_id
    opponent_id = 1 - player_id

    def mark_invalid_move():
        """Best-effort: flag the invalid move in game_info when it has the expected shape."""
        try:
            self.state.game_info[player_id]["invalid_move"] = True
        except (AttributeError, KeyError, IndexError, TypeError):
            pass

    # A player with zero legal moves is handled before their action is parsed,
    # so they are not penalised for an "invalid" action they could not avoid.
    if self.state.game_state.get(f'available_moves_p{player_id}', 1) == 0:
        if self._has_movable_pieces(opponent_id):
            reason = f"Player {player_id} has no valid moves remaining. Player {opponent_id} wins!"
            self.state.set_winner(player_id=opponent_id, reason=reason)
        else:
            # Neither side can continue: this is a draw, not a win for anyone.
            reason = "Stalemate: Neither player has any valid moves remaining. The game is a draw."
            self.state.set_draw(reason=reason)
        return self.state.step()

    ## update the observation
    self.state.add_observation(from_id=player_id, to_id=player_id, message=action, observation_type=ta.ObservationType.PLAYER_ACTION)

    ## action search pattern
    match = re.search(r"\[([A-J])([0-9]) ([A-J])([0-9])\]", action, re.IGNORECASE)
    if match is None:
        reason = f"Invalid action format. Player {player_id} did not input a move in the format [A0 B0]."
        self.state.set_invalid_move(reason=reason)
        mark_invalid_move()
        self.state.set_winner(player_id=opponent_id, reason=reason)
        return self.state.step()

    src_letter, src_digit, dest_letter, dest_digit = match.groups()
    src_letter, dest_letter = src_letter.upper(), dest_letter.upper()
    source = f"{src_letter}{src_digit}"
    dest = f"{dest_letter}{dest_digit}"
    src_row, src_col = ord(src_letter) - 65, int(src_digit)
    dest_row, dest_col = ord(dest_letter) - 65, int(dest_digit)

    if not self._validate_move(player_id=player_id, src_row=src_row, src_col=src_col, dest_row=dest_row, dest_col=dest_col):
        # invalid move -> immediate loss
        mark_invalid_move()
        self.state.set_winner(player_id=opponent_id, reason="Illegal move.")
        return self.state.step()

    attacking_piece = self.board[src_row][src_col]
    target_piece = self.board[dest_row][dest_col]

    # Decide who survives. A plain move is treated as "attacker survives,
    # nothing defends". `cause` is the explanation used in the battle report.
    attacker_survives, defender_survives, cause = True, False, None
    flag_captured = False
    if target_piece is not None:
        attacking_rank = self.piece_ranks[attacking_piece['rank']]
        target_rank = self.piece_ranks[target_piece['rank']]
        if attacking_rank == target_rank:
            attacker_survives = False
            cause = "As the ranks are the same"
        elif target_piece['rank'] == 'Bomb':
            if attacking_piece['rank'] == 'Miner':
                cause = "As miners can defuse bombs"
            else:
                attacker_survives, defender_survives = False, True
                cause = "As the attacker is not a miner"
        elif target_piece['rank'] == 'Flag':
            flag_captured = True
        elif attacking_piece['rank'] == 'Spy' and target_piece['rank'] == 'Marshal':
            ## Spy beats Marshal only if spy attacks first
            cause = "As the attacker is a spy and the destination is a marshall"
        elif attacking_rank > target_rank:
            cause = "As the attacker is a higher rank than the destination"
        else:
            attacker_survives, defender_survives = False, True
            cause = "As the attacker is a lower rank than the destination"

    # Apply the outcome to the board and to the per-player coordinate lists.
    self.board[src_row][src_col] = None
    self.player_pieces[player_id].remove((src_row, src_col))
    if not defender_survives:
        if target_piece is not None:
            self.player_pieces[opponent_id].remove((dest_row, dest_col))
        if attacker_survives:
            self.board[dest_row][dest_col] = attacking_piece
            self.player_pieces[player_id].append((dest_row, dest_col))
        else:
            self.board[dest_row][dest_col] = None

    if flag_captured:
        # Refresh the rendered board so the final state shows the capture.
        self.state.game_state["rendered_board"] = self._render_board(player_id=player_id, full_board=True)
        reason = f"Player {player_id} has captured the opponent's flag!"
        self.state.set_winner(player_id=player_id, reason=reason)
        # Immediately end the game and return the final state
        return self.state.step()

    ## add the observation to both players separately
    mover_message = f"You have moved your piece from {source} to {dest}."
    opponent_message = f"Player {player_id} has moved a piece from {source} to {dest}."
    if target_piece is not None:
        battle = (
            f" The attacking piece was {attacking_piece['rank']} and the destination piece was"
            f" {target_piece['rank']}. {cause}, "
        )
        if not attacker_survives and not defender_survives:
            mover_message += battle + "both pieces lost."
            opponent_message += battle + "both pieces lost."
        elif attacker_survives:
            mover_message += battle + "you won the battle."
            opponent_message += battle + "you lost the battle."
        else:
            mover_message += battle + "you lost the battle."
            opponent_message += battle + "you won the battle."
    self.state.add_observation(from_id=-1, to_id=player_id, message=mover_message, observation_type=ta.ObservationType.GAME_ACTION_DESCRIPTION)
    self.state.add_observation(from_id=-1, to_id=opponent_id, message=opponent_message, observation_type=ta.ObservationType.GAME_ACTION_DESCRIPTION)

    # End-of-game checks after a successful move. The stalemate check runs
    # first so that a position where neither side has a movable piece left
    # is reported as a draw rather than as a win for one player.
    if self._check_stalemate():
        reason = "Stalemate: Neither player has any valid moves remaining. The game is a draw."
        self.state.set_draw(reason=reason)
    else:
        winner = self._check_winner()
        if winner is not None:
            reason = f"Player {winner} wins! Player {1 - winner} has no more movable pieces."
            self.state.set_winner(player_id=winner, reason=reason)
        elif self.turn_count > max_turns:
            reason = f"Game ended in a draw (turn limit of {max_turns} moves exceeded)."
            self.state.set_draw(reason=reason)

    ## update the rendered board
    self.state.game_state["rendered_board"] = self._render_board(player_id=player_id, full_board=True)

    result = self.state.step()

    # The next player's observation must be produced before returning.
    if not result[0]:  # game is not done
        self._observe_current_state()

    return result
490
+
491
def _validate_move(self, player_id, src_row, src_col, dest_row, dest_col):
    """
    Validate a move against the game rules.

    On every rejection the reason is reported through
    ``self.state.set_invalid_move`` before False is returned.

    Args:
        player_id (int): The ID of the player making the move.
        src_row (int): The row of the source position.
        src_col (int): The column of the source position.
        dest_row (int): The row of the destination position.
        dest_col (int): The column of the destination position.

    Returns:
        bool: True when the move is legal, False otherwise.
    """
    def reject(reason):
        """Report the reason to the state and signal an illegal move."""
        self.state.set_invalid_move(reason=reason)
        return False

    if not all(0 <= value < 10 for value in (src_row, src_col, dest_row, dest_col)):
        return reject(f"Invalid action format. Player {player_id} did not input valid coordinates.")

    piece = self.board[src_row][src_col]
    # Lake squares hold the string "~", so the source must be confirmed to be
    # a piece before it is indexed; otherwise picking a lake raises TypeError.
    if not isinstance(piece, dict) or piece['player'] != player_id:
        return reject(f"Invalid action format. Player {player_id} must move one of their own pieces.")

    is_scout = piece['rank'].lower() == 'scout'
    if abs(src_row - dest_row) + abs(src_col - dest_col) != 1:
        if not is_scout:
            return reject("Invalid action format. Pieces, apart from scouts, can only move one square at a time.")

        ## a scout may slide along a row or column, but not through anything
        if src_row == dest_row:
            path = [(src_row, col) for col in range(min(src_col, dest_col) + 1, max(src_col, dest_col))]
        elif src_col == dest_col:
            path = [(row, src_col) for row in range(min(src_row, dest_row) + 1, max(src_row, dest_row))]
        else:
            return reject(f"Invalid action format. Player {player_id} cannot move a scout diagonally.")

        if any(self.board[row][col] is not None for row, col in path):
            return reject(f"Invalid action format. Player {player_id} cannot move a scout through other pieces.")

    target = self.board[dest_row][dest_col]
    if target is not None:
        if (dest_row, dest_col) in self.lakes:
            return reject(f"Invalid action format. Player {player_id} cannot move into the lake.")
        if isinstance(target, dict) and target['player'] == player_id:
            return reject(f"Invalid action format. Player {player_id} cannot move onto their own piece.")

    if piece['rank'].lower() in ['bomb', 'flag']:
        return reject(f"Invalid action format. Player {player_id} cannot move a bomb or flag.")

    return True
554
+
555
+ #Working on below for new code to deal with Non Type error
556
+ # def _check_winner(self):
557
+ # """
558
+ # determine which player has no more pieces that are not bombs or flags.
559
+ # """
560
+ # for player in range(2):
561
+ # if all([self.board[row][col]['rank'] in ['Bomb', 'Flag'] for row, col in self.player_pieces[player]]):
562
+ # return 1 - player
563
+ # return None
564
+
565
def _check_winner(self):
    """
    Determine whether exactly one player has run out of movable pieces.

    A piece is movable when it is neither a Bomb nor a Flag. Coordinates in
    ``player_pieces`` whose board square no longer holds a piece are skipped.

    Returns:
        The ID of the winning player when the other player has no movable
        piece left, or None when both players still have one. None is also
        returned when neither player has one: that position is a draw, not a
        win, and is left to the stalemate check.
    """
    def can_move(player):
        """True when the player still owns a piece other than a Bomb or Flag."""
        return any(
            isinstance(self.board[row][col], dict)
            and self.board[row][col]['rank'] not in ('Bomb', 'Flag')
            for row, col in self.player_pieces[player]
        )

    p0_can_move, p1_can_move = can_move(0), can_move(1)
    if p0_can_move == p1_can_move:
        # Both can still move (game goes on) or neither can (draw).
        return None
    return 0 if p0_can_move else 1
581
+
582
+ # new comment(13 Nov 2025) These are new helper methods for win/draw checking.
583
+
584
def _has_movable_pieces(self, player_id: int) -> bool:
    """Return True when `player_id` still owns at least one piece that is not a Bomb or a Flag."""
    found = False
    for row, col in self.player_pieces[player_id]:
        occupant = self.board[row][col]
        # Coordinates whose square has been emptied are skipped.
        if occupant is not None and occupant['rank'] not in ['Bomb', 'Flag']:
            found = True
    return found
591
+
592
def _check_stalemate(self) -> bool:
    """
    Report whether the game is stuck in a draw.

    A draw is detected in two situations:
      1. neither player owns a movable piece any more, or
      2. the move counts recorded in the game state under
         ``available_moves_p0`` / ``available_moves_p1`` are both zero.
         A missing count is treated as 1, i.e. "not blocked".
    """
    # Situation 1: both armies are reduced to immovable pieces.
    mobility = [self._has_movable_pieces(player) for player in (0, 1)]
    if not any(mobility):
        return True

    # Situation 2: both recorded move counts are zero. The counts are read
    # from the game state, where _observe_current_state stores them.
    recorded = self.state.game_state
    blocked = [recorded.get(f'available_moves_p{player}', 1) == 0 for player in (0, 1)]
    return all(blocked)