Kaushik Rajan committed on
Commit
5c7fb25
·
1 Parent(s): b1670f3

fix(tictactoe): Fix minimax bug and implement clickable board

Browse files
Files changed (1) hide show
  1. app.py +66 -66
app.py CHANGED
@@ -108,29 +108,29 @@ def create_interface():
108
  gr.Markdown("Play TicTacToe against an AI, see its step-by-step reasoning, and learn how it thinks!")
109
 
110
  if GAMES_AVAILABLE:
111
- # TicTacToe specific functions
112
- def get_tictactoe_board_html():
113
- """Get current TicTacToe board as HTML with emojis."""
114
- board = tictactoe_env.board
115
- html = '<table style="border: 1px solid black; text-align: center; font-size: 24px;">'
116
- for row in range(3):
117
- html += '<tr>'
118
- for col in range(3):
119
- cell = board[row, col]
120
- if cell == 1:
121
- content = '❌'
122
- elif cell == -1:
123
- content = 'β­•'
124
- else:
125
- content = f'{row*3 + col}'
126
- html += f'<td style="border: 1px solid black; width: 50px; height: 50px;">{content}</td>'
127
- html += '</tr>'
128
- html += '</table>'
129
- return html
130
 
131
- def get_valid_tictactoe_positions():
132
- """Get list of valid position strings."""
133
- return [str(i) for i in tictactoe_env._get_valid_actions()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
136
 
@@ -138,9 +138,10 @@ def create_interface():
138
  """Minimax algorithm to find the best move."""
139
 
140
  # Base cases
141
- if tictactoe_env._check_winner(1):
 
142
  return -10, None
143
- elif tictactoe_env._check_winner(-1):
144
  return 10, None
145
  elif tictactoe_env._is_draw():
146
  return 0, None
@@ -169,81 +170,74 @@ def create_interface():
169
  return best_score, best_move
170
 
171
  def play_tictactoe(position, stats):
172
- """Play a TicTacToe move."""
173
  if tictactoe_env.game_over:
174
- yield get_tictactoe_board_html(), "Game is over! Click 'New Game' to start again.", "", stats, get_valid_tictactoe_positions()
175
  return
176
 
177
  try:
178
  position = int(position)
179
- if position < 0 or position > 8:
180
- raise ValueError("Invalid position")
181
 
182
  # Human move
183
- obs, reward, terminated, truncated, info = tictactoe_env.step(position)
184
 
185
- if terminated:
186
  winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
187
  if winner == "You": stats['wins'] += 1
188
  elif winner == "AI": stats['losses'] += 1
189
  else: stats['draws'] += 1
190
- yield get_tictactoe_board_html(), f"Game Over! {winner} won!", f"Final reward: {reward}", stats, []
191
  return
192
 
193
  # Show "thinking" indicator
194
- yield get_tictactoe_board_html(), "AI is thinking...", "🧠...", stats, []
195
 
196
  # AI move
197
  _, ai_action = minimax(tictactoe_env.board.copy(), -1)
198
- if ai_action is None: # Handle case where minimax returns no move (e.g., game over)
199
  valid_actions = tictactoe_env._get_valid_actions()
200
- if not valid_actions: # No actions left
201
- yield get_tictactoe_board_html(), "Game is a draw!", "", stats, []
202
  return
203
  ai_action = random.choice(valid_actions)
204
 
205
-
206
  reasoning_prompt = f"In TicTacToe, the board is currently: {tictactoe_env.board.flatten().tolist()}. The human player (X) played position {position}. I am the AI (O). The available moves are {tictactoe_env._get_valid_actions()}. I have analyzed the game tree using minimax and determined the optimal move is {ai_action}. Explain my strategy."
207
  reasoning = generate_reasoning(reasoning_prompt)
208
- obs, reward, terminated, truncated, info = tictactoe_env.step(ai_action)
209
 
210
- if terminated:
211
  winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
212
  if winner == "You": stats['wins'] += 1
213
  elif winner == "AI": stats['losses'] += 1
214
  else: stats['draws'] += 1
215
- yield get_tictactoe_board_html(), f"Game Over! {winner} won! AI played {ai_action}.", reasoning, stats, []
216
  else:
217
- yield get_tictactoe_board_html(), f"AI played position {ai_action}. Your turn!", reasoning, stats, get_valid_tictactoe_positions()
218
 
219
  except Exception as e:
220
- yield get_tictactoe_board_html(), f"Error: {str(e)}", "", stats, get_valid_tictactoe_positions()
221
-
222
  def reset_tictactoe(stats):
223
  """Reset TicTacToe game."""
224
  tictactoe_env.reset()
225
- return get_tictactoe_board_html(), "New game started! You are ❌ (X). Choose a position from the dropdown.", "AI will show its reasoning here...", stats, get_valid_tictactoe_positions()
226
 
227
  # Simplified layout focusing only on TicTacToe
228
- gr.Markdown("### Play TicTacToe against AI\nYou are ❌ (X) and go first. Get 3 in a row to win! **How AI Thinks**: AI will analyze the board and explain its moves.\nPositions: Top-left=0, bottom-right=8.")
229
-
230
- with gr.Row():
231
- with gr.Column(scale=2):
232
- ttt_board = gr.HTML(
233
- label="Game Board",
234
- value=get_tictactoe_board_html()
235
- )
236
-
237
- with gr.Column(scale=1):
238
- ttt_position = gr.Dropdown(
239
- label="Your Move (Valid Positions)",
240
- choices=get_valid_tictactoe_positions()
241
- )
242
  with gr.Row():
243
- ttt_play_btn = gr.Button("Play Move", variant="primary")
244
- ttt_reset_btn = gr.Button("New Game", variant="secondary")
 
 
 
 
 
245
  ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
246
-
247
  ttt_message = gr.Textbox(
248
  label="Game Status",
249
  value="Choose a position to start!",
@@ -257,16 +251,22 @@ def create_interface():
257
  lines=3,
258
  interactive=False
259
  )
 
 
 
 
 
 
 
 
 
 
 
260
 
261
- ttt_play_btn.click(
262
- fn=play_tictactoe,
263
- inputs=[ttt_position, ttt_stats],
264
- outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
265
- )
266
  ttt_reset_btn.click(
267
  fn=reset_tictactoe,
268
  inputs=[ttt_stats],
269
- outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
270
  )
271
  # Update stats display on changes
272
  ttt_stats.change(
 
108
  gr.Markdown("Play TicTacToe against an AI, see its step-by-step reasoning, and learn how it thinks!")
109
 
110
  if GAMES_AVAILABLE:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
def update_board_buttons():
    """Build one ``gr.Button`` update per board cell from the current game state.

    Cells are visited in row-major order: index ``i`` maps to
    ``divmod(i, 3)`` on ``tictactoe_env.board``. A cell holding ``1`` is
    shown as ❌, a cell holding ``-1`` as ⭕, and anything else as a blank
    button. A button stays clickable only while its cell is blank and
    ``tictactoe_env.game_over`` is false.

    Returns:
        list: Nine ``gr.Button`` objects, in cell order 0-8.
    """
    # The game-over flag does not change while the list is built; read it once.
    game_over = tictactoe_env.game_over

    updates = []
    for i in range(9):
        cell = tictactoe_env.board[divmod(i, 3)]
        if cell == 1:
            val = '❌'
        elif cell == -1:
            val = '⭕'
        else:
            val = ""

        # Occupied squares and every square of a finished game are locked.
        interactive = val == "" and not game_over
        updates.append(gr.Button(value=val, interactive=interactive))
    return updates
132
+
133
+ # TicTacToe specific functions (no longer need get_tictactoe_board_html)
134
 
135
  ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
136
 
 
138
  """Minimax algorithm to find the best move."""
139
 
140
  # Base cases
141
+ winner = tictactoe_env._check_winner()
142
+ if winner == 1: # Human wins
143
  return -10, None
144
+ elif winner == -1: # AI wins
145
  return 10, None
146
  elif tictactoe_env._is_draw():
147
  return 0, None
 
170
  return best_score, best_move
171
 
172
def play_tictactoe(position, stats):
    """Play the human move at ``position``, then the AI reply, streaming UI updates.

    This is a generator. Every yielded tuple holds the nine board-button
    updates from ``update_board_buttons()`` followed by the status message,
    the AI reasoning text and the ``stats`` dict.

    Args:
        position: Cell index of the clicked square; converted with ``int()``.
        stats: Dict with ``'wins'``, ``'losses'`` and ``'draws'`` counters.
            It is updated in place when a game finishes.

    Yields:
        tuple: ``(*button_updates, message, reasoning, stats)``.
    """

    def finish_game():
        """Tally the finished game in ``stats`` and return its headline."""
        if tictactoe_env.winner == 1:
            stats['wins'] += 1
            return "Game Over! You won!"
        if tictactoe_env.winner == -1:
            stats['losses'] += 1
            return "Game Over! AI won!"
        stats['draws'] += 1
        # A draw has no winner, so it must not be reported as "Draw won!".
        return "Game Over! It's a draw!"

    if tictactoe_env.game_over:
        yield (*update_board_buttons(), "Game is over! Click 'New Game' to start again.", "", stats)
        return

    try:
        position = int(position)

        # Human move
        tictactoe_env.step(position)

        if tictactoe_env.game_over:
            headline = finish_game()
            yield (*update_board_buttons(), headline, "", stats)
            return

        # Intermediate update: show the human move while the AI reply is computed.
        yield (*update_board_buttons(), "AI is thinking...", "🧠...", stats)

        # AI move
        _, ai_action = minimax(tictactoe_env.board.copy(), -1)
        if ai_action is None:
            # minimax gave no move; fall back to any remaining legal square.
            valid_actions = tictactoe_env._get_valid_actions()
            if not valid_actions:
                yield (*update_board_buttons(), "Game is a draw!", "", stats)
                return
            ai_action = random.choice(valid_actions)

        # The prompt describes the board before the AI's move is applied.
        reasoning_prompt = f"In TicTacToe, the board is currently: {tictactoe_env.board.flatten().tolist()}. The human player (X) played position {position}. I am the AI (O). The available moves are {tictactoe_env._get_valid_actions()}. I have analyzed the game tree using minimax and determined the optimal move is {ai_action}. Explain my strategy."
        reasoning = generate_reasoning(reasoning_prompt)
        tictactoe_env.step(ai_action)

        if tictactoe_env.game_over:
            headline = finish_game()
            yield (*update_board_buttons(), f"{headline} AI played {ai_action}.", reasoning, stats)
        else:
            yield (*update_board_buttons(), f"AI played position {ai_action}. Your turn!", reasoning, stats)

    except Exception as e:
        # UI boundary: surface any failure in the status box instead of crashing the handler.
        yield (*update_board_buttons(), f"Error: {str(e)}", "", stats)
219
+
220
def reset_tictactoe(stats):
    """Start a fresh game and return the UI values for the cleared board.

    Resets ``tictactoe_env`` and returns the nine button updates followed by
    the status message, the reasoning placeholder and ``stats`` unchanged.
    """
    tictactoe_env.reset()
    status_text = "New game started! You are ❌ (X). Click a square to play."
    reasoning_placeholder = "AI will show its reasoning here..."
    return (*update_board_buttons(), status_text, reasoning_placeholder, stats)
224
 
225
  # Simplified layout focusing only on TicTacToe
226
+ gr.Markdown("### Play TicTacToe against AI\nYou are ❌ (X) and go first. Click on a square to make your move.")
227
+
228
+ with gr.Column():
229
+ board_buttons = []
230
+ for i in range(3):
 
 
 
 
 
 
 
 
 
231
  with gr.Row():
232
+ for j in range(3):
233
+ pos = i * 3 + j
234
+ button = gr.Button("", elem_id=f"ttt-cell-{pos}")
235
+ board_buttons.append(button)
236
+
237
+ with gr.Row():
238
+ ttt_reset_btn = gr.Button("New Game", variant="secondary")
239
  ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
240
+
241
  ttt_message = gr.Textbox(
242
  label="Game Status",
243
  value="Choose a position to start!",
 
251
  lines=3,
252
  interactive=False
253
  )
254
+
255
+ # Create a combined click handler
256
def on_board_click(pos, stats):
    """Relay a click on cell ``pos`` to ``play_tictactoe`` and stream its updates."""
    yield from play_tictactoe(pos, stats)

# Every cell shares the handler; a per-button gr.State carries that cell's index.
for cell_index, cell_button in enumerate(board_buttons):
    cell_button.click(
        fn=on_board_click,
        inputs=[gr.State(cell_index), ttt_stats],
        outputs=[*board_buttons, ttt_message, ttt_reasoning, ttt_stats],
    )

# "New Game" rewrites the same components as a board click.
ttt_reset_btn.click(
    fn=reset_tictactoe,
    inputs=[ttt_stats],
    outputs=[*board_buttons, ttt_message, ttt_reasoning, ttt_stats],
)
271
  # Update stats display on changes
272
  ttt_stats.change(