Spaces:
Build error
Build error
Kaushik Rajan
committed on
Commit
·
5c7fb25
1
Parent(s):
b1670f3
fix(tictactoe): Fix minimax bug and implement clickable board
Browse files
app.py
CHANGED
|
@@ -108,29 +108,29 @@ def create_interface():
|
|
| 108 |
gr.Markdown("Play TicTacToe against an AI, see its step-by-step reasoning, and learn how it thinks!")
|
| 109 |
|
| 110 |
if GAMES_AVAILABLE:
|
| 111 |
-
# TicTacToe specific functions
|
| 112 |
-
def get_tictactoe_board_html():
    """Render the current TicTacToe board as an HTML table.

    Reads ``tictactoe_env.board`` (indexed as ``board[row, col]``). A cell
    equal to 1 shows the human mark, a cell equal to -1 shows the AI mark,
    and any other cell shows its position number (``row * 3 + col``).

    Returns:
        str: the complete ``<table>`` markup for the 3x3 board.
    """
    # Both marks were the same mis-decoded glyph, which made X and O
    # indistinguishable; use two distinct emoji instead.
    human_mark = '❌'
    ai_mark = '⭕'

    board = tictactoe_env.board
    # Collect fragments and join once rather than growing a string with +=.
    parts = ['<table style="border: 1px solid black; text-align: center; font-size: 24px;">']
    for row in range(3):
        parts.append('<tr>')
        for col in range(3):
            cell = board[row, col]
            if cell == 1:
                content = human_mark
            elif cell == -1:
                content = ai_mark
            else:
                content = f'{row*3 + col}'
            parts.append(f'<td style="border: 1px solid black; width: 50px; height: 50px;">{content}</td>')
        parts.append('</tr>')
    parts.append('</table>')
    return ''.join(parts)
|
| 130 |
|
| 131 |
-
def
|
| 132 |
-
"""
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
|
| 136 |
|
|
@@ -138,9 +138,10 @@ def create_interface():
|
|
| 138 |
"""Minimax algorithm to find the best move."""
|
| 139 |
|
| 140 |
# Base cases
|
| 141 |
-
|
|
|
|
| 142 |
return -10, None
|
| 143 |
-
elif
|
| 144 |
return 10, None
|
| 145 |
elif tictactoe_env._is_draw():
|
| 146 |
return 0, None
|
|
@@ -169,81 +170,74 @@ def create_interface():
|
|
| 169 |
return best_score, best_move
|
| 170 |
|
| 171 |
def play_tictactoe(position, stats):
|
| 172 |
-
"""Play a TicTacToe move."""
|
| 173 |
if tictactoe_env.game_over:
|
| 174 |
-
yield
|
| 175 |
return
|
| 176 |
|
| 177 |
try:
|
| 178 |
position = int(position)
|
| 179 |
-
if position < 0 or position > 8:
|
| 180 |
-
raise ValueError("Invalid position")
|
| 181 |
|
| 182 |
# Human move
|
| 183 |
-
|
| 184 |
|
| 185 |
-
if
|
| 186 |
winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
|
| 187 |
if winner == "You": stats['wins'] += 1
|
| 188 |
elif winner == "AI": stats['losses'] += 1
|
| 189 |
else: stats['draws'] += 1
|
| 190 |
-
yield
|
| 191 |
return
|
| 192 |
|
| 193 |
# Show "thinking" indicator
|
| 194 |
-
yield
|
| 195 |
|
| 196 |
# AI move
|
| 197 |
_, ai_action = minimax(tictactoe_env.board.copy(), -1)
|
| 198 |
-
if ai_action is None:
|
| 199 |
valid_actions = tictactoe_env._get_valid_actions()
|
| 200 |
-
if not valid_actions:
|
| 201 |
-
yield
|
| 202 |
return
|
| 203 |
ai_action = random.choice(valid_actions)
|
| 204 |
|
| 205 |
-
|
| 206 |
reasoning_prompt = f"In TicTacToe, the board is currently: {tictactoe_env.board.flatten().tolist()}. The human player (X) played position {position}. I am the AI (O). The available moves are {tictactoe_env._get_valid_actions()}. I have analyzed the game tree using minimax and determined the optimal move is {ai_action}. Explain my strategy."
|
| 207 |
reasoning = generate_reasoning(reasoning_prompt)
|
| 208 |
-
|
| 209 |
|
| 210 |
-
if
|
| 211 |
winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
|
| 212 |
if winner == "You": stats['wins'] += 1
|
| 213 |
elif winner == "AI": stats['losses'] += 1
|
| 214 |
else: stats['draws'] += 1
|
| 215 |
-
yield
|
| 216 |
else:
|
| 217 |
-
yield
|
| 218 |
|
| 219 |
except Exception as e:
|
| 220 |
-
yield
|
| 221 |
-
|
| 222 |
def reset_tictactoe(stats):
|
| 223 |
"""Reset TicTacToe game."""
|
| 224 |
tictactoe_env.reset()
|
| 225 |
-
return
|
| 226 |
|
| 227 |
# Simplified layout focusing only on TicTacToe
|
| 228 |
-
gr.Markdown("### Play TicTacToe against AI\nYou are β (X) and go first.
|
| 229 |
-
|
| 230 |
-
with gr.
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
label="Game Board",
|
| 234 |
-
value=get_tictactoe_board_html()
|
| 235 |
-
)
|
| 236 |
-
|
| 237 |
-
with gr.Column(scale=1):
|
| 238 |
-
ttt_position = gr.Dropdown(
|
| 239 |
-
label="Your Move (Valid Positions)",
|
| 240 |
-
choices=get_valid_tictactoe_positions()
|
| 241 |
-
)
|
| 242 |
with gr.Row():
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
|
| 246 |
-
|
| 247 |
ttt_message = gr.Textbox(
|
| 248 |
label="Game Status",
|
| 249 |
value="Choose a position to start!",
|
|
@@ -257,16 +251,22 @@ def create_interface():
|
|
| 257 |
lines=3,
|
| 258 |
interactive=False
|
| 259 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
-
ttt_play_btn.click(
|
| 262 |
-
fn=play_tictactoe,
|
| 263 |
-
inputs=[ttt_position, ttt_stats],
|
| 264 |
-
outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
|
| 265 |
-
)
|
| 266 |
ttt_reset_btn.click(
|
| 267 |
fn=reset_tictactoe,
|
| 268 |
inputs=[ttt_stats],
|
| 269 |
-
outputs=[
|
| 270 |
)
|
| 271 |
# Update stats display on changes
|
| 272 |
ttt_stats.change(
|
|
|
|
| 108 |
gr.Markdown("Play TicTacToe against an AI, see its step-by-step reasoning, and learn how it thinks!")
|
| 109 |
|
| 110 |
if GAMES_AVAILABLE:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
def update_board_buttons():
    """Create a list of gr.Button updates from the current board state.

    Positions are visited in order 0-8 and mapped to ``board[row, col]``
    with ``divmod(i, 3)``. A cell equal to 1 shows the human mark, a cell
    equal to -1 shows the AI mark, and any other cell is left blank.

    Returns:
        list: nine ``gr.Button`` objects, one per position. A button is
        interactive only when its cell is blank and the game is not over.
    """
    # Both marks were the same mis-decoded glyph, which made X and O
    # indistinguishable on the board; use two distinct emoji instead.
    human_mark = '❌'
    ai_mark = '⭕'

    game_over = tictactoe_env.game_over
    updates = []
    for i in range(9):
        row, col = divmod(i, 3)
        cell = tictactoe_env.board[row, col]
        if cell == 1:
            val = human_mark
        elif cell == -1:
            val = ai_mark
        else:
            val = ""

        # Occupied squares, and every square once the game has ended,
        # must not accept clicks.
        interactive = val == "" and not game_over
        updates.append(gr.Button(value=val, interactive=interactive))
    return updates
|
| 132 |
+
|
| 133 |
+
# TicTacToe specific functions (no longer need get_tictactoe_board_html)
|
| 134 |
|
| 135 |
ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
|
| 136 |
|
|
|
|
| 138 |
"""Minimax algorithm to find the best move."""
|
| 139 |
|
| 140 |
# Base cases
|
| 141 |
+
winner = tictactoe_env._check_winner()
|
| 142 |
+
if winner == 1: # Human wins
|
| 143 |
return -10, None
|
| 144 |
+
elif winner == -1: # AI wins
|
| 145 |
return 10, None
|
| 146 |
elif tictactoe_env._is_draw():
|
| 147 |
return 0, None
|
|
|
|
| 170 |
return best_score, best_move
|
| 171 |
|
| 172 |
def _record_outcome(stats):
    """Tally the finished game in *stats* and return the result sentence.

    Reads ``tictactoe_env.winner``: 1 counts as a win for the human, -1 as
    a loss, anything else as a draw. *stats* is mutated in place.
    """
    if tictactoe_env.winner == 1:
        stats['wins'] += 1
        return "You won!"
    if tictactoe_env.winner == -1:
        stats['losses'] += 1
        return "AI won!"
    stats['draws'] += 1
    # Previously a draw was reported as "Draw won!".
    return "It's a draw!"


def play_tictactoe(position, stats):
    """Play a TicTacToe move and yield updates for the button grid.

    This is a generator. Every yielded tuple holds the nine button updates
    from ``update_board_buttons()`` followed by the status message, the
    AI reasoning text, and the stats dict, in that order.

    Args:
        position: the square chosen by the human; converted with ``int()``.
        stats: dict with ``'wins'``, ``'losses'`` and ``'draws'`` counters,
            updated in place when a game finishes.

    Yields:
        tuple: ``(*buttons, message, reasoning, stats)``. An intermediate
        "thinking" update is yielded before the AI move is computed.
    """
    if tictactoe_env.game_over:
        yield *update_board_buttons(), "Game is over! Click 'New Game' to start again.", "", stats
        return

    try:
        position = int(position)

        # Human move
        tictactoe_env.step(position)

        if tictactoe_env.game_over:
            outcome = _record_outcome(stats)
            yield *update_board_buttons(), f"Game Over! {outcome}", "", stats
            return

        # Show "thinking" indicator
        yield *update_board_buttons(), "AI is thinking...", "🧠...", stats

        # AI move
        _, ai_action = minimax(tictactoe_env.board.copy(), -1)
        if ai_action is None:
            # minimax produced no move; fall back to any remaining action.
            valid_actions = tictactoe_env._get_valid_actions()
            if not valid_actions:
                yield *update_board_buttons(), "Game is a draw!", "", stats
                return
            ai_action = random.choice(valid_actions)

        # The prompt is built before the AI move is applied, so it describes
        # the board as it stood when the move was chosen.
        reasoning_prompt = f"In TicTacToe, the board is currently: {tictactoe_env.board.flatten().tolist()}. The human player (X) played position {position}. I am the AI (O). The available moves are {tictactoe_env._get_valid_actions()}. I have analyzed the game tree using minimax and determined the optimal move is {ai_action}. Explain my strategy."
        reasoning = generate_reasoning(reasoning_prompt)
        tictactoe_env.step(ai_action)

        if tictactoe_env.game_over:
            outcome = _record_outcome(stats)
            yield *update_board_buttons(), f"Game Over! {outcome} AI played {ai_action}.", reasoning, stats
        else:
            yield *update_board_buttons(), f"AI played position {ai_action}. Your turn!", reasoning, stats

    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting the event handler crash.
        yield *update_board_buttons(), f"Error: {e}", "", stats
|
| 219 |
+
|
| 220 |
def reset_tictactoe(stats):
    """Reset the TicTacToe game and return the initial UI values.

    Calls ``tictactoe_env.reset()`` and leaves *stats* untouched, so the
    win/loss/draw counters carry over between games.

    Args:
        stats: the current stats dict, passed through unchanged.

    Returns:
        tuple: the nine button updates from ``update_board_buttons()``
        followed by the status message, the reasoning placeholder, and
        *stats*.
    """
    tictactoe_env.reset()
    return (
        *update_board_buttons(),
        # The mark here was a mis-decoded glyph; restored to the X emoji.
        "New game started! You are ❌ (X). Click a square to play.",
        "AI will show its reasoning here...",
        stats,
    )
|
| 224 |
|
| 225 |
# Simplified layout focusing only on TicTacToe
|
| 226 |
+
# Intro text shown above the board (the mark was a mis-decoded glyph; restored to the X emoji).
gr.Markdown("### Play TicTacToe against AI\nYou are ❌ (X) and go first. Click on a square to make your move.")
|
| 227 |
+
|
| 228 |
+
with gr.Column():
|
| 229 |
+
board_buttons = []
|
| 230 |
+
for i in range(3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
with gr.Row():
|
| 232 |
+
for j in range(3):
|
| 233 |
+
pos = i * 3 + j
|
| 234 |
+
button = gr.Button("", elem_id=f"ttt-cell-{pos}")
|
| 235 |
+
board_buttons.append(button)
|
| 236 |
+
|
| 237 |
+
with gr.Row():
|
| 238 |
+
ttt_reset_btn = gr.Button("New Game", variant="secondary")
|
| 239 |
ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
|
| 240 |
+
|
| 241 |
ttt_message = gr.Textbox(
|
| 242 |
label="Game Status",
|
| 243 |
value="Choose a position to start!",
|
|
|
|
| 251 |
lines=3,
|
| 252 |
interactive=False
|
| 253 |
)
|
| 254 |
+
|
| 255 |
+
# Create a combined click handler
|
| 256 |
+
def on_board_click(pos, stats):
    """Relay every update produced by ``play_tictactoe`` for the clicked square.

    Args:
        pos: position of the clicked board button.
        stats: the current stats dict, forwarded unchanged.

    Yields:
        Each tuple yielded by ``play_tictactoe(pos, stats)``, in order.
    """
    for ui_update in play_tictactoe(pos, stats):
        yield ui_update
|
| 258 |
+
|
| 259 |
+
# Both the board buttons and the reset button refresh the same components:
# the nine squares, the status box, the reasoning box, and the stats state.
refreshed_components = (*board_buttons, ttt_message, ttt_reasoning, ttt_stats)

# Each square passes its own position to the handler through a gr.State.
for cell_index in range(9):
    board_buttons[cell_index].click(
        fn=on_board_click,
        inputs=[gr.State(cell_index), ttt_stats],
        outputs=[*refreshed_components],
    )

ttt_reset_btn.click(
    fn=reset_tictactoe,
    inputs=[ttt_stats],
    outputs=[*refreshed_components],
)
|
| 271 |
# Update stats display on changes
|
| 272 |
ttt_stats.change(
|