Spaces:

kaushikvr06
/

reasoning-simulator

Build error

App Files Files Community

Kaushik Rajan committed on Jul 12

Commit

b1670f3

1 Parent(s): 6be63cd

feat(tictactoe): Refine UI, implement Minimax AI, and add tests

Browse files

Files changed (4) hide show

.gitignore +9 -1
app.py +109 -200
requirements.txt +3 -1
tests/test_games.py +68 -48

.gitignore CHANGED Viewed

@@ -230,4 +230,12 @@ gradio_cached_examples/
 execution-plan.md
 # Research paper images - not needed in repo
-research-paper-snips/

 execution-plan.md
 # Research paper images - not needed in repo
+research-paper-snips/
+# huggingface
+*.sagemaker_notebook.ipynb
+# virtualenv
+.venv/
+venv/
+ENV/

app.py CHANGED Viewed

@@ -76,10 +76,10 @@ if GAMES_AVAILABLE:
     try:
         # Test instantiation
         tictactoe_env = TicTacToeEnv()
-        kuhn_env = KuhnPokerEnv()
-        print("✅ Game environments created successfully")
     except Exception as e:
-        print(f"❌ Error creating game environments: {e}")
         print("📋 Full traceback:", traceback.format_exc())
         GAMES_AVAILABLE = False
 else:
@@ -105,10 +105,9 @@ def create_interface():
     with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
         if GAMES_AVAILABLE:
-            gr.Markdown("**Demo Version** - Experience zero-sum games with AI! Full reasoning capabilities coming soon. Learn how AI makes decisions in competitive scenarios.")
             # TicTacToe specific functions
             def get_tictactoe_board_html():
                 """Get current TicTacToe board as HTML with emojis."""
@@ -135,11 +134,46 @@ def create_interface():
             ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
             def play_tictactoe(position, stats):
                 """Play a TicTacToe move."""
                 if tictactoe_env.game_over:
-                    return get_tictactoe_board_html(), "Game is over! Click 'New Game' to start again.", "", stats, get_valid_tictactoe_positions()
                 try:
                     position = int(position)
                     if position < 0 or position > 8:
@@ -153,12 +187,23 @@ def create_interface():
                         if winner == "You": stats['wins'] += 1
                         elif winner == "AI": stats['losses'] += 1
                         else: stats['draws'] += 1
-                        return get_tictactoe_board_html(), f"Game Over! {winner} won!", f"Final reward: {reward}", stats, []
                     # AI move
-                    valid_actions = tictactoe_env._get_valid_actions()
-                    ai_action = random.choice(valid_actions)  # Still random for now; integrate policy later
-                    reasoning_prompt = f"In TicTacToe, board state: {tictactoe_env.board.flatten().tolist()}. Valid moves: {valid_actions}. Explain why to choose one randomly as placeholder."
                     reasoning = generate_reasoning(reasoning_prompt)
                     obs, reward, terminated, truncated, info = tictactoe_env.step(ai_action)
@@ -167,204 +212,71 @@ def create_interface():
                         if winner == "You": stats['wins'] += 1
                         elif winner == "AI": stats['losses'] += 1
                         else: stats['draws'] += 1
-                        return get_tictactoe_board_html(), f"Game Over! {winner} won! AI played {ai_action}.", reasoning, stats, []
                     else:
-                        return get_tictactoe_board_html(), f"AI played position {ai_action}. Your turn!", reasoning, stats, get_valid_tictactoe_positions()
                 except Exception as e:
-                    return get_tictactoe_board_html(), f"Error: {str(e)}", "", stats, get_valid_tictactoe_positions()
             def reset_tictactoe(stats):
                 """Start a fresh TicTacToe game and return the refreshed UI values.

                 The stats object is passed through untouched, so the running
                 win/loss/draw tally survives a reset.

                 Returns:
                     A 5-tuple: board HTML, status message, reasoning placeholder,
                     the unchanged stats, and the currently valid positions.
                 """
                 tictactoe_env.reset()
                 # Render the board before querying positions (same order as before).
                 board_html = get_tictactoe_board_html()
                 open_positions = get_valid_tictactoe_positions()
                 status_text = "New game started! You are ❌ (X). Choose a position from the dropdown."
                 reasoning_text = "AI will show its reasoning here..."
                 return board_html, status_text, reasoning_text, stats, open_positions
-            def get_kuhn_poker_state_html():
-                """Get current Kuhn Poker state as HTML."""
-                card = ['J', 'Q', 'K'][kuhn_env.player1_card]
-                html = f"<div style='font-size: 18px;'><p>🃏 Your Card: <strong>{card}</strong></p>"
-                html += f"<p>💰 Pot: <strong>{kuhn_env.pot}</strong></p>"
-                html += f"<p>🎯 Current Player: <strong>{kuhn_env.current_player}</strong></p>"
-                html += f"<p>🔄 Betting Round: <strong>{kuhn_env.betting_round}</strong></p>"
-                if kuhn_env.actions_history:
-                    html += "<p>📋 Actions:</p><ul>"
-                    for player, action in kuhn_env.actions_history:
-                        action_name = ["Check/Call", "Bet", "Fold"][action]
-                        html += f"<li>Player {player}: {action_name}</li>"
-                    html += "</ul>"
-                html += "</div>"
-                return html
-            kuhn_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
-            def play_kuhn_poker(action_name, stats):
-                """Play a Kuhn Poker move."""
-                if kuhn_env.game_over:
-                    return get_kuhn_poker_state_html(), "Game is over! Click 'New Game' to start again.", "", stats
-                try:
-                    action_map = {"Check/Call": 0, "Bet": 1, "Fold": 2}
-                    if action_name not in action_map:
-                        raise ValueError("Invalid action")
-                    action = action_map[action_name]
-                    # Human move
-                    obs, reward, terminated, truncated, info = kuhn_env.step(action)
-                    if terminated:
-                        winner = "You" if kuhn_env.winner == 1 else "AI" if kuhn_env.winner == -1 else "Draw"
-                        if winner == "You": stats['wins'] += 1
-                        elif winner == "AI": stats['losses'] += 1
-                        else: stats['draws'] += 1
-                        return get_kuhn_poker_state_html(), f"Game Over! {winner} won! Pot: {kuhn_env.pot}", f"Your final reward: {reward}", stats
-                    # AI move
-                    valid_actions = kuhn_env._get_valid_actions()
-                    ai_action = random.choice(valid_actions)
-                    ai_action_name = ["Check/Call", "Bet", "Fold"][ai_action]
-                    reasoning_prompt = f"In Kuhn Poker, my card: {kuhn_env.player2_card}, history: {kuhn_env.actions_history}. Valid actions: {valid_actions}. Explain choice."
-                    reasoning = generate_reasoning(reasoning_prompt)
-                    obs, reward, terminated, truncated, info = kuhn_env.step(ai_action)
-                    if terminated:
-                        winner = "You" if kuhn_env.winner == 1 else "AI" if kuhn_env.winner == -1 else "Draw"
-                        if winner == "You": stats['wins'] += 1
-                        elif winner == "AI": stats['losses'] += 1
-                        else: stats['draws'] += 1
-                        return get_kuhn_poker_state_html(), f"AI chose {ai_action_name}. Game Over! {winner} won! Pot: {kuhn_env.pot}", reasoning, stats
-                    else:
-                        return get_kuhn_poker_state_html(), f"AI chose {ai_action_name}. Your turn!", reasoning, stats
-                except Exception as e:
-                    return get_kuhn_poker_state_html(), f"Error: {str(e)}", "", stats
-            def reset_kuhn_poker(stats):
-                """Reset Kuhn Poker game."""
-                kuhn_env.reset()
-                card = ['J', 'Q', 'K'][kuhn_env.player1_card]
-                return get_kuhn_poker_state_html(), "New game started! You are Player 1. Choose your action.", f"Your card: {card}", stats
-            with gr.Tabs():
-                # TicTacToe Tab
-                with gr.TabItem("🎯 TicTacToe"):
-                    gr.Markdown("### Play TicTacToe against AI\nYou are ❌ (X) and go first. Get 3 in a row to win! **How AI Thinks**: AI will analyze the board and explain its moves (random for now; full reasoning soon).\nPositions: Top-left=0, bottom-right=8.")
-                    with gr.Row():
-                        with gr.Column(scale=2):
-                            ttt_board = gr.HTML(
-                                label="Game Board",
-                                value=get_tictactoe_board_html()
-                            )
-                        with gr.Column(scale=1):
-                            ttt_position = gr.Dropdown(
-                                label="Your Move (Valid Positions)",
-                                choices=get_valid_tictactoe_positions()
-                            )
-                            with gr.Row():
-                                ttt_play_btn = gr.Button("Play Move", variant="primary")
-                                ttt_reset_btn = gr.Button("New Game", variant="secondary")
-                            ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
-                    ttt_message = gr.Textbox(
-                        label="Game Status",
-                        value="Choose a position to start!",
-                        lines=2,
-                        interactive=False
-                    )
-                    ttt_reasoning = gr.Textbox(
-                        label="AI Reasoning",
-                        value="AI will explain its thought process here...",
-                        lines=3,
-                        interactive=False
                     )
-                    ttt_play_btn.click(
-                        fn=play_tictactoe,
-                        inputs=[ttt_position, ttt_stats],
-                        outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
-                    )
-                    ttt_reset_btn.click(
-                        fn=reset_tictactoe,
-                        inputs=[ttt_stats],
-                        outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
                     )
-                    # Update stats display on changes
-                    ttt_stats.change(
-                        fn=lambda s: f"Wins: {s['wins']} | Losses: {s['losses']} | Draws: {s['draws']}",
-                        inputs=ttt_stats,
-                        outputs=ttt_stats_display
-                    )
-                # Kuhn Poker Tab
-                with gr.TabItem("🃏 Kuhn Poker"):
-                    gr.Markdown("### Play Kuhn Poker against AI\nSimplified poker with J/Q/K cards. You ante 1 chip each. Higher card wins if no fold. **How AI Thinks**: AI evaluates card strength and bets (random now; strategic soon).")
                     with gr.Row():
-                        with gr.Column(scale=2):
-                            kuhn_state = gr.HTML(
-                                label="Game State",
-                                value=get_kuhn_poker_state_html()
-                            )
-                        with gr.Column(scale=1):
-                            kuhn_action = gr.Dropdown(
-                                label="Your Action",
-                                choices=["Check/Call", "Bet", "Fold"],
-                                value="Check/Call"
-                            )
-                            with gr.Row():
-                                kuhn_play_btn = gr.Button("Play Action", variant="primary")
-                                kuhn_reset_btn = gr.Button("New Game", variant="secondary")
-                            kuhn_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
-                    kuhn_message = gr.Textbox(
-                        label="Game Status",
-                        value="Choose your action!",
-                        lines=2,
-                        interactive=False
-                    )
-                    kuhn_reasoning = gr.Textbox(
-                        label="AI Reasoning",
-                        value="AI will explain its thought process here...",
-                        lines=3,
-                        interactive=False
-                    )
-                    kuhn_play_btn.click(
-                        fn=play_kuhn_poker,
-                        inputs=[kuhn_action, kuhn_stats],
-                        outputs=[kuhn_state, kuhn_message, kuhn_reasoning, kuhn_stats]
-                    )
-                    kuhn_reset_btn.click(
-                        fn=reset_kuhn_poker,
-                        inputs=[kuhn_stats],
-                        outputs=[kuhn_state, kuhn_message, kuhn_reasoning, kuhn_stats]
-                    )
-                    kuhn_stats.change(
-                        fn=lambda s: f"Wins: {s['wins']} | Losses: {s['losses']} | Draws: {s['draws']}",
-                        inputs=kuhn_stats,
-                        outputs=kuhn_stats_display
-                    )
-                # New Transfer Test Tab (stub)
-                with gr.TabItem("🔬 Transfer Test"):
-                    gr.Markdown("### Test AI Reasoning on Non-Game Tasks\n(Coming Soon) Enter a math problem or logic puzzle to see transferred reasoning from game training.")
-                    transfer_input = gr.Textbox(label="Input Prompt", placeholder="E.g., 'Solve: 2x + 3 = 7'")
-                    transfer_output = gr.Textbox(label="AI Response", interactive=False)
-                    transfer_btn = gr.Button("Test")
-                    def transfer_test(input):
-                        cot_prompt = f"Solve step-by-step: {input}"
-                        return generate_reasoning(cot_prompt)
-                    transfer_btn.click(fn=transfer_test, inputs=transfer_input, outputs=transfer_output)
         else:
             # Fallback interface when games don't load
             gr.Markdown("⚠️ **Game modules could not be loaded.** Showing diagnostic information.")
@@ -430,10 +342,7 @@ def create_interface():
             """)
             gr.Markdown("**New in this version:** Visual boards, stats tracking, and transfer test stub!")
-        if GAMES_AVAILABLE:
-            gr.Markdown("---")
-            gr.Markdown("🚧 **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")
-        else:
             gr.Markdown("---")
             gr.Markdown("🔄 **Dependencies are loading.** Check the diagnostic info above and refresh in a few minutes!")

     try:
         # Test instantiation
         tictactoe_env = TicTacToeEnv()
+        # kuhn_env = KuhnPokerEnv() # No longer needed
+        print("✅ Game environment created successfully")
     except Exception as e:
+        print(f"❌ Error creating game environment: {e}")
         print("📋 Full traceback:", traceback.format_exc())
         GAMES_AVAILABLE = False
 else:
     with gr.Blocks(title="SPIRAL: Interactive Reasoning Game Simulator", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🎮 SPIRAL: Interactive Reasoning Game Simulator")
+        gr.Markdown("Play TicTacToe against an AI, see its step-by-step reasoning, and learn how it thinks!")
         if GAMES_AVAILABLE:
             # TicTacToe specific functions
             def get_tictactoe_board_html():
                 """Get current TicTacToe board as HTML with emojis."""
             ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
+            def minimax(board, player):
+                """Minimax algorithm to find the best move."""
+                # Base cases
+                if tictactoe_env._check_winner(1):
+                    return -10, None
+                elif tictactoe_env._check_winner(-1):
+                    return 10, None
+                elif tictactoe_env._is_draw():
+                    return 0, None
+                best_move = None
+                if player == -1: # AI is player -1 (O), maximizing player
+                    best_score = -float('inf')
+                    for move in tictactoe_env._get_valid_actions():
+                        row, col = divmod(move, 3)
+                        board[row, col] = -1
+                        score, _ = minimax(board.copy(), 1)
+                        board[row, col] = 0 # Undo move
+                        if score > best_score:
+                            best_score = score
+                            best_move = move
+                else: # Human is player 1 (X), minimizing player
+                    best_score = float('inf')
+                    for move in tictactoe_env._get_valid_actions():
+                        row, col = divmod(move, 3)
+                        board[row, col] = 1
+                        score, _ = minimax(board.copy(), -1)
+                        board[row, col] = 0 # Undo move
+                        if score < best_score:
+                            best_score = score
+                            best_move = move
+                return best_score, best_move
             def play_tictactoe(position, stats):
                 """Play a TicTacToe move."""
                 if tictactoe_env.game_over:
+                    yield get_tictactoe_board_html(), "Game is over! Click 'New Game' to start again.", "", stats, get_valid_tictactoe_positions()
+                    return
                 try:
                     position = int(position)
                     if position < 0 or position > 8:
                         if winner == "You": stats['wins'] += 1
                         elif winner == "AI": stats['losses'] += 1
                         else: stats['draws'] += 1
+                        yield get_tictactoe_board_html(), f"Game Over! {winner} won!", f"Final reward: {reward}", stats, []
+                        return
+                    # Show "thinking" indicator
+                    yield get_tictactoe_board_html(), "AI is thinking...", "🧠...", stats, []
                     # AI move
+                    _, ai_action = minimax(tictactoe_env.board.copy(), -1)
+                    if ai_action is None: # Handle case where minimax returns no move (e.g., game over)
+                        valid_actions = tictactoe_env._get_valid_actions()
+                        if not valid_actions: # No actions left
+                             yield get_tictactoe_board_html(), "Game is a draw!", "", stats, []
+                             return
+                        ai_action = random.choice(valid_actions)
+                    reasoning_prompt = f"In TicTacToe, the board is currently: {tictactoe_env.board.flatten().tolist()}. The human player (X) played position {position}. I am the AI (O). The available moves are {tictactoe_env._get_valid_actions()}. I have analyzed the game tree using minimax and determined the optimal move is {ai_action}. Explain my strategy."
                     reasoning = generate_reasoning(reasoning_prompt)
                     obs, reward, terminated, truncated, info = tictactoe_env.step(ai_action)
                         if winner == "You": stats['wins'] += 1
                         elif winner == "AI": stats['losses'] += 1
                         else: stats['draws'] += 1
+                        yield get_tictactoe_board_html(), f"Game Over! {winner} won! AI played {ai_action}.", reasoning, stats, []
                     else:
+                        yield get_tictactoe_board_html(), f"AI played position {ai_action}. Your turn!", reasoning, stats, get_valid_tictactoe_positions()
                 except Exception as e:
+                    yield get_tictactoe_board_html(), f"Error: {str(e)}", "", stats, get_valid_tictactoe_positions()
             def reset_tictactoe(stats):
                 """Start a fresh TicTacToe game and return the refreshed UI values.

                 The stats object is passed through untouched, so the running
                 win/loss/draw tally survives a reset.

                 Returns:
                     A 5-tuple: board HTML, status message, reasoning placeholder,
                     the unchanged stats, and the currently valid positions.
                 """
                 tictactoe_env.reset()
                 # Render the board before querying positions (same order as before).
                 board_html = get_tictactoe_board_html()
                 open_positions = get_valid_tictactoe_positions()
                 status_text = "New game started! You are ❌ (X). Choose a position from the dropdown."
                 reasoning_text = "AI will show its reasoning here..."
                 return board_html, status_text, reasoning_text, stats, open_positions
+            # Simplified layout focusing only on TicTacToe
+            gr.Markdown("### Play TicTacToe against AI\nYou are ❌ (X) and go first. Get 3 in a row to win! **How AI Thinks**: AI will analyze the board and explain its moves.\nPositions: Top-left=0, bottom-right=8.")
+            with gr.Row():
+                with gr.Column(scale=2):
+                    ttt_board = gr.HTML(
+                        label="Game Board",
+                        value=get_tictactoe_board_html()
                     )
+                with gr.Column(scale=1):
+                    ttt_position = gr.Dropdown(
+                        label="Your Move (Valid Positions)",
+                        choices=get_valid_tictactoe_positions()
                     )
                     with gr.Row():
+                        ttt_play_btn = gr.Button("Play Move", variant="primary")
+                        ttt_reset_btn = gr.Button("New Game", variant="secondary")
+                    ttt_stats_display = gr.Markdown(value="Wins: 0 | Losses: 0 | Draws: 0")
+            ttt_message = gr.Textbox(
+                label="Game Status",
+                value="Choose a position to start!",
+                lines=2,
+                interactive=False
+            )
+            ttt_reasoning = gr.Textbox(
+                label="AI Reasoning",
+                value="AI will explain its thought process here...",
+                lines=3,
+                interactive=False
+            )
+            ttt_play_btn.click(
+                fn=play_tictactoe,
+                inputs=[ttt_position, ttt_stats],
+                outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
+            )
+            ttt_reset_btn.click(
+                fn=reset_tictactoe,
+                inputs=[ttt_stats],
+                outputs=[ttt_board, ttt_message, ttt_reasoning, ttt_stats, ttt_position]
+            )
+            # Update stats display on changes
+            ttt_stats.change(
+                fn=lambda s: f"Wins: {s['wins']} | Losses: {s['losses']} | Draws: {s['draws']}",
+                inputs=ttt_stats,
+                outputs=ttt_stats_display
+            )
+            gr.Markdown("---")
+            gr.Markdown("🚧 **This is a development preview.** Full SPIRAL training and reasoning capabilities will be added in the next update!")
         else:
             # Fallback interface when games don't load
             gr.Markdown("⚠️ **Game modules could not be loaded.** Showing diagnostic information.")
             """)
             gr.Markdown("**New in this version:** Visual boards, stats tracking, and transfer test stub!")
+        if not GAMES_AVAILABLE:
             gr.Markdown("---")
             gr.Markdown("🔄 **Dependencies are loading.** Check the diagnostic info above and refresh in a few minutes!")

requirements.txt CHANGED Viewed

@@ -10,4 +10,6 @@ pandas>=1.3.0
 tqdm>=4.62.0
 pyyaml
 bitsandbytes
-accelerate>=0.26.0

 tqdm>=4.62.0
 pyyaml
 bitsandbytes
+accelerate>=0.26.0
+pytest
+Jinja2

tests/test_games.py CHANGED Viewed

@@ -1,58 +1,78 @@
 import pytest
 import numpy as np
-from src.games.tictactoe import TicTacToeEnv
-from src.games.kuhn_poker import KuhnPokerEnv
 @pytest.fixture
-def ttt_env():
     return TicTacToeEnv()
-@pytest.fixture
-def kuhn_env():
-    return KuhnPokerEnv()
-def test_tictactoe_reset(ttt_env):
-    obs, info = ttt_env.reset()
-    assert np.all(obs == 0)
-    assert ttt_env.current_player == 1
-    assert not ttt_env.game_over
-def test_tictactoe_win(ttt_env):
-    # Simulate win for player 1
-    ttt_env.step(0)  # X
-    ttt_env.step(3)  # O (invalid sim, but test step)
-    ttt_env.step(1)  # X
-    ttt_env.step(4)  # O
-    _, reward, terminated, _, _ = ttt_env.step(2)  # X wins
-    assert terminated
-    assert reward == 1  # From player 1 perspective
-    assert ttt_env.winner == 1
-def test_tictactoe_invalid_move(ttt_env):
-    ttt_env.step(0)
-    _, reward, terminated, _, info = ttt_env.step(0)  # Same spot
-    assert 'invalid_move' in info
-    assert terminated
-    assert reward == -1
-def test_kuhn_reset(kuhn_env):
-    obs, info = kuhn_env.reset()
-    assert kuhn_env.pot == 2  # Antes
-    assert kuhn_env.current_player == 1
-    assert not kuhn_env.game_over
-def test_kuhn_fold(kuhn_env):
-    _, reward, terminated, _, _ = kuhn_env.step(2)  # Player 1 folds
-    assert terminated
-    assert reward == -1  # Lost ante
-    assert kuhn_env.winner == -1
-def test_kuhn_win(kuhn_env):
-    kuhn_env.player1_card = 2  # K
-    kuhn_env.player2_card = 0  # J
-    kuhn_env.step(1)  # Bet
-    kuhn_env.step(0)  # Call
-    _, reward, terminated, _, _ = kuhn_env.step(0)  # Call (if needed)
     assert terminated
-    assert reward > 0  # Win with higher card
-    assert kuhn_env.winner == 1

 import pytest
 import numpy as np
+import sys
+import os
+# Add src to path to allow importing TicTacToeEnv
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))
+from games.tictactoe import TicTacToeEnv
 @pytest.fixture
+def env():
+    """Fixture to create a fresh TicTacToeEnv for each test."""
     return TicTacToeEnv()
+def test_initial_state(env):
+    """Test the initial state of the board."""
+    assert np.all(env.board == np.zeros((3, 3)))
+    assert env.current_player == 1
+    assert not env.game_over
+def test_player_move(env):
+    """Test a valid player move."""
+    env.step(0)
+    assert env.board[0, 0] == 1
+    assert env.current_player == -1
+    assert not env.game_over
+def test_invalid_move(env):
+    """Test making an invalid move on an occupied cell."""
+    env.step(0)
+    with pytest.raises(ValueError):
+        env.step(0)
+def test_win_condition_row(env):
+    """Test a win condition in a row."""
+    env.board = np.array([[1, 1, 1], [0, -1, 0], [-1, 0, 0]])
+    assert env._check_winner(1)
+    assert not env._check_winner(-1)
+def test_win_condition_col(env):
+    """Test a win condition in a column."""
+    env.board = np.array([[-1, 1, 0], [-1, 1, 0], [-1, 0, 0]])
+    assert not env._check_winner(1)
+    assert env._check_winner(-1)
+def test_win_condition_diag(env):
+    """Test a win condition on a diagonal."""
+    env.board = np.array([[1, 0, -1], [0, 1, -1], [0, 0, 1]])
+    assert env._check_winner(1)
+def test_draw_condition(env):
+    """Test a draw condition."""
+    env.board = np.array([[1, -1, 1], [1, -1, 1], [-1, 1, -1]])
+    assert env._is_draw()
+    assert not env._check_winner(1)
+    assert not env._check_winner(-1)
+def test_game_over_on_win(env):
+    """Test that the game_over flag is set on a win."""
+    env.step(0) # P1
+    env.step(3) # P2
+    env.step(1) # P1
+    env.step(4) # P2
+    _, _, terminated, _, _ = env.step(2) # P1 wins
     assert terminated
+    assert env.game_over
+    assert env.winner == 1
+def test_reset(env):
+    """Test if the environment resets correctly."""
+    env.step(0)
+    env.step(1)
+    env.reset()
+    assert np.all(env.board == np.zeros((3, 3)))
+    assert env.current_player == 1
+    assert not env.game_over
+    assert env.winner is None