"""Demo script for the Killer Sudoku Environment.

Directly instantiates the environment (no Docker needed) and exercises
all 3 action types, reward mechanics, and difficulty progression.
"""
import sys
import os
# Add parent directory so killer_sudoku_env is importable as a package,
# and also the current directory so server-side `from models import ...` works.
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_this_dir))  # parent (for package import)
sys.path.insert(0, _this_dir)  # current (for server-side `from models import`)
# NOTE(review): KillerSudokuObservation and Sumdoku are imported but not
# referenced in this script's visible code — presumably kept for interactive
# use or re-export; confirm before removing.
from killer_sudoku_env.models import KillerSudokuAction, KillerSudokuObservation
from killer_sudoku_env.server.killer_sudoku_env_environment import KillerSudokuEnvironment
from killer_sudoku_env.server.sumdoku import Sumdoku
# For test speed, always use non-unique puzzles (skips expensive uniqueness solver)
# Monkey-patch: wrap the original reset so every call is forced down the
# fast (non-unique) generation path.
_original_reset = KillerSudokuEnvironment.reset
def _fast_reset(self):
    """Patched reset that forces unique=False for fast puzzle generation.

    Assumes the original reset() calls random.random() and requests a
    unique-solution puzzle only when the draw is <= 0.95; pinning the draw
    at 0.99 therefore skips the uniqueness solver — TODO confirm against
    KillerSudokuEnvironment.reset.
    """
    import random as _random
    # Swap out the module-global random.random so the patched value is seen
    # inside _original_reset. NOTE(review): this is process-global for the
    # duration of the call — any other code drawing random.random()
    # concurrently would also get 0.99.
    _orig_random = _random.random
    _random.random = lambda: 0.99  # > 0.95 → unique=False
    try:
        return _original_reset(self)
    finally:
        # Always restore the real random.random, even if reset() raises.
        _random.random = _orig_random
# Install the patch class-wide: every instance now uses the fast reset.
KillerSudokuEnvironment.reset = _fast_reset
def print_section(title: str) -> None:
    """Print *title* framed above and below by 60-character '=' rules."""
    bar = "=" * 60
    print("\n" + bar)
    print(" " + title)
    print(bar + "\n")
def demo_basic_gameplay() -> None:
    """Walk through one game, exercising all 3 action types.

    Covers reset, propose_candidates, eliminate_candidate, and
    enter_answer with both a correct and an incorrect guess.
    """
    print_section("BASIC GAMEPLAY DEMO")
    env = KillerSudokuEnvironment()

    # --- Reset ---
    obs = env.reset()
    print("--- Reset ---")
    print(f"Board size: {obs.n}x{obs.n}, Difficulty: {obs.difficulty}")
    print(f"Action result: {obs.action_result}")
    print(f"Number of empty cells with candidates: {len(obs.candidates)}")
    print(f"\nBoard:\n{obs.board_display}")

    # Preview only the opening lines of the (long) rules text.
    print("Rules (first 5 lines):")
    for rule_line in obs.rules_prompt.split('\n')[:5]:
        print(f" {rule_line}")
    print(" ...")

    # Pick an empty cell; candidate keys are "x,y" strings.
    empty_cells = list(obs.candidates)
    if not empty_cells:
        print("No empty cells found!")
        return
    first_cell = empty_cells[0]
    x_str, y_str = first_cell.split(",")
    x, y = int(x_str), int(y_str)
    print(f"\nUsing cell ({x}, {y}) for demos. Initial candidates: {obs.candidates[first_cell]}")

    # --- Action 1: propose_candidates ---
    print("\n--- propose_candidates ---")
    obs = env.step(KillerSudokuAction(
        action_type="propose_candidates",
        x=x, y=y,
        values=[1, 2, 3],
    ))
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Candidates for ({x},{y}): {obs.candidates.get(first_cell)}")

    # --- Action 2: eliminate_candidate ---
    print("\n--- eliminate_candidate ---")
    obs = env.step(KillerSudokuAction(
        action_type="eliminate_candidate",
        x=x, y=y,
        values=[3],
        justification="row_constraint",
    ))
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Candidates for ({x},{y}): {obs.candidates.get(first_cell)}")

    # --- Action 3: enter_answer (correct) ---
    # Peek at the stored solution so the demo can submit a known-correct value.
    solution_value = env._sumdoku.board.get_cell(x, y).solution_value
    print(f"\n--- enter_answer (correct: value={solution_value}) ---")
    obs = env.step(KillerSudokuAction(
        action_type="enter_answer",
        x=x, y=y,
        value=solution_value,
    ))
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Done: {obs.done}")

    # --- Action 4: enter_answer (incorrect) ---
    # Candidates shrink as cells fill, so re-read them for a fresh cell.
    remaining = list(obs.candidates)
    if remaining:
        cell2 = remaining[0]
        x2, y2 = map(int, cell2.split(","))
        correct_val = env._sumdoku.board.get_cell(x2, y2).solution_value
        # Any value other than the solution works as a deliberate mistake.
        wrong_val = 2 if correct_val == 1 else 1
        print(f"\n--- enter_answer (incorrect: value={wrong_val} at ({x2},{y2})) ---")
        obs = env.step(KillerSudokuAction(
            action_type="enter_answer",
            x=x2, y=y2,
            value=wrong_val,
        ))
        print(f"Result: {obs.action_result}")
        print(f"Reward: {obs.reward}")
        print(f"Incorrect answers: {obs.incorrect_answers}")
        print(f"Done: {obs.done}")
def demo_malformed_actions() -> None:
    """Show the penalty applied to each kind of malformed action."""
    print_section("MALFORMED ACTION DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()

    # enter_answer must carry a value.
    print("--- enter_answer without value ---")
    obs = env.step(KillerSudokuAction(
        action_type="enter_answer",
        x=0, y=0,
        value=None,
    ))
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward} (expected: -3.0)")

    # Coordinates far outside any board.
    print("\n--- Out of range coordinates ---")
    obs = env.step(KillerSudokuAction(
        action_type="propose_candidates",
        x=99, y=99,
        values=[1, 2],
    ))
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward} (expected: -3.0)")

    # eliminate_candidate requires a justification string.
    print("\n--- eliminate_candidate without justification ---")
    cells = list(obs.candidates)
    if cells:
        x, y = map(int, cells[0].split(","))
        obs = env.step(KillerSudokuAction(
            action_type="eliminate_candidate",
            x=x, y=y,
            values=[1],
            justification=None,
        ))
        print(f"Result: {obs.action_result}")
        print(f"Reward: {obs.reward} (expected: -3.0)")
def demo_thinking_reward_decay() -> None:
    """Show that per-step thinking rewards stop after 6 steps."""
    print_section("THINKING REWARD DECAY DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    # Snapshot the first 8 empty-cell keys before stepping mutates the board.
    target_cells = list(obs.candidates)[:8]
    print("Proposing candidates for 8 consecutive cells:")
    for step_index, cell_key in enumerate(target_cells):
        x, y = map(int, cell_key.split(","))
        obs = env.step(KillerSudokuAction(
            action_type="propose_candidates",
            x=x, y=y,
            values=[1, 2, 3],
        ))
        # The first 6 thinking actions earn 0.1 each; later ones earn nothing.
        expected = 0.1 if step_index < 6 else 0.0
        status = "OK" if abs(obs.reward - expected) < 0.001 else "MISMATCH"
        print(f" Step {step_index+1}: reward={obs.reward:.1f} (expected {expected:.1f}) [{status}]")
def demo_five_wrong_termination() -> None:
    """Show that the game terminates after 5 incorrect answers."""
    print_section("5 WRONG ANSWERS TERMINATION DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    empty_cells = list(obs.candidates)
    print("Entering 5 wrong answers on different cells:")
    # Iterating the slice bounds the loop by both 5 and the cell count,
    # matching the original range(5)-with-break behavior.
    for attempt, cell_key in enumerate(empty_cells[:5]):
        x, y = map(int, cell_key.split(","))
        correct_val = env._sumdoku.board.get_cell(x, y).solution_value
        # Submit any value that is not the solution.
        wrong_val = 2 if correct_val == 1 else 1
        obs = env.step(KillerSudokuAction(
            action_type="enter_answer",
            x=x, y=y,
            value=wrong_val,
        ))
        print(f" Wrong #{attempt+1}: reward={obs.reward}, incorrect={obs.incorrect_answers}, done={obs.done}")
    print(f"\nFinal reward on 5th wrong: {obs.reward} (expected: -15.0 = -5.0 + -10.0 penalty)")
    print(f"Done: {obs.done} (expected: True)")
def demo_difficulty_progression() -> None:
    """Play several full games to drive difficulty progression."""
    print_section("DIFFICULTY PROGRESSION DEMO")
    env = KillerSudokuEnvironment()
    print(f"Starting: n={env._n}, difficulty={env._difficulty}")
    for game in range(7):
        obs = env.reset()
        print(f"\nGame {game + 1}: n={obs.n}, difficulty={obs.difficulty}, "
              f"empty_cells={len(obs.candidates)}")
        # Answer every cell correctly so the average performance ratio is high.
        solved = 0
        for cell_key in list(obs.candidates):
            x, y = map(int, cell_key.split(","))
            obs = env.step(KillerSudokuAction(
                action_type="enter_answer",
                x=x, y=y,
                value=env._sumdoku.board.get_cell(x, y).solution_value,
            ))
            solved += 1
            if obs.done:
                break
        print(f" Solved {solved} cells, final reward: {obs.reward}, "
              f"episode_total: {env._episode_reward:.1f}, done: {obs.done}")
        print(f" Game history: {len(env._game_rewards)} recorded "
              f"(progression check at 5+)")
    print(f"\nFinal state: n={env._n}, difficulty={env._difficulty}")
    if env._difficulty > 15 or env._n > 9:
        print(" Difficulty increased as expected!")
    else:
        print(" (Difficulty may not have changed if fewer than 5 full games completed)")
def demo_complete_puzzle() -> None:
    """Fully solve one puzzle to verify the completion bonus."""
    print_section("COMPLETE PUZZLE DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    empty_cells = list(obs.candidates)
    print(f"Puzzle: {obs.n}x{obs.n}, difficulty={obs.difficulty}, "
          f"empty_cells={len(empty_cells)}")
    print(f"\nSolving all {len(empty_cells)} cells...")
    total_reward = 0.0
    for cell_key in empty_cells:
        x, y = map(int, cell_key.split(","))
        # Enter the known-correct value for each remaining cell.
        obs = env.step(KillerSudokuAction(
            action_type="enter_answer",
            x=x, y=y,
            value=env._sumdoku.board.get_cell(x, y).solution_value,
        ))
        total_reward += obs.reward
        if obs.done:
            break
    print(f"Result: {obs.action_result}")
    print(f"Total reward: {total_reward:.1f} "
          f"(expected: {len(empty_cells)}.0 correct + 5.0 bonus = {len(empty_cells) + 5}.0)")
    print(f"Done: {obs.done}")
    print(f"\nFinal board:\n{obs.board_display}")
def demo_step_limit() -> None:
    """Show forced termination once the step budget is exhausted."""
    print_section("STEP LIMIT DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    # Shrink the budget so the limit is reachable within the demo.
    env._max_steps = 10
    print(f"Set max_steps to {env._max_steps} (normally {env._n * env._n * 10})")
    first_key = list(obs.candidates)[0]
    x, y = map(int, first_key.split(","))
    print("Spamming propose_candidates for 10 steps...")
    for step_no in range(10):
        obs = env.step(KillerSudokuAction(
            action_type="propose_candidates",
            x=x, y=y,
            values=[1, 2, 3],
        ))
        if obs.done:
            print(f" Step {step_no+1}: TERMINATED")
            print(f" Result: {obs.action_result}")
            print(f" Reward: {obs.reward} (expected: -10.0)")
            print(f" Done: {obs.done} (expected: True)")
            break
        print(f" Step {step_no+1}: reward={obs.reward}")
    print(f"\nEpisode total reward: {env._episode_reward:.1f} (expected: negative)")
    assert obs.done, "Episode should have terminated"
    assert env._episode_reward < 0, f"Total reward should be negative, got {env._episode_reward}"
    print(" Confirmed: episode terminates with negative total reward!")
def main() -> None:
    """Run every demo in sequence."""
    # Order matters only for readability of the transcript; each demo
    # builds its own environment.
    demos = (
        demo_basic_gameplay,
        demo_malformed_actions,
        demo_thinking_reward_decay,
        demo_five_wrong_termination,
        demo_complete_puzzle,
        demo_step_limit,
        demo_difficulty_progression,
    )
    for demo in demos:
        demo()
    print_section("ALL DEMOS COMPLETE")
# Script entry point: run all demos when executed directly.
if __name__ == "__main__":
    main()