# Uploaded to the Hugging Face Hub by arnavster1.
# Commit message: "Upload folder using huggingface_hub"
# Revision: 95b25ca (verified)
"""Demo script for the Killer Sudoku Environment.
Directly instantiates the environment (no Docker needed) and exercises
all 3 action types, reward mechanics, and difficulty progression.
"""
import sys
import os
# Add parent directory so killer_sudoku_env is importable as a package,
# and also the current directory so server-side `from models import ...` works.
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_this_dir)) # parent (for package import)
sys.path.insert(0, _this_dir) # current (for server-side `from models import`)
from killer_sudoku_env.models import KillerSudokuAction, KillerSudokuObservation
from killer_sudoku_env.server.killer_sudoku_env_environment import KillerSudokuEnvironment
from killer_sudoku_env.server.sumdoku import Sumdoku
# For test speed, always use non-unique puzzles (skips expensive uniqueness solver)
_original_reset = KillerSudokuEnvironment.reset


def _fast_reset(self, *args, **kwargs):
    """Patched reset that forces unique=False for fast puzzle generation.

    While the original ``reset`` runs, ``random.random`` is swapped for a stub
    that always returns 0.99; the real function is restored afterwards, even
    if ``reset`` raises. Positional and keyword arguments are forwarded
    unchanged, so callers that pass arguments to ``reset`` keep working once
    this patch is installed on the class.

    NOTE(review): this relies on the environment deciding ``unique`` by
    comparing ``random.random()`` against 0.95 — confirm against the
    environment implementation. Every other ``random.random()`` call made
    during the reset also receives 0.99.

    Returns:
        Whatever the original ``reset`` returns.
    """
    import random as _random

    _orig_random = _random.random
    _random.random = lambda: 0.99  # > 0.95 → unique=False
    try:
        return _original_reset(self, *args, **kwargs)
    finally:
        # Always restore the real generator so later code is unaffected.
        _random.random = _orig_random


KillerSudokuEnvironment.reset = _fast_reset
def print_section(title: str) -> None:
    """Print ``title`` framed by two 60-character ``=`` rules.

    A blank line precedes the top rule and another follows the bottom rule;
    the title itself is indented by one space.
    """
    rule = "=" * 60
    print(f"\n{rule}", f" {title}", f"{rule}\n", sep="\n")
def demo_basic_gameplay() -> None:
    """Demonstrate basic gameplay with all 3 action types.

    Resets a fresh environment, prints the initial observation (board size,
    difficulty, board and the first five lines of the rules prompt), then
    runs ``propose_candidates``, ``eliminate_candidate`` and two
    ``enter_answer`` actions: one using the cell's solution value and one
    using a deliberately wrong value on another cell. The result and reward
    of every step are printed.

    Returns early if the initial observation lists no candidate cells.
    """
    print_section("BASIC GAMEPLAY DEMO")
    env = KillerSudokuEnvironment()

    # --- Reset ---
    obs = env.reset()
    print("--- Reset ---")
    print(f"Board size: {obs.n}x{obs.n}, Difficulty: {obs.difficulty}")
    print(f"Action result: {obs.action_result}")
    print(f"Number of empty cells with candidates: {len(obs.candidates)}")
    print(f"\nBoard:\n{obs.board_display}")

    # Show first few lines of rules
    rules_lines = obs.rules_prompt.split('\n')
    print("Rules (first 5 lines):")
    for line in rules_lines[:5]:
        print(f" {line}")
    print(" ...")

    # Find an empty cell from candidates (keys are "x,y" strings).
    empty_cells = list(obs.candidates)
    if not empty_cells:
        print("No empty cells found!")
        return
    first_cell = empty_cells[0]
    x, y = map(int, first_cell.split(","))
    print(f"\nUsing cell ({x}, {y}) for demos. Initial candidates: {obs.candidates[first_cell]}")

    # --- Action 1: propose_candidates ---
    print("\n--- propose_candidates ---")
    action = KillerSudokuAction(
        action_type="propose_candidates",
        x=x, y=y,
        values=[1, 2, 3],
    )
    obs = env.step(action)
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Candidates for ({x},{y}): {obs.candidates.get(first_cell)}")

    # --- Action 2: eliminate_candidate ---
    print("\n--- eliminate_candidate ---")
    action = KillerSudokuAction(
        action_type="eliminate_candidate",
        x=x, y=y,
        values=[3],
        justification="row_constraint",
    )
    obs = env.step(action)
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Candidates for ({x},{y}): {obs.candidates.get(first_cell)}")

    # --- Action 3: enter_answer (correct) ---
    # Get the correct answer from the solution held by the environment.
    solution_value = env._sumdoku.board.get_cell(x, y).solution_value
    print(f"\n--- enter_answer (correct: value={solution_value}) ---")
    action = KillerSudokuAction(
        action_type="enter_answer",
        x=x, y=y,
        value=solution_value,
    )
    obs = env.step(action)
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward}")
    print(f"Done: {obs.done}")

    # --- Action 4: enter_answer (incorrect) ---
    # Pick another cell from the candidates reported after the last step.
    empty_cells_2 = list(obs.candidates)
    if empty_cells_2:
        cell2 = empty_cells_2[0]
        x2, y2 = map(int, cell2.split(","))
        correct_val = env._sumdoku.board.get_cell(x2, y2).solution_value
        # Pick an incorrect value: 1, unless 1 is the solution, then 2.
        wrong_val = 1 if correct_val != 1 else 2
        print(f"\n--- enter_answer (incorrect: value={wrong_val} at ({x2},{y2})) ---")
        action = KillerSudokuAction(
            action_type="enter_answer",
            x=x2, y=y2,
            value=wrong_val,
        )
        obs = env.step(action)
        print(f"Result: {obs.action_result}")
        print(f"Reward: {obs.reward}")
        print(f"Incorrect answers: {obs.incorrect_answers}")
        print(f"Done: {obs.done}")
def demo_malformed_actions() -> None:
    """Send three malformed actions and print how the environment responds.

    Cases, in order: ``enter_answer`` with ``value=None``,
    ``propose_candidates`` at coordinates (99, 99), and
    ``eliminate_candidate`` with ``justification=None``. The printed text
    labels -3.0 as the expected reward for each case. The last case is
    skipped when the most recent observation lists no candidate cells.
    """
    print_section("MALFORMED ACTION DEMO")
    env = KillerSudokuEnvironment()
    env.reset()

    # Case 1: an answer with no value attached.
    print("--- enter_answer without value ---")
    obs = env.step(
        KillerSudokuAction(
            action_type="enter_answer",
            x=0, y=0,
            value=None,
        )
    )
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward} (expected: -3.0)")

    # Case 2: coordinates far outside any board.
    print("\n--- Out of range coordinates ---")
    obs = env.step(
        KillerSudokuAction(
            action_type="propose_candidates",
            x=99, y=99,
            values=[1, 2],
        )
    )
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward} (expected: -3.0)")

    # Case 3: an elimination with no justification, on the first listed cell.
    print("\n--- eliminate_candidate without justification ---")
    open_cells = list(obs.candidates.keys())
    if not open_cells:
        return
    col, row = (int(part) for part in open_cells[0].split(","))
    obs = env.step(
        KillerSudokuAction(
            action_type="eliminate_candidate",
            x=col, y=row,
            values=[1],
            justification=None,
        )
    )
    print(f"Result: {obs.action_result}")
    print(f"Reward: {obs.reward} (expected: -3.0)")
def demo_thinking_reward_decay() -> None:
    """Show the reward of up to eight consecutive ``propose_candidates`` steps.

    Each step targets a different cell from the initial observation. The
    demo expects a reward of 0.1 for the first six steps and 0.0 afterwards,
    and tags each line ``OK`` or ``MISMATCH`` using a 0.001 tolerance.
    """
    print_section("THINKING REWARD DECAY DEMO")
    env = KillerSudokuEnvironment()
    first_obs = env.reset()
    targets = list(first_obs.candidates.keys())[:8]

    print("Proposing candidates for 8 consecutive cells:")
    for step_no, key in enumerate(targets, start=1):
        col, row = (int(part) for part in key.split(","))
        obs = env.step(
            KillerSudokuAction(
                action_type="propose_candidates",
                x=col, y=row,
                values=[1, 2, 3],
            )
        )
        # Steps 1-6 are expected to be rewarded; later ones are not.
        expected = 0.1 if step_no <= 6 else 0.0
        if abs(obs.reward - expected) < 0.001:
            status = "OK"
        else:
            status = "MISMATCH"
        print(f" Step {step_no}: reward={obs.reward:.1f} (expected {expected:.1f}) [{status}]")
def demo_five_wrong_termination() -> None:
    """Demonstrate game termination after 5 incorrect answers.

    Enters a wrong value on each of the first five cells listed in the
    initial observation and prints the reward, incorrect-answer count and
    ``done`` flag after every step. The wrong value is 1, or 2 when 1 is the
    cell's solution value.

    If the puzzle offers fewer than five empty cells, the summary says so
    instead of reporting a "5th wrong" result that never happened.
    """
    print_section("5 WRONG ANSWERS TERMINATION DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    empty_cells = list(obs.candidates)
    wrong_limit = 5

    print("Entering 5 wrong answers on different cells:")
    attempts = 0
    for attempts, cell_key in enumerate(empty_cells[:wrong_limit], start=1):
        x, y = map(int, cell_key.split(","))
        correct_val = env._sumdoku.board.get_cell(x, y).solution_value
        wrong_val = 1 if correct_val != 1 else 2
        action = KillerSudokuAction(
            action_type="enter_answer",
            x=x, y=y,
            value=wrong_val,
        )
        obs = env.step(action)
        print(f" Wrong #{attempts}: reward={obs.reward}, incorrect={obs.incorrect_answers}, done={obs.done}")

    if attempts < wrong_limit:
        # Not enough empty cells to reach the limit; avoid a misleading summary.
        print(f"\nOnly {attempts} of {wrong_limit} wrong answers could be entered (not enough empty cells).")
    else:
        print(f"\nFinal reward on 5th wrong: {obs.reward} (expected: -15.0 = -5.0 + -10.0 penalty)")
    print(f"Done: {obs.done} (expected: True)")
def demo_difficulty_progression() -> None:
    """Demonstrate difficulty progression across multiple games.

    Plays seven games on one environment instance. In each game every cell
    listed in the reset observation is answered with its solution value
    until the episode reports ``done``. Afterwards the environment's final
    ``_n`` / ``_difficulty`` are compared with the values it had before the
    first game, so the check does not depend on hard-coded starting values.
    """
    print_section("DIFFICULTY PROGRESSION DEMO")
    env = KillerSudokuEnvironment()
    # Remember the starting state; it is the baseline for the final check.
    start_n, start_difficulty = env._n, env._difficulty
    print(f"Starting: n={start_n}, difficulty={start_difficulty}")

    for game in range(7):
        obs = env.reset()
        print(f"\nGame {game + 1}: n={obs.n}, difficulty={obs.difficulty}, "
              f"empty_cells={len(obs.candidates)}")

        # Solve everything correctly to push avg_ratio high
        empty_cells = list(obs.candidates)
        solved = 0
        for cell_key in empty_cells:
            x, y = map(int, cell_key.split(","))
            solution_value = env._sumdoku.board.get_cell(x, y).solution_value
            action = KillerSudokuAction(
                action_type="enter_answer",
                x=x, y=y,
                value=solution_value,
            )
            obs = env.step(action)
            solved += 1
            if obs.done:
                break

        print(f" Solved {solved} cells, final reward: {obs.reward}, "
              f"episode_total: {env._episode_reward:.1f}, done: {obs.done}")
        print(f" Game history: {len(env._game_rewards)} recorded "
              f"(progression check at 5+)")

    print(f"\nFinal state: n={env._n}, difficulty={env._difficulty}")
    if env._difficulty > start_difficulty or env._n > start_n:
        print(" Difficulty increased as expected!")
    else:
        print(" (Difficulty may not have changed if fewer than 5 full games completed)")
def demo_complete_puzzle() -> None:
    """Fill in every empty cell with its solution value and report the reward.

    Cells are taken from the reset observation's candidate keys. Rewards are
    summed step by step and the loop stops as soon as an observation reports
    ``done``. The printed expectation is one point per cell plus a 5.0 bonus.
    """
    print_section("COMPLETE PUZZLE DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()
    pending = list(obs.candidates.keys())
    cell_count = len(pending)

    print(f"Puzzle: {obs.n}x{obs.n}, difficulty={obs.difficulty}, "
          f"empty_cells={cell_count}")
    print(f"\nSolving all {cell_count} cells...")

    total_reward = 0.0
    for key in pending:
        col, row = (int(part) for part in key.split(","))
        answer = env._sumdoku.board.get_cell(col, row).solution_value
        obs = env.step(
            KillerSudokuAction(
                action_type="enter_answer",
                x=col, y=row,
                value=answer,
            )
        )
        total_reward = total_reward + obs.reward
        if obs.done:
            break

    print(f"Result: {obs.action_result}")
    print(f"Total reward: {total_reward:.1f} "
          f"(expected: {cell_count}.0 correct + 5.0 bonus = {cell_count + 5}.0)")
    print(f"Done: {obs.done}")
    print(f"\nFinal board:\n{obs.board_display}")
def demo_step_limit() -> None:
    """Demonstrate step limit termination.

    Lowers the environment's ``_max_steps`` to a small value, then repeats
    the same ``propose_candidates`` action on one cell until the episode
    reports ``done`` or the limit's worth of steps has been taken.

    Returns early if the reset observation lists no candidate cells.

    Raises:
        AssertionError: if the episode did not terminate, or if the
            environment's accumulated episode reward is not negative.
    """
    print_section("STEP LIMIT DEMO")
    env = KillerSudokuEnvironment()
    obs = env.reset()

    # Override max_steps to a small number for demo purposes; the same value
    # bounds the loop below so the two cannot drift apart.
    step_limit = 10
    env._max_steps = step_limit
    print(f"Set max_steps to {env._max_steps} (normally {env._n * env._n * 10})")

    empty_cells = list(obs.candidates)
    if not empty_cells:
        # Nothing to act on; avoid an IndexError on empty_cells[0].
        print("No empty cells found!")
        return
    x, y = map(int, empty_cells[0].split(","))

    print(f"Spamming propose_candidates for {step_limit} steps...")
    for i in range(step_limit):
        action = KillerSudokuAction(
            action_type="propose_candidates",
            x=x, y=y,
            values=[1, 2, 3],
        )
        obs = env.step(action)
        if obs.done:
            print(f" Step {i+1}: TERMINATED")
            print(f" Result: {obs.action_result}")
            print(f" Reward: {obs.reward} (expected: -10.0)")
            print(f" Done: {obs.done} (expected: True)")
            break
        else:
            print(f" Step {i+1}: reward={obs.reward}")

    print(f"\nEpisode total reward: {env._episode_reward:.1f} (expected: negative)")
    assert obs.done, "Episode should have terminated"
    assert env._episode_reward < 0, f"Total reward should be negative, got {env._episode_reward}"
    print(" Confirmed: episode terminates with negative total reward!")
def main() -> None:
    """Run every demo in a fixed order, then print a completion banner."""
    demos = (
        demo_basic_gameplay,
        demo_malformed_actions,
        demo_thinking_reward_decay,
        demo_five_wrong_termination,
        demo_complete_puzzle,
        demo_step_limit,
        demo_difficulty_progression,
    )
    for run_demo in demos:
        run_demo()
    print_section("ALL DEMOS COMPLETE")


if __name__ == "__main__":
    main()