Spaces:

arnavster1
/

killer_sudoku_env

Runtime error

App Files Files Community

killer_sudoku_env / test.py

arnavster1

Upload folder using huggingface_hub

95b25ca verified about 2 months ago

raw

history blame contribute delete

12.1 kB

	"""Demo script for the Killer Sudoku Environment.

	Directly instantiates the environment (no Docker needed) and exercises
	all 3 action types, reward mechanics, and difficulty progression.
	"""

	import sys
	import os

	# Add parent directory so killer_sudoku_env is importable as a package,
	# and also the current directory so server-side `from models import ...` works.
	_this_dir = os.path.dirname(os.path.abspath(__file__))
	sys.path.insert(0, os.path.dirname(_this_dir)) # parent (for package import)
	sys.path.insert(0, _this_dir) # current (for server-side `from models import`)

	from killer_sudoku_env.models import KillerSudokuAction, KillerSudokuObservation
	from killer_sudoku_env.server.killer_sudoku_env_environment import KillerSudokuEnvironment
	from killer_sudoku_env.server.sumdoku import Sumdoku

	# For test speed, always use non-unique puzzles (skips expensive uniqueness solver).
	# The unpatched method is kept so the wrapper below can delegate to it.
	_original_reset = KillerSudokuEnvironment.reset

	def _fast_reset(self, *args, **kwargs):
	    """Call the original ``reset`` while ``random.random`` is pinned to 0.99.

	    Per the original author's note, the environment chooses ``unique=False``
	    when ``random.random()`` exceeds 0.95 (that logic lives in the
	    environment module and is not visible here -- confirm if it changes).

	    Any positional or keyword arguments are forwarded untouched, so the
	    patched method stays call-compatible with the original ``reset``.

	    Returns:
	        Whatever the original ``reset`` returns.
	    """
	    import random as _random

	    _orig_random = _random.random
	    # NOTE: this replaces the module-level function, so *every*
	    # random.random() call made during reset sees 0.99, not only the
	    # uniqueness check.
	    _random.random = lambda: 0.99  # > 0.95 → unique=False
	    try:
	        return _original_reset(self, *args, **kwargs)
	    finally:
	        # Always restore the real generator, even if reset raises.
	        _random.random = _orig_random

	KillerSudokuEnvironment.reset = _fast_reset


	def print_section(title: str) -> None:
	    """Print *title* framed by two 60-character ``=`` rules.

	    A blank line precedes the top rule and another follows the bottom one.
	    """
	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f" {title}")
	    print(f"{rule}\n")


	def demo_basic_gameplay() -> None:
	    """Play a few moves on a fresh puzzle, covering every action type.

	    Resets the environment and prints the initial observation, then, on the
	    first cell listed in ``obs.candidates``, issues ``propose_candidates``,
	    ``eliminate_candidate`` and a correct ``enter_answer``. Finally a
	    deliberately wrong ``enter_answer`` is submitted on the first cell still
	    listed in the candidates. Returns early if the reset observation lists
	    no candidate cells.
	    """
	    print_section("BASIC GAMEPLAY DEMO")

	    env = KillerSudokuEnvironment()

	    # Fresh episode: dump the headline fields of the first observation.
	    obs = env.reset()
	    print("--- Reset ---")
	    print(f"Board size: {obs.n}x{obs.n}, Difficulty: {obs.difficulty}")
	    print(f"Action result: {obs.action_result}")
	    print(f"Number of empty cells with candidates: {len(obs.candidates)}")
	    print(f"\nBoard:\n{obs.board_display}")

	    # Only the head of the rules prompt is echoed.
	    rules_head = obs.rules_prompt.split('\n')[:5]
	    print(f"Rules (first 5 lines):")
	    for rule_line in rules_head:
	        print(f" {rule_line}")
	    print(" ...")

	    # Candidate keys are "x,y" strings; take the first one as the target.
	    open_cells = list(obs.candidates.keys())
	    if not open_cells:
	        print("No empty cells found!")
	        return

	    target_key = open_cells[0]
	    x, y = (int(part) for part in target_key.split(","))
	    print(f"\nUsing cell ({x}, {y}) for demos. Initial candidates: {obs.candidates[target_key]}")

	    # Action type 1: propose a candidate list for the target cell.
	    print("\n--- propose_candidates ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="propose_candidates",
	            x=x, y=y,
	            values=[1, 2, 3],
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward}")
	    print(f"Candidates for ({x},{y}): {obs.candidates.get(target_key)}")

	    # Action type 2: eliminate one candidate, supplying a justification.
	    print("\n--- eliminate_candidate ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="eliminate_candidate",
	            x=x, y=y,
	            values=[3],
	            justification="row_constraint",
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward}")
	    print(f"Candidates for ({x},{y}): {obs.candidates.get(target_key)}")

	    # Action type 3: enter the right answer, read from the stored solution.
	    solution_value = env._sumdoku.board.get_cell(x, y).solution_value
	    print(f"\n--- enter_answer (correct: value={solution_value}) ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="enter_answer",
	            x=x, y=y,
	            value=solution_value,
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward}")
	    print(f"Done: {obs.done}")

	    # Same action type again, this time with a value known to be wrong.
	    remaining = list(obs.candidates.keys())
	    if not remaining:
	        return

	    x2, y2 = (int(part) for part in remaining[0].split(","))
	    correct_val = env._sumdoku.board.get_cell(x2, y2).solution_value
	    # Any value other than the solution will do: use 1 unless 1 is correct.
	    wrong_val = 2 if correct_val == 1 else 1

	    print(f"\n--- enter_answer (incorrect: value={wrong_val} at ({x2},{y2})) ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="enter_answer",
	            x=x2, y=y2,
	            value=wrong_val,
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward}")
	    print(f"Incorrect answers: {obs.incorrect_answers}")
	    print(f"Done: {obs.done}")


	def demo_malformed_actions() -> None:
	    """Submit three malformed actions and print the reward for each.

	    The cases are: ``enter_answer`` with ``value=None``, a
	    ``propose_candidates`` at coordinates (99, 99), and an
	    ``eliminate_candidate`` with ``justification=None``. Each reward is
	    printed beside the -3.0 the script expects. The third case is skipped
	    when the latest observation lists no candidate cells.
	    """
	    print_section("MALFORMED ACTION DEMO")

	    env = KillerSudokuEnvironment()
	    # The reset observation itself is not needed; each step returns a new one.
	    env.reset()

	    # Case 1: an answer action that carries no value.
	    print("--- enter_answer without value ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="enter_answer",
	            x=0, y=0,
	            value=None,
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward} (expected: -3.0)")

	    # Case 2: coordinates far outside any board.
	    print("\n--- Out of range coordinates ---")
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="propose_candidates",
	            x=99, y=99,
	            values=[1, 2],
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward} (expected: -3.0)")

	    # Case 3: an elimination on a real cell, but with no justification.
	    print("\n--- eliminate_candidate without justification ---")
	    open_cells = list(obs.candidates.keys())
	    if not open_cells:
	        return

	    x, y = (int(part) for part in open_cells[0].split(","))
	    obs = env.step(
	        KillerSudokuAction(
	            action_type="eliminate_candidate",
	            x=x, y=y,
	            values=[1],
	            justification=None,
	        )
	    )
	    print(f"Result: {obs.action_result}")
	    print(f"Reward: {obs.reward} (expected: -3.0)")


	def demo_thinking_reward_decay() -> None:
	    """Propose candidates on up to eight cells and check each reward.

	    The script expects a reward of 0.1 for the first six proposals and 0.0
	    afterwards; each step is labelled OK or MISMATCH depending on whether
	    the observed reward is within 0.001 of that expectation.
	    """
	    print_section("THINKING REWARD DECAY DEMO")

	    env = KillerSudokuEnvironment()
	    obs = env.reset()

	    targets = list(obs.candidates.keys())[:8]
	    print("Proposing candidates for 8 consecutive cells:")
	    for step_no, cell_key in enumerate(targets, start=1):
	        x, y = (int(part) for part in cell_key.split(","))
	        obs = env.step(
	            KillerSudokuAction(
	                action_type="propose_candidates",
	                x=x, y=y,
	                values=[1, 2, 3],
	            )
	        )

	        # Steps 1-6 are expected to pay out; later ones are not.
	        if step_no <= 6:
	            expected = 0.1
	        else:
	            expected = 0.0

	        if abs(obs.reward - expected) < 0.001:
	            status = "OK"
	        else:
	            status = "MISMATCH"
	        print(f" Step {step_no}: reward={obs.reward:.1f} (expected {expected:.1f}) [{status}]")


	def demo_five_wrong_termination() -> None:
	    """Enter wrong answers on up to five different cells and report the outcome.

	    For each of the first five candidate cells a value other than the
	    stored solution is submitted. After the loop the last reward and the
	    ``done`` flag are printed next to the values the script expects
	    (-15.0 and True).
	    """
	    print_section("5 WRONG ANSWERS TERMINATION DEMO")

	    env = KillerSudokuEnvironment()
	    obs = env.reset()

	    open_cells = list(obs.candidates.keys())
	    print(f"Entering 5 wrong answers on different cells:")

	    # Slicing caps the run at five cells and copes with shorter lists.
	    for attempt, cell_key in enumerate(open_cells[:5], start=1):
	        x, y = (int(part) for part in cell_key.split(","))
	        correct_val = env._sumdoku.board.get_cell(x, y).solution_value
	        # Use 1 as the wrong value unless 1 happens to be the solution.
	        wrong_val = 2 if correct_val == 1 else 1

	        obs = env.step(
	            KillerSudokuAction(
	                action_type="enter_answer",
	                x=x, y=y,
	                value=wrong_val,
	            )
	        )
	        print(f" Wrong #{attempt}: reward={obs.reward}, incorrect={obs.incorrect_answers}, done={obs.done}")

	    print(f"\nFinal reward on 5th wrong: {obs.reward} (expected: -15.0 = -5.0 + -10.0 penalty)")
	    print(f"Done: {obs.done} (expected: True)")


	def demo_difficulty_progression() -> None:
	    """Play seven games perfectly and report whether the difficulty rose.

	    Each game is reset, then every candidate cell is filled with its stored
	    solution value until the environment reports ``done``. After all games
	    the final ``_n`` / ``_difficulty`` are compared with the values the
	    environment had before the first reset, so the "increased" message
	    reflects an actual change rather than an assumed starting point.
	    """
	    print_section("DIFFICULTY PROGRESSION DEMO")

	    env = KillerSudokuEnvironment()
	    # Remember the baseline so the final check does not rely on
	    # hard-coded starting values.
	    start_n = env._n
	    start_difficulty = env._difficulty
	    print(f"Starting: n={start_n}, difficulty={start_difficulty}")

	    for game in range(7):
	        obs = env.reset()
	        print(f"\nGame {game + 1}: n={obs.n}, difficulty={obs.difficulty}, "
	              f"empty_cells={len(obs.candidates)}")

	        # Solve everything correctly to push avg_ratio high
	        empty_cells = list(obs.candidates.keys())
	        solved = 0
	        for cell_key in empty_cells:
	            x, y = map(int, cell_key.split(","))
	            solution_value = env._sumdoku.board.get_cell(x, y).solution_value
	            action = KillerSudokuAction(
	                action_type="enter_answer",
	                x=x, y=y,
	                value=solution_value,
	            )
	            obs = env.step(action)
	            solved += 1
	            if obs.done:
	                break

	        print(f" Solved {solved} cells, final reward: {obs.reward}, "
	              f"episode_total: {env._episode_reward:.1f}, done: {obs.done}")
	        print(f" Game history: {len(env._game_rewards)} recorded "
	              f"(progression check at 5+)")

	    print(f"\nFinal state: n={env._n}, difficulty={env._difficulty}")
	    # Either a higher difficulty or a larger board counts as progression.
	    if env._difficulty > start_difficulty or env._n > start_n:
	        print(" Difficulty increased as expected!")
	    else:
	        print(" (Difficulty may not have changed if fewer than 5 full games completed)")


	def demo_complete_puzzle() -> None:
	    """Fill in every candidate cell correctly and report the summed reward.

	    The total is printed next to the figure the script expects: one point
	    per cell plus a 5.0 completion bonus. The final board is printed last.
	    """
	    print_section("COMPLETE PUZZLE DEMO")

	    env = KillerSudokuEnvironment()
	    obs = env.reset()

	    empty_cells = list(obs.candidates.keys())
	    cell_count = len(empty_cells)
	    print(f"Puzzle: {obs.n}x{obs.n}, difficulty={obs.difficulty}, "
	          f"empty_cells={cell_count}")
	    print(f"\nSolving all {cell_count} cells...")

	    total_reward = 0.0
	    for cell_key in empty_cells:
	        x, y = (int(part) for part in cell_key.split(","))
	        # The stored solution supplies the right value for each cell.
	        answer = env._sumdoku.board.get_cell(x, y).solution_value
	        obs = env.step(
	            KillerSudokuAction(
	                action_type="enter_answer",
	                x=x, y=y,
	                value=answer,
	            )
	        )
	        total_reward += obs.reward
	        if obs.done:
	            break

	    print(f"Result: {obs.action_result}")
	    print(f"Total reward: {total_reward:.1f} "
	          f"(expected: {cell_count}.0 correct + 5.0 bonus = {cell_count + 5}.0)")
	    print(f"Done: {obs.done}")
	    print(f"\nFinal board:\n{obs.board_display}")


	def demo_step_limit() -> None:
	    """Hit a lowered step limit by repeating one action, then verify the result.

	    ``env._max_steps`` is overridden to 10 and the same
	    ``propose_candidates`` action is sent up to ten times on the first
	    candidate cell. The function asserts that the episode ended and that
	    the accumulated episode reward is negative.
	    """
	    print_section("STEP LIMIT DEMO")

	    env = KillerSudokuEnvironment()
	    obs = env.reset()

	    # Shrink the limit so the demo finishes quickly.
	    env._max_steps = 10
	    print(f"Set max_steps to {env._max_steps} (normally {env._n * env._n * 10})")

	    first_key = list(obs.candidates.keys())[0]
	    x, y = (int(part) for part in first_key.split(","))

	    print(f"Spamming propose_candidates for 10 steps...")
	    for step_no in range(1, 11):
	        obs = env.step(
	            KillerSudokuAction(
	                action_type="propose_candidates",
	                x=x, y=y,
	                values=[1, 2, 3],
	            )
	        )
	        if not obs.done:
	            print(f" Step {step_no}: reward={obs.reward}")
	            continue

	        # The environment ended the episode: report and stop.
	        print(f" Step {step_no}: TERMINATED")
	        print(f" Result: {obs.action_result}")
	        print(f" Reward: {obs.reward} (expected: -10.0)")
	        print(f" Done: {obs.done} (expected: True)")
	        break

	    print(f"\nEpisode total reward: {env._episode_reward:.1f} (expected: negative)")
	    assert obs.done, "Episode should have terminated"
	    assert env._episode_reward < 0, f"Total reward should be negative, got {env._episode_reward}"
	    print(" Confirmed: episode terminates with negative total reward!")


	def main() -> None:
	    """Run every demo once, in a fixed order, then print a closing banner."""
	    # Difficulty progression runs last, after the single-episode demos.
	    demos = (
	        demo_basic_gameplay,
	        demo_malformed_actions,
	        demo_thinking_reward_decay,
	        demo_five_wrong_termination,
	        demo_complete_puzzle,
	        demo_step_limit,
	        demo_difficulty_progression,
	    )
	    for run_demo in demos:
	        run_demo()

	    print_section("ALL DEMOS COMPLETE")


	# Script entry point: run the demos only when executed directly, not on import.
	if __name__ == "__main__":
	    main()