Spaces:

Mooizz
/

New-space-openenv

Sleeping

App Files Files Community

New-space-openenv / plugins /tests /test_codenames_plugin.py

Mooizz

Upload folder using huggingface_hub

1070765 verified 6 days ago

raw

history blame contribute delete

22.8 kB

	"""Tests for the Codenames multi-agent plugin.

	Tests cover:
	- Plugin registration and basic methods
	- Configuration validation
	- Game state transitions (using mock board)
	- Word interactions serialization
	- Conversation log (matching Cicero pattern)
	- Integration tests

	Uses shared local Qwen3 8B game-play model from avalon/llm.py.
	Tests marked with SKIP_WITHOUT_GPU require a GPU for the local model.
	"""

	from __future__ import annotations

	import pytest
	from watchdog_env.plugins.codenames.codenames_config import CodenamesConfig, CODENAMES_AGENTS
	from watchdog_env.plugins.codenames.codenames_plugin import CodenamesPlugin
	from watchdog_env.plugins.codenames.board_generator import (
	generate_board,
	BoardAssignment,
	BoardGenerationError,
	)
	from watchdog_env.plugins.codenames.game_state import CodenamesGameState, ClueRecord, GuessRecord
	from watchdog_env.plugins.codenames.word_interactions import (
	WordInteractions,
	WordRelation,
	ThematicCluster,
	)
	from watchdog_env.plugins.codenames.agents import (
	CodenamesAgent,
	create_agents,
	ClueAction,
	GuessAction,
	AgentActionError,
	)
	from watchdog_env.plugins.codenames.game_runner import (
	CodenamesGameRunner,
	GameRecord,
	)
	from watchdog_env.plugins.base import get_conversation_log
	from watchdog_env.plugins.registry import get_plugin, list_game_ids


	def _has_gpu():
	"""Check if GPU is available for local model."""
	try:
	import torch
	return torch.cuda.is_available()
	except ImportError:
	return False


	# Marker applied to tests/classes that need the local model; the GPU probe
	# runs once, when this module is imported.
	SKIP_WITHOUT_GPU = pytest.mark.skipif(not _has_gpu(), reason="GPU not available for local model")


	def _create_mock_board() -> BoardAssignment:
	    """Build a fixed 25-word board so state tests never need the model.

	    Layout, by position in the word list: the first 9 words are red, the
	    next 8 are blue, the next 7 are neutral, and the final word ("ZEBRA")
	    is the assassin. The interactions object is constructed from the word
	    list only.

	    Returns:
	        A ``BoardAssignment`` holding the words, their assignments, the
	        interactions object, and a 5x5 row-major grid of the words.
	    """
	    words = [
	        "APPLE", "BANK", "CARD", "DOOR", "ENGINE",
	        "FIRE", "GHOST", "HOTEL", "ICE", "JAZZ",
	        "KING", "LAMP", "MOON", "NIGHT", "OPERA",
	        "PIANO", "QUEEN", "RIVER", "STAR", "TREE",
	        "UMBRELLA", "VIOLIN", "WATER", "YARD", "ZEBRA",
	    ]

	    assignments = {}
	    # Assassin (inserted first, matching the original insertion order)
	    assignments["ZEBRA"] = "assassin"
	    # Red team (9)
	    for w in words[0:9]:
	        assignments[w] = "red"
	    # Blue team (8)
	    for w in words[9:17]:
	        assignments[w] = "blue"
	    # Neutral (7)
	    for w in words[17:24]:
	        assignments[w] = "neutral"

	    interactions = WordInteractions(words=words)

	    # Row i of the grid is words[5*i : 5*i + 5]. The multiplication
	    # operators had been lost from this expression, which made it a
	    # syntax error.
	    grid = [words[i * 5:(i + 1) * 5] for i in range(5)]

	    return BoardAssignment(
	        words=words,
	        assignments=assignments,
	        interactions=interactions,
	        grid=grid,
	    )


	# ============================================================================
	# Plugin Registration Tests
	# ============================================================================

	class TestPluginRegistration:
	    """Registry lookup and the plugin's basic identity methods."""

	    def test_plugin_registered(self):
	        """The registry's game-ID listing contains "codenames"."""
	        registered_ids = list_game_ids()
	        assert "codenames" in registered_ids

	    def test_get_plugin(self):
	        """Looking up "codenames" yields a plugin reporting that same ID."""
	        found = get_plugin("codenames")
	        assert found is not None
	        assert found.get_game_id() == "codenames"

	    def test_game_id(self):
	        """A directly constructed plugin reports the "codenames" game ID."""
	        assert CodenamesPlugin().get_game_id() == "codenames"

	    def test_display_name(self):
	        """The display name mentions "Codenames"."""
	        assert "Codenames" in CodenamesPlugin().get_display_name()

	    def test_list_agent_ids(self):
	        """Exactly four agent IDs are listed: spymaster and operative per team."""
	        agent_ids = CodenamesPlugin().list_agent_ids()
	        assert len(agent_ids) == 4
	        for expected_id in (
	            "red_spymaster",
	            "red_operative",
	            "blue_spymaster",
	            "blue_operative",
	        ):
	            assert expected_id in agent_ids


	# ============================================================================
	# Configuration Tests
	# ============================================================================

	class TestCodenamesConfig:
	    """Defaults and validation rules of ``CodenamesConfig``."""

	    def test_default_config(self):
	        """A default-constructed config carries the expected default values."""
	        cfg = CodenamesConfig()
	        expected_defaults = {
	            "board_size": 25,
	            "red_words": 9,
	            "blue_words": 8,
	            "neutral_words": 7,
	            "assassin_words": 1,
	            "starting_team": "red",
	            "complexity_level": 2,
	        }
	        for field_name, wanted in expected_defaults.items():
	            assert getattr(cfg, field_name) == wanted

	    def test_validate_correct_total(self):
	        """Validating the default config does not raise."""
	        CodenamesConfig().validate()

	    def test_validate_incorrect_total(self):
	        """Word counts that do not sum to the board size are rejected."""
	        # One extra red word makes the total 26 instead of 25.
	        oversized = CodenamesConfig(red_words=10)
	        with pytest.raises(ValueError, match="must equal board_size"):
	            oversized.validate()

	    def test_validate_invalid_team(self):
	        """A starting team other than red/blue is rejected."""
	        bad_team = CodenamesConfig(starting_team="green")  # type: ignore
	        with pytest.raises(ValueError, match="must be 'red' or 'blue'"):
	            bad_team.validate()

	    def test_validate_invalid_complexity(self):
	        """A complexity level outside 1-3 is rejected."""
	        bad_level = CodenamesConfig(complexity_level=5)
	        with pytest.raises(ValueError, match="must be 1, 2, or 3"):
	            bad_level.validate()


	# ============================================================================
	# Board Generation Tests
	# ============================================================================

	@SKIP_WITHOUT_GPU
	class TestBoardGeneration:
	    """Checks on ``generate_board`` output; skipped when no GPU is available."""

	    def test_generate_board_basic(self):
	        """Complexity 1 yields 25 words, 25 assignments and a 5x5 grid."""
	        generated = generate_board(seed=42, complexity_level=1)
	        assert len(generated.words) == 25
	        assert len(generated.assignments) == 25
	        assert len(generated.grid) == 5
	        for row in generated.grid:
	            assert len(row) == 5

	    def test_generate_board_medium(self):
	        """Complexity 2 yields 25 words and a non-None interactions object."""
	        generated = generate_board(seed=42, complexity_level=2)
	        assert len(generated.words) == 25
	        assert generated.interactions is not None

	    def test_generate_board_complex(self):
	        """Complexity 3 yields 25 words and a non-None interactions object."""
	        generated = generate_board(seed=42, complexity_level=3)
	        assert len(generated.words) == 25
	        assert generated.interactions is not None

	    def test_board_assignments_correct_counts(self):
	        """Assignments split 9 red / 8 blue / 7 neutral / 1 assassin."""
	        generated = generate_board(seed=42)
	        word_types = list(generated.assignments.values())

	        assert word_types.count("red") == 9
	        assert word_types.count("blue") == 8
	        assert word_types.count("neutral") == 7
	        assert word_types.count("assassin") == 1


	# ============================================================================
	# Word Interactions Tests
	# ============================================================================

	class TestWordInteractions:
	    """Serialization and lookup helpers of ``WordInteractions``."""

	    def test_word_interactions_serialization(self):
	        """A ``to_dict``/``from_dict`` round trip keeps the compared fields."""
	        bank_relation = WordRelation("BANK", ["RIVER"], "polyseme", ["finance", "nature"])
	        water_cluster = ThematicCluster("water", ["BANK", "RIVER", "CURRENT"])
	        source = WordInteractions(
	            words=["BANK", "RIVER", "CURRENT"],
	            relations={"BANK": bank_relation},
	            clusters=[water_cluster],
	            polysemes=["BANK", "CURRENT"],
	            false_friends=[("BANK", "SUIT")],
	            assassin_traps=["CURRENT"],
	        )

	        round_tripped = WordInteractions.from_dict(source.to_dict())

	        assert round_tripped.words == source.words
	        assert round_tripped.polysemes == source.polysemes
	        assert len(round_tripped.clusters) == len(source.clusters)
	        assert len(round_tripped.relations) == len(source.relations)

	    def test_get_related_words(self):
	        """Related words listed on a relation are returned for that word."""
	        bank_relation = WordRelation("BANK", ["RIVER", "CURRENT"], "polyseme")
	        subject = WordInteractions(
	            words=["BANK", "RIVER"],
	            relations={"BANK": bank_relation},
	        )

	        neighbours = subject.get_related_words("BANK")
	        assert "RIVER" in neighbours
	        assert "CURRENT" in neighbours

	    def test_get_trap_level(self):
	        """Trap level comes from the relation; an unknown word gives 0."""
	        bank_relation = WordRelation("BANK", [], "polyseme", [], trap_level=2)
	        subject = WordInteractions(
	            words=["BANK"],
	            relations={"BANK": bank_relation},
	        )

	        assert subject.get_trap_level("BANK") == 2
	        assert subject.get_trap_level("UNKNOWN") == 0


	# ============================================================================
	# Game State Tests (using mock board)
	# ============================================================================

	class TestGameState:
	    """State-transition tests for ``CodenamesGameState`` on the mock board."""

	    @pytest.fixture
	    def game_state(self):
	        """Provide a fresh game state built on the mock board."""
	        return CodenamesGameState(board=_create_mock_board())

	    def test_initial_state(self, game_state):
	        """A new game starts on red's clue phase, turn 0, not over, no winner."""
	        assert game_state.current_team == "red"
	        assert game_state.current_phase == "clue"
	        assert game_state.turn_number == 0
	        assert not game_state.game_over
	        assert game_state.winner is None

	    def test_get_remaining_words(self, game_state):
	        """Before any guesses, red has 9 words remaining and blue has 8."""
	        remaining_red = game_state.get_remaining_words("red")
	        remaining_blue = game_state.get_remaining_words("blue")

	        assert len(remaining_red) == 9
	        assert len(remaining_blue) == 8

	    def test_process_clue(self, game_state):
	        """A clue moves the game to the guess phase and is recorded."""
	        game_state.process_clue("ANIMAL", 3, "Three animal words")

	        assert game_state.current_phase == "guess"
	        active_clue = game_state.current_clue
	        assert active_clue is not None
	        assert active_clue.clue_word == "ANIMAL"
	        assert active_clue.clue_number == 3
	        assert len(game_state.clue_history) == 1

	    def test_process_correct_guess(self, game_state):
	        """Guessing one of red's own words lets red keep guessing."""
	        game_state.process_clue("TEST", 2)

	        # Pick the first word still belonging to the red team.
	        own_word = game_state.get_remaining_words("red")[0]

	        may_continue, _message = game_state.process_guess(own_word)

	        assert may_continue
	        assert own_word in game_state.revealed_words
	        assert len(game_state.get_remaining_words("red")) == 8

	    def test_process_wrong_guess_ends_turn(self, game_state):
	        """Guessing a blue word on red's turn stops further guessing."""
	        game_state.process_clue("TEST", 2)

	        # A blue word is a wrong guess for the red team.
	        opponent_word = game_state.get_remaining_words("blue")[0]

	        may_continue, _message = game_state.process_guess(opponent_word)

	        assert not may_continue
	        assert opponent_word in game_state.revealed_words

	    def test_assassin_ends_game(self, game_state):
	        """Guessing the assassin ends the game in the opponent's favour."""
	        game_state.process_clue("TEST", 1)

	        # Locate the assassin word on the board.
	        assassin_word = next(
	            candidate
	            for candidate, word_type in game_state.board.assignments.items()
	            if word_type == "assassin"
	        )

	        may_continue, _message = game_state.process_guess(assassin_word)

	        assert not may_continue
	        assert game_state.game_over
	        assert game_state.winner == "blue"  # red guessed, so the opponent wins
	        assert game_state.game_over_reason == "assassin"

	    def test_all_words_found_wins(self, game_state):
	        """Revealing every red word ends the game with red as winner."""
	        game_state.process_clue("TEST", 10)  # High number for testing

	        # Snapshot the list first; guesses are processed while iterating.
	        targets = list(game_state.get_remaining_words("red"))
	        for target in targets:
	            _may_continue, _message = game_state.process_guess(target)
	            if game_state.game_over:
	                break

	        assert game_state.game_over
	        assert game_state.winner == "red"
	        assert game_state.game_over_reason == "all_words"

	    def test_end_turn_switches_team(self, game_state):
	        """Ending red's turn hands blue the clue phase and bumps the turn."""
	        assert game_state.current_team == "red"

	        game_state.process_clue("TEST", 1)
	        game_state.end_turn()

	        assert game_state.current_team == "blue"
	        assert game_state.current_phase == "clue"
	        assert game_state.turn_number == 1

	    def test_state_serialization(self, game_state):
	        """A ``to_dict``/``from_dict`` round trip keeps the compared fields."""
	        game_state.process_clue("TEST", 2)

	        snapshot = game_state.to_dict()
	        rebuilt = CodenamesGameState.from_dict(snapshot)

	        assert rebuilt.current_team == game_state.current_team
	        assert rebuilt.current_phase == game_state.current_phase
	        assert rebuilt.turn_number == game_state.turn_number
	        assert len(rebuilt.clue_history) == len(game_state.clue_history)


	# ============================================================================
	# Agent Tests
	# ============================================================================

	class TestAgentBasics:
	    """Agent construction checks that do not call the model."""

	    def test_create_agents(self):
	        """``create_agents`` returns exactly the four expected agent IDs."""
	        agents = create_agents()
	        assert len(agents) == 4
	        assert "red_spymaster" in agents
	        assert "red_operative" in agents
	        assert "blue_spymaster" in agents
	        assert "blue_operative" in agents

	    def test_agent_roles(self):
	        """Every agent has the role and team implied by its ID.

	        All four agents are checked; ``blue_operative`` was previously
	        left out, so a wrong role or team on it would go unnoticed.
	        """
	        agents = create_agents()

	        # agent ID -> (expected is_spymaster, expected team)
	        expected_roles = {
	            "red_spymaster": (True, "red"),
	            "red_operative": (False, "red"),
	            "blue_spymaster": (True, "blue"),
	            "blue_operative": (False, "blue"),
	        }

	        for agent_id, (is_spymaster, team) in expected_roles.items():
	            agent = agents[agent_id]
	            # bool() mirrors the truthiness checks used for the flag.
	            assert bool(agent.is_spymaster) == is_spymaster, agent_id
	            assert agent.team == team, agent_id


	@SKIP_WITHOUT_GPU
	class TestAgentsWithLLM:
	    """Agent action checks; skipped when no GPU is available."""

	    def test_spymaster_clue(self):
	        """The red spymaster returns a ``ClueAction`` with a word and number >= 1."""
	        state = CodenamesGameState(board=generate_board(seed=42))

	        spymaster = CodenamesAgent("red_spymaster")
	        clue = spymaster.get_action(state)

	        assert isinstance(clue, ClueAction)
	        assert clue.clue_word
	        assert clue.clue_number >= 1

	    def test_operative_guess(self):
	        """After a clue, the red operative returns a ``GuessAction``."""
	        state = CodenamesGameState(board=generate_board(seed=42))
	        state.process_clue("TEST", 2)

	        operative = CodenamesAgent("red_operative")
	        guess = operative.get_action(state)

	        assert isinstance(guess, GuessAction)
	        # A pass is acceptable; otherwise the guess must be an unrevealed board word.
	        if not guess.pass_turn:
	            still_hidden = set(state.board.words) - state.revealed_words
	            assert guess.guessed_word in still_hidden


	# ============================================================================
	# Plugin Integration Tests
	# ============================================================================

	@SKIP_WITHOUT_GPU
	class TestPluginIntegration:
	    """End-to-end plugin checks; skipped when no GPU is available."""

	    def test_reset_and_generate_step(self):
	        """After reset, step 0 opens with a red spymaster clue."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=42, config=CodenamesConfig(complexity_level=1))

	        first_step = plugin.generate_step(seed=42, step_index=0)

	        assert len(first_step.turns) >= 1
	        opening_turn = first_step.turns[0]
	        assert opening_turn.agent_id == "red_spymaster"
	        assert "CLUE" in opening_turn.action_text

	    def test_state_tracking(self):
	        """Step index and turn history advance after one generated step."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=42)

	        before = plugin.get_state()
	        assert before.step_index == 0
	        assert len(before.turns_so_far) == 0

	        plugin.generate_step(seed=42, step_index=0)

	        after = plugin.get_state()
	        assert after.step_index == 1
	        assert len(after.turns_so_far) >= 1

	    def test_game_state_access(self):
	        """The internal game state is exposed and holds a 25-word board."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=42)

	        inner_state = plugin.get_game_state()
	        assert inner_state is not None
	        assert len(inner_state.board.words) == 25

	    def test_full_game_state(self):
	        """The full state has "game_state" and "plugin_state" entries."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=42)

	        serialized = plugin.get_full_game_state()
	        for required_key in ("game_state", "plugin_state"):
	            assert required_key in serialized

	    def test_conversation_log_cleared_on_reset(self):
	        """A second reset leaves the conversation log empty (Cicero pattern)."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
	        plugin.generate_step(seed=1, step_index=0)
	        plugin.reset(seed=99, config=CodenamesConfig(complexity_level=1))

	        log_after_reset = get_conversation_log(plugin.get_state())
	        assert len(log_after_reset) == 0

	    def test_conversation_log_in_step_state(self):
	        """Step state carries a list log whose entries have speaker_id and message."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
	        step = plugin.generate_step(seed=1, step_index=0)

	        assert step.state is not None
	        entries = step.state.conversation_log
	        assert isinstance(entries, list)
	        for entry in entries:
	            assert "speaker_id" in entry
	            assert "message" in entry
	            text = entry["message"]
	            assert isinstance(text, str)
	            assert len(text) > 0

	    def test_conversation_log_accumulates(self):
	        """The log holds at least one entry after step 0 and two after step 1."""
	        plugin = CodenamesPlugin()
	        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))

	        plugin.generate_step(seed=1, step_index=0)
	        after_first = get_conversation_log(plugin.get_state())
	        assert len(after_first) >= 1

	        plugin.generate_step(seed=1, step_index=1)
	        after_second = get_conversation_log(plugin.get_state())
	        assert len(after_second) >= 2


	# ============================================================================
	# Game Runner Tests
	# ============================================================================

	@SKIP_WITHOUT_GPU
	class TestGameRunner:
	    """``CodenamesGameRunner`` checks; skipped when no GPU is available."""

	    def test_run_game_produces_record(self):
	        """A run returns a ``GameRecord`` with an ID, start time and steps."""
	        outcome = CodenamesGameRunner(max_steps=10).run_game(seed=42, verbose=False)

	        assert isinstance(outcome, GameRecord)
	        assert outcome.game_id
	        assert outcome.start_time
	        assert len(outcome.steps) > 0

	    def test_game_record_serialization(self):
	        """``to_json`` output is non-empty and mentions game_id and steps."""
	        outcome = CodenamesGameRunner(max_steps=5).run_game(seed=42, verbose=False)

	        payload = outcome.to_json()
	        assert payload
	        for fragment in ("game_id", "steps"):
	            assert fragment in payload

	    def test_game_record_save_load(self, tmp_path):
	        """A saved record loads back with the same ID, seed and step count."""
	        outcome = CodenamesGameRunner(max_steps=5).run_game(seed=42, verbose=False)

	        target = tmp_path / "test_game.json"
	        outcome.save(target)

	        reloaded = GameRecord.load(target)
	        assert reloaded.game_id == outcome.game_id
	        assert reloaded.seed == outcome.seed
	        assert len(reloaded.steps) == len(outcome.steps)


	# ============================================================================
	# Full Integration Tests with API
	# ============================================================================

	@SKIP_WITHOUT_GPU
	class TestFullIntegration:
	    """Full-stack tests that need an LLM; skipped when no GPU is available."""

	    def test_board_generation_with_llm(self):
	        """A complexity-2 board has 25 words and a non-None interactions object."""
	        generated = generate_board(seed=42, complexity_level=2)
	        assert len(generated.words) == 25
	        assert generated.interactions is not None

	    def test_agent_with_llm(self):
	        """The red spymaster returns a ``ClueAction`` that includes reasoning."""
	        state = CodenamesGameState(board=generate_board(seed=42))

	        spymaster = CodenamesAgent("red_spymaster")
	        decision = spymaster.get_action(state)

	        assert isinstance(decision, ClueAction)
	        # The action is expected to carry non-empty reasoning.
	        assert decision.reasoning

	    def test_full_game_with_llm(self):
	        """A 50-step run records at least one clue step and one guess step."""
	        outcome = CodenamesGameRunner(max_steps=50).run_game(
	            config=CodenamesConfig(complexity_level=2),
	            seed=42,
	            verbose=False,
	        )

	        assert len(outcome.steps) > 0
	        # Split the recorded steps by the marker found in their action text.
	        with_clue = [step for step in outcome.steps if "CLUE" in step.action_text]
	        with_guess = [step for step in outcome.steps if "GUESS" in step.action_text]

	        assert len(with_clue) > 0
	        assert len(with_guess) > 0


	# Allow running this file directly. pytest.main() returns the exit code of
	# the run; it is propagated via SystemExit so a failing run yields a
	# non-zero process status instead of always exiting with 0.
	if __name__ == "__main__":
	    raise SystemExit(pytest.main([__file__, "-v"]))