New-space-openenv / plugins /tests /test_codenames_plugin.py
Mooizz's picture
Upload folder using huggingface_hub
1070765 verified
"""Tests for the Codenames multi-agent plugin.
Tests cover:
- Plugin registration and basic methods
- Configuration validation
- Game state transitions (using mock board)
- Word interactions serialization
- Conversation log (matching Cicero pattern)
- Integration tests
Uses shared local Qwen3 8B game-play model from avalon/llm.py.
Tests marked with SKIP_WITHOUT_GPU require a GPU for the local model.
"""
from __future__ import annotations
import pytest
from watchdog_env.plugins.codenames.codenames_config import CodenamesConfig, CODENAMES_AGENTS
from watchdog_env.plugins.codenames.codenames_plugin import CodenamesPlugin
from watchdog_env.plugins.codenames.board_generator import (
generate_board,
BoardAssignment,
BoardGenerationError,
)
from watchdog_env.plugins.codenames.game_state import CodenamesGameState, ClueRecord, GuessRecord
from watchdog_env.plugins.codenames.word_interactions import (
WordInteractions,
WordRelation,
ThematicCluster,
)
from watchdog_env.plugins.codenames.agents import (
CodenamesAgent,
create_agents,
ClueAction,
GuessAction,
AgentActionError,
)
from watchdog_env.plugins.codenames.game_runner import (
CodenamesGameRunner,
GameRecord,
)
from watchdog_env.plugins.base import get_conversation_log
from watchdog_env.plugins.registry import get_plugin, list_game_ids
def _has_gpu():
"""Check if GPU is available for local model."""
try:
import torch
return torch.cuda.is_available()
except ImportError:
return False
# Marker for tests that drive the local model: they are skipped when
# _has_gpu() reports that no GPU is available.
SKIP_WITHOUT_GPU = pytest.mark.skipif(not _has_gpu(), reason="GPU not available for local model")
def _create_mock_board() -> BoardAssignment:
    """Build a fixed 25-word board so state tests need no generated board.

    Layout, by position in the word list: the first 9 words are red, the
    next 8 are blue, the next 7 are neutral, and "ZEBRA" is the assassin.
    The grid is the word list cut into 5 rows of 5 words.
    """
    words = [
        "APPLE", "BANK", "CARD", "DOOR", "ENGINE",
        "FIRE", "GHOST", "HOTEL", "ICE", "JAZZ",
        "KING", "LAMP", "MOON", "NIGHT", "OPERA",
        "PIANO", "QUEEN", "RIVER", "STAR", "TREE",
        "UMBRELLA", "VIOLIN", "WATER", "YARD", "ZEBRA",
    ]

    # The assassin entry is inserted first, then red, blue and neutral,
    # which keeps the mapping's key order stable.
    assignments = {"ZEBRA": "assassin"}
    role_ranges = (
        ("red", 0, 9),
        ("blue", 9, 17),
        ("neutral", 17, 24),
    )
    for role, start, stop in role_ranges:
        assignments.update(dict.fromkeys(words[start:stop], role))

    rows = [words[offset:offset + 5] for offset in range(0, 25, 5)]
    return BoardAssignment(
        words=words,
        assignments=assignments,
        interactions=WordInteractions(words=words),
        grid=rows,
    )
# ============================================================================
# Plugin Registration Tests
# ============================================================================
class TestPluginRegistration:
    """Registry lookup and the plugin's basic identity methods."""

    def test_plugin_registered(self):
        """The registry lists the "codenames" game ID."""
        registered_ids = list_game_ids()
        assert "codenames" in registered_ids

    def test_get_plugin(self):
        """Looking up "codenames" yields a plugin that reports that ID."""
        found = get_plugin("codenames")
        assert found is not None
        assert found.get_game_id() == "codenames"

    def test_game_id(self):
        """A directly constructed plugin reports the "codenames" ID."""
        assert CodenamesPlugin().get_game_id() == "codenames"

    def test_display_name(self):
        """The display name mentions "Codenames"."""
        display_name = CodenamesPlugin().get_display_name()
        assert "Codenames" in display_name

    def test_list_agent_ids(self):
        """Exactly four agent IDs are listed: spymaster and operative per team."""
        agent_ids = CodenamesPlugin().list_agent_ids()
        assert len(agent_ids) == 4
        for expected_id in (
            "red_spymaster",
            "red_operative",
            "blue_spymaster",
            "blue_operative",
        ):
            assert expected_id in agent_ids
# ============================================================================
# Configuration Tests
# ============================================================================
class TestCodenamesConfig:
    """Defaults and validation rules of CodenamesConfig."""

    def test_default_config(self):
        """A default-constructed config carries the expected field values."""
        defaults = CodenamesConfig()
        expected_fields = {
            "board_size": 25,
            "red_words": 9,
            "blue_words": 8,
            "neutral_words": 7,
            "assassin_words": 1,
            "starting_team": "red",
            "complexity_level": 2,
        }
        for field_name, expected_value in expected_fields.items():
            assert getattr(defaults, field_name) == expected_value

    def test_validate_correct_total(self):
        """validate() accepts the default config without raising."""
        CodenamesConfig().validate()

    def test_validate_incorrect_total(self):
        """validate() rejects word counts that do not add up to board_size."""
        oversized = CodenamesConfig(red_words=10)  # 10 + 8 + 7 + 1 = 26, not 25
        with pytest.raises(ValueError, match="must equal board_size"):
            oversized.validate()

    def test_validate_invalid_team(self):
        """validate() rejects a starting team other than red or blue."""
        bad_team = CodenamesConfig(starting_team="green")  # type: ignore
        with pytest.raises(ValueError, match="must be 'red' or 'blue'"):
            bad_team.validate()

    def test_validate_invalid_complexity(self):
        """validate() rejects a complexity level outside 1, 2, 3."""
        bad_level = CodenamesConfig(complexity_level=5)
        with pytest.raises(ValueError, match="must be 1, 2, or 3"):
            bad_level.validate()
# ============================================================================
# Board Generation Tests
# ============================================================================
@SKIP_WITHOUT_GPU
class TestBoardGeneration:
    """Checks on generate_board(); skipped when no GPU is available."""

    def test_generate_board_basic(self):
        """Complexity 1 yields 25 words, 25 assignments and a 5x5 grid."""
        generated = generate_board(seed=42, complexity_level=1)
        assert len(generated.words) == 25
        assert len(generated.assignments) == 25
        assert len(generated.grid) == 5
        for row in generated.grid:
            assert len(row) == 5

    def test_generate_board_medium(self):
        """Complexity 2 yields 25 words and a non-None interactions object."""
        generated = generate_board(seed=42, complexity_level=2)
        assert len(generated.words) == 25
        assert generated.interactions is not None

    def test_generate_board_complex(self):
        """Complexity 3 yields 25 words and a non-None interactions object."""
        generated = generate_board(seed=42, complexity_level=3)
        assert len(generated.words) == 25
        assert generated.interactions is not None

    def test_board_assignments_correct_counts(self):
        """Assignments split into 9 red, 8 blue, 7 neutral and 1 assassin."""
        roles = list(generate_board(seed=42).assignments.values())
        assert roles.count("red") == 9
        assert roles.count("blue") == 8
        assert roles.count("neutral") == 7
        assert roles.count("assassin") == 1
# ============================================================================
# Word Interactions Tests
# ============================================================================
class TestWordInteractions:
    """Round-tripping and lookup helpers of WordInteractions."""

    def test_word_interactions_serialization(self):
        """to_dict() followed by from_dict() preserves the compared fields."""
        bank_relation = WordRelation("BANK", ["RIVER"], "polyseme", ["finance", "nature"])
        water_cluster = ThematicCluster("water", ["BANK", "RIVER", "CURRENT"])
        original = WordInteractions(
            words=["BANK", "RIVER", "CURRENT"],
            relations={"BANK": bank_relation},
            clusters=[water_cluster],
            polysemes=["BANK", "CURRENT"],
            false_friends=[("BANK", "SUIT")],
            assassin_traps=["CURRENT"],
        )

        restored = WordInteractions.from_dict(original.to_dict())

        assert restored.words == original.words
        assert restored.polysemes == original.polysemes
        assert len(restored.clusters) == len(original.clusters)
        assert len(restored.relations) == len(original.relations)

    def test_get_related_words(self):
        """get_related_words() returns the words listed in the relation."""
        bank_relation = WordRelation("BANK", ["RIVER", "CURRENT"], "polyseme")
        interactions = WordInteractions(
            words=["BANK", "RIVER"],
            relations={"BANK": bank_relation},
        )
        neighbours = interactions.get_related_words("BANK")
        for expected_word in ("RIVER", "CURRENT"):
            assert expected_word in neighbours

    def test_get_trap_level(self):
        """get_trap_level() gives the stored level, and 0 for an unknown word."""
        trapped = WordRelation("BANK", [], "polyseme", [], trap_level=2)
        interactions = WordInteractions(words=["BANK"], relations={"BANK": trapped})
        assert interactions.get_trap_level("BANK") == 2
        assert interactions.get_trap_level("UNKNOWN") == 0
# ============================================================================
# Game State Tests (using mock board)
# ============================================================================
class TestGameState:
    """State transitions of CodenamesGameState, driven by the mock board."""

    @pytest.fixture
    def game_state(self):
        """Provide a fresh game state built on the fixed mock board."""
        return CodenamesGameState(board=_create_mock_board())

    def test_initial_state(self, game_state):
        """A new game starts with red giving a clue at turn 0, not over."""
        assert game_state.current_team == "red"
        assert game_state.current_phase == "clue"
        assert game_state.turn_number == 0
        assert not game_state.game_over
        assert game_state.winner is None

    def test_get_remaining_words(self, game_state):
        """Before any guess, red has 9 words left and blue has 8."""
        assert len(game_state.get_remaining_words("red")) == 9
        assert len(game_state.get_remaining_words("blue")) == 8

    def test_process_clue(self, game_state):
        """A clue moves the phase to "guess" and is recorded in the history."""
        game_state.process_clue("ANIMAL", 3, "Three animal words")
        assert game_state.current_phase == "guess"
        active_clue = game_state.current_clue
        assert active_clue is not None
        assert active_clue.clue_word == "ANIMAL"
        assert active_clue.clue_number == 3
        assert len(game_state.clue_history) == 1

    def test_process_correct_guess(self, game_state):
        """Guessing an own-team word reveals it and allows further guesses."""
        game_state.process_clue("TEST", 2)
        target = game_state.get_remaining_words("red")[0]
        may_continue, _message = game_state.process_guess(target)
        assert may_continue
        assert target in game_state.revealed_words
        assert len(game_state.get_remaining_words("red")) == 8

    def test_process_wrong_guess_ends_turn(self, game_state):
        """Guessing an opposing-team word reveals it and stops the guessing."""
        game_state.process_clue("TEST", 2)
        # Red is the guessing team here, so a blue word is a wrong guess.
        target = game_state.get_remaining_words("blue")[0]
        may_continue, _message = game_state.process_guess(target)
        assert not may_continue
        assert target in game_state.revealed_words

    def test_assassin_ends_game(self, game_state):
        """Guessing the assassin ends the game in favour of the other team."""
        game_state.process_clue("TEST", 1)
        assassin_word = next(
            word
            for word, role in game_state.board.assignments.items()
            if role == "assassin"
        )
        may_continue, _message = game_state.process_guess(assassin_word)
        assert not may_continue
        assert game_state.game_over
        assert game_state.winner == "blue"  # red guessed, so the opponent wins
        assert game_state.game_over_reason == "assassin"

    def test_all_words_found_wins(self, game_state):
        """Revealing every red word ends the game with red as the winner."""
        game_state.process_clue("TEST", 10)  # large count so guessing is not cut short
        # Copy the remaining words before guessing starts.
        for target in list(game_state.get_remaining_words("red")):
            _may_continue, _message = game_state.process_guess(target)
            if game_state.game_over:
                break
        assert game_state.game_over
        assert game_state.winner == "red"
        assert game_state.game_over_reason == "all_words"

    def test_end_turn_switches_team(self, game_state):
        """end_turn() hands play to blue in the clue phase and bumps the turn."""
        assert game_state.current_team == "red"
        game_state.process_clue("TEST", 1)
        game_state.end_turn()
        assert game_state.current_team == "blue"
        assert game_state.current_phase == "clue"
        assert game_state.turn_number == 1

    def test_state_serialization(self, game_state):
        """to_dict() followed by from_dict() preserves the compared fields."""
        game_state.process_clue("TEST", 2)
        restored = CodenamesGameState.from_dict(game_state.to_dict())
        assert restored.current_team == game_state.current_team
        assert restored.current_phase == game_state.current_phase
        assert restored.turn_number == game_state.turn_number
        assert len(restored.clue_history) == len(game_state.clue_history)
# ============================================================================
# Agent Tests
# ============================================================================
class TestAgentBasics:
    """Test basic agent functionality that needs no model."""

    def test_create_agents(self):
        """create_agents() returns exactly the four expected agent IDs."""
        agents = create_agents()
        assert len(agents) == 4
        for agent_id in (
            "red_spymaster",
            "red_operative",
            "blue_spymaster",
            "blue_operative",
        ):
            assert agent_id in agents

    def test_agent_roles(self):
        """Every agent carries the team and spymaster flag its ID implies.

        All four agents are checked, including blue_operative, so a
        mis-wired fourth agent cannot slip through.
        """
        # agent ID -> (expected team, expected spymaster flag)
        expected_roles = {
            "red_spymaster": ("red", True),
            "red_operative": ("red", False),
            "blue_spymaster": ("blue", True),
            "blue_operative": ("blue", False),
        }
        agents = create_agents()
        for agent_id, (team, is_spymaster) in expected_roles.items():
            agent = agents[agent_id]
            assert agent.team == team
            # bool() keeps the truthiness semantics of a bare assert.
            assert bool(agent.is_spymaster) == is_spymaster
@SKIP_WITHOUT_GPU
class TestAgentsWithLLM:
    """Agent decisions on a generated board; skipped when no GPU is available."""

    def test_spymaster_clue(self):
        """The red spymaster answers a fresh game with a non-empty clue."""
        state = CodenamesGameState(board=generate_board(seed=42))
        spymaster = CodenamesAgent("red_spymaster")
        action = spymaster.get_action(state)
        assert isinstance(action, ClueAction)
        assert action.clue_word
        assert action.clue_number >= 1

    def test_operative_guess(self):
        """The red operative either passes or names a still-hidden board word."""
        state = CodenamesGameState(board=generate_board(seed=42))
        state.process_clue("TEST", 2)
        operative = CodenamesAgent("red_operative")
        action = operative.get_action(state)
        assert isinstance(action, GuessAction)
        if action.pass_turn:
            # A pass is a valid answer; there is no guessed word to check.
            return
        still_hidden = set(state.board.words) - state.revealed_words
        assert action.guessed_word in still_hidden
# ============================================================================
# Plugin Integration Tests
# ============================================================================
@SKIP_WITHOUT_GPU
class TestPluginIntegration:
    """Test the plugin as a whole; skipped when no GPU is available."""

    def test_reset_and_generate_step(self):
        """After reset, step 0 opens with a red spymaster turn containing "CLUE"."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42, config=CodenamesConfig(complexity_level=1))
        step0 = plugin.generate_step(seed=42, step_index=0)
        assert len(step0.turns) >= 1
        opening_turn = step0.turns[0]
        assert opening_turn.agent_id == "red_spymaster"
        assert "CLUE" in opening_turn.action_text

    def test_state_tracking(self):
        """step_index and turns_so_far advance when a step is generated."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        state = plugin.get_state()
        assert state.step_index == 0
        assert len(state.turns_so_far) == 0
        plugin.generate_step(seed=42, step_index=0)
        state = plugin.get_state()
        assert state.step_index == 1
        assert len(state.turns_so_far) >= 1

    def test_game_state_access(self):
        """get_game_state() exposes a game state with a 25-word board."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        game_state = plugin.get_game_state()
        assert game_state is not None
        assert len(game_state.board.words) == 25

    def test_full_game_state(self):
        """get_full_game_state() contains "game_state" and "plugin_state"."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        full_state = plugin.get_full_game_state()
        assert "game_state" in full_state
        assert "plugin_state" in full_state

    def test_conversation_log_cleared_on_reset(self):
        """Reset clears conversation_log (matching Cicero pattern)."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        plugin.generate_step(seed=1, step_index=0)
        plugin.reset(seed=99, config=CodenamesConfig(complexity_level=1))
        assert len(get_conversation_log(plugin.get_state())) == 0

    def test_conversation_log_in_step_state(self):
        """Each step returns state with conversation_log; entries have speaker_id, message."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        step = plugin.generate_step(seed=1, step_index=0)
        assert step.state is not None
        log = step.state.conversation_log
        assert isinstance(log, list)
        # Without this guard the per-entry loop below passes vacuously on an
        # empty log; test_conversation_log_accumulates expects at least one
        # entry after this same first step.
        assert log, "conversation_log should not be empty after the first step"
        for entry in log:
            assert "speaker_id" in entry and "message" in entry
            assert isinstance(entry["message"], str) and len(entry["message"]) > 0

    def test_conversation_log_accumulates(self):
        """Conversation log accumulates across steps."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        plugin.generate_step(seed=1, step_index=0)
        log_after_first = get_conversation_log(plugin.get_state())
        assert len(log_after_first) >= 1
        plugin.generate_step(seed=1, step_index=1)
        log_after_second = get_conversation_log(plugin.get_state())
        assert len(log_after_second) >= 2
# ============================================================================
# Game Runner Tests
# ============================================================================
@SKIP_WITHOUT_GPU
class TestGameRunner:
    """CodenamesGameRunner and GameRecord checks; skipped when no GPU is available."""

    def test_run_game_produces_record(self):
        """run_game() returns a GameRecord with an ID, a start time and steps."""
        record = CodenamesGameRunner(max_steps=10).run_game(seed=42, verbose=False)
        assert isinstance(record, GameRecord)
        assert record.game_id
        assert record.start_time
        assert len(record.steps) > 0

    def test_game_record_serialization(self):
        """to_json() gives non-empty text mentioning "game_id" and "steps"."""
        record = CodenamesGameRunner(max_steps=5).run_game(seed=42, verbose=False)
        serialized = record.to_json()
        assert serialized
        for expected_key in ("game_id", "steps"):
            assert expected_key in serialized

    def test_game_record_save_load(self, tmp_path):
        """A saved record loads back with the same ID, seed and step count."""
        record = CodenamesGameRunner(max_steps=5).run_game(seed=42, verbose=False)
        target = tmp_path / "test_game.json"
        record.save(target)
        reloaded = GameRecord.load(target)
        assert reloaded.game_id == record.game_id
        assert reloaded.seed == record.seed
        assert len(reloaded.steps) == len(record.steps)
# ============================================================================
# Full Integration Tests with API
# ============================================================================
@SKIP_WITHOUT_GPU
class TestFullIntegration:
    """End-to-end checks that exercise the LLM; skipped when no GPU is available."""

    def test_board_generation_with_llm(self):
        """A complexity-2 board has 25 words and a non-None interactions object."""
        generated = generate_board(seed=42, complexity_level=2)
        assert len(generated.words) == 25
        assert generated.interactions is not None

    def test_agent_with_llm(self):
        """The red spymaster returns a ClueAction that includes reasoning."""
        state = CodenamesGameState(board=generate_board(seed=42))
        action = CodenamesAgent("red_spymaster").get_action(state)
        assert isinstance(action, ClueAction)
        # The action is expected to carry non-empty reasoning text.
        assert action.reasoning

    def test_full_game_with_llm(self):
        """A game of up to 50 steps records at least one clue and one guess."""
        record = CodenamesGameRunner(max_steps=50).run_game(
            config=CodenamesConfig(complexity_level=2),
            seed=42,
            verbose=False,
        )
        assert len(record.steps) > 0
        # At least one step of each kind must appear in the recorded game.
        assert any("CLUE" in step.action_text for step in record.steps)
        assert any("GUESS" in step.action_text for step in record.steps)
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting; pass it
    # on so a failing run gives this script a non-zero exit status.
    raise SystemExit(pytest.main([__file__, "-v"]))