Spaces:

Mooizz
/

New-space-openenv

Sleeping

File size: 22,781 Bytes
"""Tests for the Codenames multi-agent plugin.

Tests cover:
- Plugin registration and basic methods
- Configuration validation
- Game state transitions (using mock board)
- Word interactions serialization
- Conversation log (matching Cicero pattern)
- Integration tests

Uses shared local Qwen3 8B game-play model from avalon/llm.py.
Tests marked with SKIP_WITHOUT_GPU require a GPU for the local model.
"""

from __future__ import annotations

import pytest
from watchdog_env.plugins.codenames.codenames_config import CodenamesConfig, CODENAMES_AGENTS
from watchdog_env.plugins.codenames.codenames_plugin import CodenamesPlugin
from watchdog_env.plugins.codenames.board_generator import (
    generate_board,
    BoardAssignment,
    BoardGenerationError,
)
from watchdog_env.plugins.codenames.game_state import CodenamesGameState, ClueRecord, GuessRecord
from watchdog_env.plugins.codenames.word_interactions import (
    WordInteractions,
    WordRelation,
    ThematicCluster,
)
from watchdog_env.plugins.codenames.agents import (
    CodenamesAgent,
    create_agents,
    ClueAction,
    GuessAction,
    AgentActionError,
)
from watchdog_env.plugins.codenames.game_runner import (
    CodenamesGameRunner,
    GameRecord,
)
from watchdog_env.plugins.base import get_conversation_log
from watchdog_env.plugins.registry import get_plugin, list_game_ids


def _has_gpu():
    """Check if GPU is available for local model."""
    try:
        import torch
        return torch.cuda.is_available()
    except ImportError:
        return False


# Skip tests that require GPU for local model
SKIP_WITHOUT_GPU = pytest.mark.skipif(
    not _has_gpu(),
    reason="GPU not available for local model"
)


def _create_mock_board() -> BoardAssignment:
    """Create a mock board for testing without API."""
    words = [
        "APPLE", "BANK", "CARD", "DOOR", "ENGINE",
        "FIRE", "GHOST", "HOTEL", "ICE", "JAZZ",
        "KING", "LAMP", "MOON", "NIGHT", "OPERA",
        "PIANO", "QUEEN", "RIVER", "STAR", "TREE",
        "UMBRELLA", "VIOLIN", "WATER", "YARD", "ZEBRA",
    ]
    
    assignments = {}
    # Assassin
    assignments["ZEBRA"] = "assassin"
    # Red team (9)
    for w in words[0:9]:
        assignments[w] = "red"
    # Blue team (8)
    for w in words[9:17]:
        assignments[w] = "blue"
    # Neutral (7)
    for w in words[17:24]:
        assignments[w] = "neutral"
    
    interactions = WordInteractions(words=words)
    
    grid = [words[i*5:(i+1)*5] for i in range(5)]
    
    return BoardAssignment(
        words=words,
        assignments=assignments,
        interactions=interactions,
        grid=grid,
    )


# ============================================================================
# Plugin Registration Tests
# ============================================================================

class TestPluginRegistration:
    """Test plugin registration and basic interface."""
    
    def test_plugin_registered(self):
        """Codenames plugin should be registered."""
        assert "codenames" in list_game_ids()
    
    def test_get_plugin(self):
        """Should be able to get Codenames plugin by ID."""
        plugin = get_plugin("codenames")
        assert plugin is not None
        assert plugin.get_game_id() == "codenames"
    
    def test_game_id(self):
        """Plugin should return correct game ID."""
        plugin = CodenamesPlugin()
        assert plugin.get_game_id() == "codenames"
    
    def test_display_name(self):
        """Plugin should return display name."""
        plugin = CodenamesPlugin()
        assert "Codenames" in plugin.get_display_name()
    
    def test_list_agent_ids(self):
        """Plugin should list all 4 agent IDs."""
        plugin = CodenamesPlugin()
        agents = plugin.list_agent_ids()
        assert len(agents) == 4
        assert "red_spymaster" in agents
        assert "red_operative" in agents
        assert "blue_spymaster" in agents
        assert "blue_operative" in agents


# ============================================================================
# Configuration Tests
# ============================================================================

class TestCodenamesConfig:
    """Test game configuration."""
    
    def test_default_config(self):
        """Default config should have valid values."""
        config = CodenamesConfig()
        assert config.board_size == 25
        assert config.red_words == 9
        assert config.blue_words == 8
        assert config.neutral_words == 7
        assert config.assassin_words == 1
        assert config.starting_team == "red"
        assert config.complexity_level == 2
    
    def test_validate_correct_total(self):
        """Config with correct word totals should validate."""
        config = CodenamesConfig()
        config.validate()  # Should not raise
    
    def test_validate_incorrect_total(self):
        """Config with incorrect word totals should fail validation."""
        config = CodenamesConfig(red_words=10)  # Total = 26 != 25
        with pytest.raises(ValueError, match="must equal board_size"):
            config.validate()
    
    def test_validate_invalid_team(self):
        """Config with invalid starting team should fail validation."""
        config = CodenamesConfig(starting_team="green")  # type: ignore
        with pytest.raises(ValueError, match="must be 'red' or 'blue'"):
            config.validate()
    
    def test_validate_invalid_complexity(self):
        """Config with invalid complexity should fail validation."""
        config = CodenamesConfig(complexity_level=5)
        with pytest.raises(ValueError, match="must be 1, 2, or 3"):
            config.validate()


# ============================================================================
# Board Generation Tests
# ============================================================================

@SKIP_WITHOUT_GPU
class TestBoardGeneration:
    """Test board generation functionality (requires API)."""
    
    def test_generate_board_basic(self):
        """Should generate board with basic complexity."""
        board = generate_board(seed=42, complexity_level=1)
        assert len(board.words) == 25
        assert len(board.assignments) == 25
        assert len(board.grid) == 5
        assert all(len(row) == 5 for row in board.grid)
    
    def test_generate_board_medium(self):
        """Should generate board with medium complexity."""
        board = generate_board(seed=42, complexity_level=2)
        assert len(board.words) == 25
        assert board.interactions is not None
    
    def test_generate_board_complex(self):
        """Should generate board with complex complexity."""
        board = generate_board(seed=42, complexity_level=3)
        assert len(board.words) == 25
        assert board.interactions is not None
    
    def test_board_assignments_correct_counts(self):
        """Board should have correct number of each word type."""
        board = generate_board(seed=42)
        
        red_count = sum(1 for t in board.assignments.values() if t == "red")
        blue_count = sum(1 for t in board.assignments.values() if t == "blue")
        neutral_count = sum(1 for t in board.assignments.values() if t == "neutral")
        assassin_count = sum(1 for t in board.assignments.values() if t == "assassin")
        
        assert red_count == 9
        assert blue_count == 8
        assert neutral_count == 7
        assert assassin_count == 1


# ============================================================================
# Word Interactions Tests
# ============================================================================

class TestWordInteractions:
    """Test word interaction functionality."""
    
    def test_word_interactions_serialization(self):
        """Word interactions should serialize and deserialize correctly."""
        original = WordInteractions(
            words=["BANK", "RIVER", "CURRENT"],
            relations={
                "BANK": WordRelation("BANK", ["RIVER"], "polyseme", ["finance", "nature"]),
            },
            clusters=[ThematicCluster("water", ["BANK", "RIVER", "CURRENT"])],
            polysemes=["BANK", "CURRENT"],
            false_friends=[("BANK", "SUIT")],
            assassin_traps=["CURRENT"],
        )
        
        data = original.to_dict()
        restored = WordInteractions.from_dict(data)
        
        assert restored.words == original.words
        assert restored.polysemes == original.polysemes
        assert len(restored.clusters) == len(original.clusters)
        assert len(restored.relations) == len(original.relations)
    
    def test_get_related_words(self):
        """Should get related words for a word."""
        interactions = WordInteractions(
            words=["BANK", "RIVER"],
            relations={
                "BANK": WordRelation("BANK", ["RIVER", "CURRENT"], "polyseme"),
            },
        )
        
        related = interactions.get_related_words("BANK")
        assert "RIVER" in related
        assert "CURRENT" in related
    
    def test_get_trap_level(self):
        """Should get trap level for a word."""
        interactions = WordInteractions(
            words=["BANK"],
            relations={
                "BANK": WordRelation("BANK", [], "polyseme", [], trap_level=2),
            },
        )
        
        assert interactions.get_trap_level("BANK") == 2
        assert interactions.get_trap_level("UNKNOWN") == 0


# ============================================================================
# Game State Tests (using mock board)
# ============================================================================

class TestGameState:
    """Test game state management using mock board."""
    
    @pytest.fixture
    def game_state(self):
        """Create a game state for testing."""
        board = _create_mock_board()
        return CodenamesGameState(board=board)
    
    def test_initial_state(self, game_state):
        """Initial state should be correct."""
        assert game_state.current_team == "red"
        assert game_state.current_phase == "clue"
        assert game_state.turn_number == 0
        assert not game_state.game_over
        assert game_state.winner is None
    
    def test_get_remaining_words(self, game_state):
        """Should get correct remaining words for each team."""
        red_words = game_state.get_remaining_words("red")
        blue_words = game_state.get_remaining_words("blue")
        
        assert len(red_words) == 9
        assert len(blue_words) == 8
    
    def test_process_clue(self, game_state):
        """Processing clue should update state correctly."""
        game_state.process_clue("ANIMAL", 3, "Three animal words")
        
        assert game_state.current_phase == "guess"
        assert game_state.current_clue is not None
        assert game_state.current_clue.clue_word == "ANIMAL"
        assert game_state.current_clue.clue_number == 3
        assert len(game_state.clue_history) == 1
    
    def test_process_correct_guess(self, game_state):
        """Correct guess should allow continued guessing."""
        game_state.process_clue("TEST", 2)
        
        # Get a red word
        red_words = game_state.get_remaining_words("red")
        word = red_words[0]
        
        can_continue, message = game_state.process_guess(word)
        
        assert can_continue
        assert word in game_state.revealed_words
        assert len(game_state.get_remaining_words("red")) == 8
    
    def test_process_wrong_guess_ends_turn(self, game_state):
        """Wrong guess should end turn."""
        game_state.process_clue("TEST", 2)
        
        # Get a blue word (wrong for red team)
        blue_words = game_state.get_remaining_words("blue")
        word = blue_words[0]
        
        can_continue, message = game_state.process_guess(word)
        
        assert not can_continue
        assert word in game_state.revealed_words
    
    def test_assassin_ends_game(self, game_state):
        """Hitting assassin should end game."""
        game_state.process_clue("TEST", 1)
        
        # Get assassin word
        assassin_word = [w for w, t in game_state.board.assignments.items() if t == "assassin"][0]
        
        can_continue, message = game_state.process_guess(assassin_word)
        
        assert not can_continue
        assert game_state.game_over
        assert game_state.winner == "blue"  # Opponent wins
        assert game_state.game_over_reason == "assassin"
    
    def test_all_words_found_wins(self, game_state):
        """Finding all team words should win the game."""
        game_state.process_clue("TEST", 10)  # High number for testing
        
        red_words = list(game_state.get_remaining_words("red"))
        for word in red_words:
            can_continue, message = game_state.process_guess(word)
            if game_state.game_over:
                break
        
        assert game_state.game_over
        assert game_state.winner == "red"
        assert game_state.game_over_reason == "all_words"
    
    def test_end_turn_switches_team(self, game_state):
        """Ending turn should switch to other team."""
        assert game_state.current_team == "red"
        
        game_state.process_clue("TEST", 1)
        game_state.end_turn()
        
        assert game_state.current_team == "blue"
        assert game_state.current_phase == "clue"
        assert game_state.turn_number == 1
    
    def test_state_serialization(self, game_state):
        """Game state should serialize and deserialize correctly."""
        game_state.process_clue("TEST", 2)
        
        data = game_state.to_dict()
        restored = CodenamesGameState.from_dict(data)
        
        assert restored.current_team == game_state.current_team
        assert restored.current_phase == game_state.current_phase
        assert restored.turn_number == game_state.turn_number
        assert len(restored.clue_history) == len(game_state.clue_history)


# ============================================================================
# Agent Tests
# ============================================================================

class TestAgentBasics:
    """Test basic agent functionality."""
    
    def test_create_agents(self):
        """Should create all 4 agents."""
        agents = create_agents()
        assert len(agents) == 4
        assert "red_spymaster" in agents
        assert "red_operative" in agents
        assert "blue_spymaster" in agents
        assert "blue_operative" in agents
    
    def test_agent_roles(self):
        """Agents should have correct roles."""
        agents = create_agents()
        
        assert agents["red_spymaster"].is_spymaster
        assert agents["red_spymaster"].team == "red"
        
        assert not agents["red_operative"].is_spymaster
        assert agents["red_operative"].team == "red"
        
        assert agents["blue_spymaster"].is_spymaster
        assert agents["blue_spymaster"].team == "blue"


@SKIP_WITHOUT_GPU
class TestAgentsWithLLM:
    """Test agent functionality with API."""
    
    def test_spymaster_clue(self):
        """Spymaster should generate valid clue."""
        board = generate_board(seed=42)
        game_state = CodenamesGameState(board=board)
        
        agent = CodenamesAgent("red_spymaster")
        action = agent.get_action(game_state)
        
        assert isinstance(action, ClueAction)
        assert action.clue_word
        assert action.clue_number >= 1
    
    def test_operative_guess(self):
        """Operative should generate valid guess."""
        board = generate_board(seed=42)
        game_state = CodenamesGameState(board=board)
        game_state.process_clue("TEST", 2)
        
        agent = CodenamesAgent("red_operative")
        action = agent.get_action(game_state)
        
        assert isinstance(action, GuessAction)
        # Either a valid word guess or a pass
        if not action.pass_turn:
            unrevealed = set(game_state.board.words) - game_state.revealed_words
            assert action.guessed_word in unrevealed


# ============================================================================
# Plugin Integration Tests
# ============================================================================

@SKIP_WITHOUT_GPU
class TestPluginIntegration:
    """Test the plugin as a whole (requires API)."""
    
    def test_reset_and_generate_step(self):
        """Should reset and generate steps."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42, config=CodenamesConfig(complexity_level=1))
        
        # First step should be red spymaster clue
        step0 = plugin.generate_step(seed=42, step_index=0)
        
        assert len(step0.turns) >= 1
        assert step0.turns[0].agent_id == "red_spymaster"
        assert "CLUE" in step0.turns[0].action_text
    
    def test_state_tracking(self):
        """Plugin should track state correctly."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        
        state = plugin.get_state()
        assert state.step_index == 0
        assert len(state.turns_so_far) == 0
        
        plugin.generate_step(seed=42, step_index=0)
        
        state = plugin.get_state()
        assert state.step_index == 1
        assert len(state.turns_so_far) >= 1
    
    def test_game_state_access(self):
        """Should be able to access internal game state."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        
        game_state = plugin.get_game_state()
        assert game_state is not None
        assert len(game_state.board.words) == 25
    
    def test_full_game_state(self):
        """Should get full serialized game state."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=42)
        
        full_state = plugin.get_full_game_state()
        assert "game_state" in full_state
        assert "plugin_state" in full_state
    
    def test_conversation_log_cleared_on_reset(self):
        """Reset clears conversation_log (matching Cicero pattern)."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        plugin.generate_step(seed=1, step_index=0)
        plugin.reset(seed=99, config=CodenamesConfig(complexity_level=1))
        assert len(get_conversation_log(plugin.get_state())) == 0
    
    def test_conversation_log_in_step_state(self):
        """Each step returns state with conversation_log; entries have speaker_id, message."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        step = plugin.generate_step(seed=1, step_index=0)
        assert step.state is not None
        assert isinstance(step.state.conversation_log, list)
        log = step.state.conversation_log
        for entry in log:
            assert "speaker_id" in entry and "message" in entry
            assert isinstance(entry["message"], str) and len(entry["message"]) > 0
    
    def test_conversation_log_accumulates(self):
        """Conversation log accumulates across steps."""
        plugin = CodenamesPlugin()
        plugin.reset(seed=1, config=CodenamesConfig(complexity_level=1))
        
        plugin.generate_step(seed=1, step_index=0)
        log1 = get_conversation_log(plugin.get_state())
        assert len(log1) >= 1
        
        plugin.generate_step(seed=1, step_index=1)
        log2 = get_conversation_log(plugin.get_state())
        assert len(log2) >= 2


# ============================================================================
# Game Runner Tests
# ============================================================================

@SKIP_WITHOUT_GPU
class TestGameRunner:
    """Test game runner functionality (requires API)."""
    
    def test_run_game_produces_record(self):
        """Running a game should produce a GameRecord."""
        runner = CodenamesGameRunner(max_steps=10)
        record = runner.run_game(seed=42, verbose=False)
        
        assert isinstance(record, GameRecord)
        assert record.game_id
        assert record.start_time
        assert len(record.steps) > 0
    
    def test_game_record_serialization(self):
        """Game record should serialize to JSON."""
        runner = CodenamesGameRunner(max_steps=5)
        record = runner.run_game(seed=42, verbose=False)
        
        json_str = record.to_json()
        assert json_str
        assert "game_id" in json_str
        assert "steps" in json_str
    
    def test_game_record_save_load(self, tmp_path):
        """Game record should save and load correctly."""
        runner = CodenamesGameRunner(max_steps=5)
        record = runner.run_game(seed=42, verbose=False)
        
        filepath = tmp_path / "test_game.json"
        record.save(filepath)
        
        loaded = GameRecord.load(filepath)
        assert loaded.game_id == record.game_id
        assert loaded.seed == record.seed
        assert len(loaded.steps) == len(record.steps)


# ============================================================================
# Full Integration Tests with API
# ============================================================================

@SKIP_WITHOUT_GPU
class TestFullIntegration:
    """Full integration tests that require LLM (local or Gemini)."""
    
    def test_board_generation_with_llm(self):
        """Board generation should use LLM."""
        board = generate_board(seed=42, complexity_level=2)
        assert len(board.words) == 25
        assert board.interactions is not None
    
    def test_agent_with_llm(self):
        """Agent should use LLM for decisions."""
        board = generate_board(seed=42)
        game_state = CodenamesGameState(board=board)
        
        agent = CodenamesAgent("red_spymaster")
        action = agent.get_action(game_state)
        
        assert isinstance(action, ClueAction)
        # LLM should provide reasoning
        assert action.reasoning
    
    def test_full_game_with_llm(self):
        """Should be able to run a full game with LLM."""
        runner = CodenamesGameRunner(max_steps=50)
        record = runner.run_game(
            config=CodenamesConfig(complexity_level=2),
            seed=42,
            verbose=False
        )
        
        assert len(record.steps) > 0
        # Game should have some meaningful interactions
        clue_steps = [s for s in record.steps if "CLUE" in s.action_text]
        guess_steps = [s for s in record.steps if "GUESS" in s.action_text]
        
        assert len(clue_steps) > 0
        assert len(guess_steps) > 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])