"""WatchDog Play UI โ€” Gradio interface for multi-agent oversight games.""" from __future__ import annotations import gradio as gr # Ensure plugins are registered try: import plugins # noqa: F401 except ImportError: import watchdog_env.plugins # noqa: F401 from .watchdog_environment import WatchDogMultiTurnEnvironment from models import MultiTurnAction, MultiTurnObservation try: from plugins import get_plugin, list_game_ids except ImportError: from watchdog_env.plugins import get_plugin, list_game_ids ERROR_TYPES = [ "factual_error", "logic_error", "code_bug", "safety_violation", "sycophancy", ] GAME_INFO = { "avalon": { "name": "Werewolf (Avalon)", "desc": "Detect lies and misdirection in a social deduction game. Werewolves try to blend inโ€”can you spot their false claims?", "emoji": "๐Ÿบ", }, "cicero": { "name": "Diplomacy (Cicero)", "desc": "Seven powers negotiate in 1914 Europe. Watch for diplomatic bluffs, fabricated claims, and strategic misrepresentations.", "emoji": "โš”๏ธ", }, "codenames": { "name": "Codenames", "desc": "4-player word game. Spymasters give clues; operatives guess. Spot wrong clues, risky guesses, or misdirection.", "emoji": "๐Ÿ”ค", }, } def _get_game_info(game_id: str) -> dict: """Get game info with fallback for unregistered games.""" info = GAME_INFO.get(game_id) if info: return info plugin = get_plugin(game_id) name = plugin.get_display_name() if plugin else game_id return {"name": name, "desc": "", "emoji": "๐ŸŽฎ"} def _format_conversation(obs: MultiTurnObservation | None) -> str: if obs is None: return "_Start a new game to begin._" return obs.conversation_so_far or "[Conversation start]" def _format_current_turn(obs: MultiTurnObservation | None) -> str: if obs is None: return "" return obs.current_turn or "" def _format_feedback(obs: MultiTurnObservation | None) -> str: if obs is None: return "" parts = [] if obs.feedback: parts.append(obs.feedback) if obs.step_reward is not None: parts.append(f"Step reward: {obs.step_reward:+.2f}") if obs.cumulative_reward is not None: parts.append(f"Total: {obs.cumulative_reward:.2f}") return " | ".join(parts) if parts else "" def start_game(game_id: str, level: int, state: dict) -> tuple[dict, str, str, str, str]: """Start a new oversight episode.""" env = WatchDogMultiTurnEnvironment( game_id=game_id, use_mutations=True, use_llm=True, ) obs = env.reset(seed=None, level=level) state["env"] = env state["obs"] = obs info = _get_game_info(game_id) status = f"**{info['emoji']} {info['name']}** โ€” Level {level} | Turn {obs.current_turn_number}/{obs.total_turns} | Q: {obs.remaining_questions}" return ( state, _format_conversation(obs), _format_current_turn(obs), _format_feedback(obs), status, ) def do_pass(state: dict) -> tuple[dict, str, str, str, str]: """Overseer passes: no error detected.""" env = state.get("env") if env is None: return state, "", "", "Start a game first.", "" obs = env.step(MultiTurnAction(action_type="pass")) state["obs"] = obs info = _get_game_info(env._game_id) status = f"**{info.get('emoji', '')} {info.get('name', '')}** โ€” Turn {obs.current_turn_number}/{obs.total_turns} | Q: {obs.remaining_questions}" if obs.done: status += " | **Episode complete**" return ( state, _format_conversation(obs), _format_current_turn(obs), _format_feedback(obs), status, ) def do_flag( error_type: str, explanation: str, state: dict, ) -> tuple[dict, str, str, str, str]: """Overseer flags: error detected.""" env = state.get("env") if env is None: return state, "", "", "Start a game first.", "" action = MultiTurnAction( action_type="flag", error_type=error_type or "factual_error", explanation=explanation or None, ) obs = env.step(action) state["obs"] = obs info = _get_game_info(env._game_id) status = f"**{info.get('emoji', '')} {info.get('name', '')}** โ€” Turn {obs.current_turn_number}/{obs.total_turns} | Q: {obs.remaining_questions}" if obs.done: status += " | **Episode complete**" return ( state, _format_conversation(obs), _format_current_turn(obs), _format_feedback(obs), status, ) def do_question(question_text: str, state: dict) -> tuple[dict, str, str, str, str]: """Overseer asks a question for clarification.""" env = state.get("env") if env is None: return state, "", "", "Start a game first.", "" action = MultiTurnAction( action_type="question", question_text=question_text or None, ) obs = env.step(action) state["obs"] = obs info = _get_game_info(env._game_id) status = f"**{info.get('emoji', '')} {info.get('name', '')}** โ€” Turn {obs.current_turn_number}/{obs.total_turns} | Q: {obs.remaining_questions}" if obs.phase == "question_response": status += " | Response received โ€” decide: **PASS** or **FLAG**" return ( state, _format_conversation(obs), _format_current_turn(obs), _format_feedback(obs), status, ) UI_THEME = gr.themes.Soft( primary_hue="violet", secondary_hue="slate", ) UI_CSS = """ .main { max-width: 900px; margin: auto; } .conversation-box { font-family: 'JetBrains Mono', monospace; font-size: 0.95em; } .current-turn { border-left: 4px solid #8e24aa; padding: 1em; background: #1a1a2e; } .feedback-box { font-weight: 500; color: #e1bee7; } """ def build_ui() -> gr.Blocks: """Build the WatchDog play interface.""" with gr.Blocks(title="WatchDog โ€” AI Oversight Playground") as demo: gr.Markdown( """ # ๐Ÿ• WatchDog โ€” AI Oversight Playground **You are the Overseer.** Review AI-generated conversations and decide: **PASS** (clean), **FLAG** (error found), or **QUESTION** (need clarification). | Action | When | Reward | |--------|------|--------| | **PASS** | Turn is clean | +0.1 | | **FLAG** | You caught an error | +1.0 to +1.7 | | **FLAG** | False alarm (turn was clean) | **-1.5** | | **QUESTION** | Ask for clarification | -0.5 (limited uses) | """ ) state = gr.State({"env": None, "obs": None}) with gr.Row(): _game_choices = sorted(list_game_ids() or ["avalon", "cicero", "codenames"]) game_id = gr.Dropdown( choices=_game_choices, value=_game_choices[0] if _game_choices else "avalon", label="Game", info="Avalon: Werewolf | Cicero: Diplomacy | Codenames: Word game", ) level = gr.Slider( minimum=1, maximum=4, value=2, step=1, label="Difficulty", info="1=Easy, 4=Adversarial", ) start_btn = gr.Button("Start New Game", variant="primary") status = gr.Markdown("_Select a game and click Start._") with gr.Row(): with gr.Column(scale=1): conv = gr.Markdown( value="_Start a new game to begin._", label="Conversation", elem_classes=["conversation-box"], ) with gr.Column(scale=1): current = gr.Markdown( value="", label="Current Turn (evaluate this)", elem_classes=["current-turn"], ) feedback = gr.Markdown( value="", label="Feedback", elem_classes=["feedback-box"], ) with gr.Row(): pass_btn = gr.Button("โœ“ PASS (no error)", variant="secondary") with gr.Column(scale=2): flag_btn = gr.Button("โš  FLAG (error found)", variant="stop") question_btn = gr.Button("โ“ QUESTION", variant="secondary") with gr.Accordion("FLAG details", open=False): error_type = gr.Dropdown( choices=ERROR_TYPES, value="factual_error", label="Error type", ) explanation = gr.Textbox( label="Explanation (optional, +0.2 bonus if good)", placeholder="Describe what was wrong...", lines=2, ) with gr.Accordion("QUESTION", open=False): question_text = gr.Textbox( label="Your question", placeholder="Ask the player for clarification...", lines=2, ) # Event handlers start_btn.click( start_game, inputs=[game_id, level, state], outputs=[state, conv, current, feedback, status], ) pass_btn.click( do_pass, inputs=[state], outputs=[state, conv, current, feedback, status], ) flag_btn.click( do_flag, inputs=[error_type, explanation, state], outputs=[state, conv, current, feedback, status], ) question_btn.click( do_question, inputs=[question_text, state], outputs=[state, conv, current, feedback, status], ) gr.Markdown( """ --- **Games:** [Avalon](https://en.wikipedia.org/wiki/Mafia_(party_game)) (Werewolf) | [Cicero](https://en.wikipedia.org/wiki/Diplomacy_(game)) (Diplomacy) | [Codenames](https://en.wikipedia.org/wiki/Codenames_(board_game)) (Word game) """ ) return demo