Spaces:
Sleeping
Sleeping
| """WatchDog Play UI — Gradio interface for multi-agent oversight games.""" | |
| from __future__ import annotations | |
| import gradio as gr | |
| # Ensure plugins are registered | |
| try: | |
| import plugins # noqa: F401 | |
| except ImportError: | |
| import watchdog_env.plugins # noqa: F401 | |
| from .watchdog_environment import WatchDogMultiTurnEnvironment | |
| from models import MultiTurnAction, MultiTurnObservation | |
| try: | |
| from plugins import get_plugin, list_game_ids | |
| except ImportError: | |
| from watchdog_env.plugins import get_plugin, list_game_ids | |
# Error categories an overseer can attach to a FLAG action. The list is
# offered verbatim as the choices of the "Error type" dropdown.
ERROR_TYPES = [
    "factual_error",
    "logic_error",
    "code_bug",
    "safety_violation",
    "sycophancy",
]

# Display metadata for the built-in games, keyed by game id. Each entry holds
# a human-readable name, a short description and an emoji for the status line.
# Non-ASCII characters are written as escapes so the source stays pure ASCII
# and cannot be corrupted by a wrong file encoding.
GAME_INFO = {
    "avalon": {
        "name": "Werewolf (Avalon)",
        "desc": "Detect lies and misdirection in a social deduction game. Werewolves try to blend in\u2014can you spot their false claims?",
        "emoji": "\U0001F43A",  # wolf face
    },
    "cicero": {
        "name": "Diplomacy (Cicero)",
        "desc": "Seven powers negotiate in 1914 Europe. Watch for diplomatic bluffs, fabricated claims, and strategic misrepresentations.",
        "emoji": "\u2694\ufe0f",  # crossed swords (emoji presentation)
    },
    "codenames": {
        "name": "Codenames",
        "desc": "4-player word game. Spymasters give clues; operatives guess. Spot wrong clues, risky guesses, or misdirection.",
        "emoji": "\U0001F524",  # "abc" input symbol
    },
}
def _get_game_info(game_id: str) -> dict:
    """Return display metadata for ``game_id``.

    The id is looked up in ``GAME_INFO`` first. For ids without an entry
    there, a minimal record is built from the plugin's display name, or from
    the raw id when ``get_plugin`` returns a falsy value.

    Args:
        game_id: Identifier of the game to describe.

    Returns:
        A dict with ``"name"``, ``"desc"`` and ``"emoji"`` keys.
    """
    known = GAME_INFO.get(game_id)
    if known:
        return known

    plugin = get_plugin(game_id)
    display_name = plugin.get_display_name() if plugin else game_id
    # Generic fallback emoji: video game controller (U+1F3AE), written as an
    # escape so the source stays ASCII-only.
    return {"name": display_name, "desc": "", "emoji": "\U0001F3AE"}
| def _format_conversation(obs: MultiTurnObservation | None) -> str: | |
| if obs is None: | |
| return "_Start a new game to begin._" | |
| return obs.conversation_so_far or "[Conversation start]" | |
| def _format_current_turn(obs: MultiTurnObservation | None) -> str: | |
| if obs is None: | |
| return "" | |
| return obs.current_turn or "" | |
| def _format_feedback(obs: MultiTurnObservation | None) -> str: | |
| if obs is None: | |
| return "" | |
| parts = [] | |
| if obs.feedback: | |
| parts.append(obs.feedback) | |
| if obs.step_reward is not None: | |
| parts.append(f"Step reward: {obs.step_reward:+.2f}") | |
| if obs.cumulative_reward is not None: | |
| parts.append(f"Total: {obs.cumulative_reward:.2f}") | |
| return " | ".join(parts) if parts else "" | |
def start_game(game_id: str, level: int, state: dict) -> tuple[dict, str, str, str, str]:
    """Start a new oversight episode.

    Creates a fresh ``WatchDogMultiTurnEnvironment`` for ``game_id`` (with
    mutations and the LLM enabled), resets it unseeded at ``level`` and
    stores the environment and its first observation in ``state`` under the
    ``"env"`` and ``"obs"`` keys.

    Args:
        game_id: Identifier of the game to play.
        level: Difficulty level passed to ``env.reset``.
        state: Mutable state dict; updated in place.

    Returns:
        ``(state, conversation, current_turn, feedback, status)`` in the
        order the UI outputs are wired.
    """
    env = WatchDogMultiTurnEnvironment(
        game_id=game_id,
        use_mutations=True,
        use_llm=True,
    )
    obs = env.reset(seed=None, level=level)
    state["env"] = env
    state["obs"] = obs

    info = _get_game_info(game_id)
    # The em dash is written as an escape (\u2014) so the status line renders
    # correctly regardless of how the source file is decoded.
    status = (
        f"**{info['emoji']} {info['name']}** \u2014 Level {level}"
        f" | Turn {obs.current_turn_number}/{obs.total_turns}"
        f" | Q: {obs.remaining_questions}"
    )
    return (
        state,
        _format_conversation(obs),
        _format_current_turn(obs),
        _format_feedback(obs),
        status,
    )
def _no_game_outputs(state: dict) -> tuple[dict, str, str, str, str]:
    """Return the UI outputs used when an action is taken before any game exists."""
    return state, "", "", "Start a game first.", ""


def _step_outputs(
    state: dict,
    env: WatchDogMultiTurnEnvironment,
    obs: MultiTurnObservation,
    note: str = "",
) -> tuple[dict, str, str, str, str]:
    """Record ``obs`` in ``state`` and render the five UI outputs for it.

    ``note`` is appended to the status line before the completion marker.
    """
    state["obs"] = obs
    info = _get_game_info(env._game_id)
    # The em dash is written as an escape (\u2014) so the status line renders
    # correctly regardless of how the source file is decoded.
    status = (
        f"**{info.get('emoji', '')} {info.get('name', '')}** \u2014"
        f" Turn {obs.current_turn_number}/{obs.total_turns}"
        f" | Q: {obs.remaining_questions}"
    )
    status += note
    if obs.done:
        status += " | **Episode complete**"
    return (
        state,
        _format_conversation(obs),
        _format_current_turn(obs),
        _format_feedback(obs),
        status,
    )


def do_pass(state: dict) -> tuple[dict, str, str, str, str]:
    """Overseer passes: no error detected.

    Steps the environment stored in ``state["env"]`` with a ``"pass"`` action.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``; a
        "Start a game first." feedback tuple when no environment is stored.
    """
    env = state.get("env")
    if env is None:
        return _no_game_outputs(state)
    obs = env.step(MultiTurnAction(action_type="pass"))
    return _step_outputs(state, env, obs)


def do_flag(
    error_type: str,
    explanation: str,
    state: dict,
) -> tuple[dict, str, str, str, str]:
    """Overseer flags: error detected.

    Args:
        error_type: Error category; an empty value falls back to
            ``"factual_error"``.
        explanation: Free-text justification; an empty value is sent as
            ``None``.
        state: Mutable state dict holding the environment under ``"env"``.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``; a
        "Start a game first." feedback tuple when no environment is stored.
    """
    env = state.get("env")
    if env is None:
        return _no_game_outputs(state)
    action = MultiTurnAction(
        action_type="flag",
        error_type=error_type or "factual_error",
        explanation=explanation or None,
    )
    return _step_outputs(state, env, env.step(action))


def do_question(question_text: str, state: dict) -> tuple[dict, str, str, str, str]:
    """Overseer asks a question for clarification.

    Args:
        question_text: The question to ask; an empty value is sent as ``None``.
        state: Mutable state dict holding the environment under ``"env"``.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``. When the
        observation's phase is ``"question_response"`` the status prompts for
        a PASS/FLAG decision. Like the other actions, the status is marked
        complete when ``obs.done`` is set.
    """
    env = state.get("env")
    if env is None:
        return _no_game_outputs(state)
    action = MultiTurnAction(
        action_type="question",
        question_text=question_text or None,
    )
    obs = env.step(action)
    note = ""
    if obs.phase == "question_response":
        note = " | Response received \u2014 decide: **PASS** or **FLAG**"
    return _step_outputs(state, env, obs, note)
# Gradio "Soft" theme with a violet primary hue and a slate secondary hue.
UI_THEME = gr.themes.Soft(primary_hue="violet", secondary_hue="slate")

# Custom stylesheet. The .conversation-box, .current-turn and .feedback-box
# selectors correspond to the elem_classes given to the Markdown panes.
UI_CSS = """
.main { max-width: 900px; margin: auto; }
.conversation-box { font-family: 'JetBrains Mono', monospace; font-size: 0.95em; }
.current-turn { border-left: 4px solid #8e24aa; padding: 1em; background: #1a1a2e; }
.feedback-box { font-weight: 500; color: #e1bee7; }
"""
| def build_ui() -> gr.Blocks: | |
| """Build the WatchDog play interface.""" | |
# Assembles the page inside a single gr.Blocks context and returns it without
# launching: header text, game/difficulty controls, the conversation,
# current-turn and feedback panes, the three action buttons with their input
# fields, and the click wiring to start_game / do_pass / do_flag / do_question.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the Row/Column/Accordion blocks below should be confirmed in the source.
# NOTE(review): UI_THEME and UI_CSS are not passed to gr.Blocks here;
# presumably they are applied elsewhere (e.g. at launch) - confirm.
| with gr.Blocks(title="WatchDog โ AI Oversight Playground") as demo: | |
# Header: title, one-line instructions and a reward table per action.
| gr.Markdown( | |
| """ | |
| # ๐ WatchDog โ AI Oversight Playground | |
| **You are the Overseer.** Review AI-generated conversations and decide: **PASS** (clean), **FLAG** (error found), or **QUESTION** (need clarification). | |
| | Action | When | Reward | | |
| |--------|------|--------| | |
| | **PASS** | Turn is clean | +0.1 | | |
| | **FLAG** | You caught an error | +1.0 to +1.7 | | |
| | **FLAG** | False alarm (turn was clean) | **-1.5** | | |
| | **QUESTION** | Ask for clarification | -0.5 (limited uses) | | |
| """ | |
| ) | |
# State dict handed to every handler; they read and write its "env" and
# "obs" keys.
| state = gr.State({"env": None, "obs": None}) | |
# Game picker, difficulty slider (1-4, default 2) and the start button.
| with gr.Row(): | |
# Falls back to the three built-in ids when list_game_ids() returns nothing,
# so the choice list is never empty.
| _game_choices = sorted(list_game_ids() or ["avalon", "cicero", "codenames"]) | |
| game_id = gr.Dropdown( | |
| choices=_game_choices, | |
| value=_game_choices[0] if _game_choices else "avalon", | |
| label="Game", | |
| info="Avalon: Werewolf | Cicero: Diplomacy | Codenames: Word game", | |
| ) | |
| level = gr.Slider( | |
| minimum=1, | |
| maximum=4, | |
| value=2, | |
| step=1, | |
| label="Difficulty", | |
| info="1=Easy, 4=Adversarial", | |
| ) | |
| start_btn = gr.Button("Start New Game", variant="primary") | |
# Status line; it is the last output of every handler below.
| status = gr.Markdown("_Select a game and click Start._") | |
# Output panes. Their elem_classes match the selectors defined in UI_CSS.
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| conv = gr.Markdown( | |
| value="_Start a new game to begin._", | |
| label="Conversation", | |
| elem_classes=["conversation-box"], | |
| ) | |
| with gr.Column(scale=1): | |
| current = gr.Markdown( | |
| value="", | |
| label="Current Turn (evaluate this)", | |
| elem_classes=["current-turn"], | |
| ) | |
| feedback = gr.Markdown( | |
| value="", | |
| label="Feedback", | |
| elem_classes=["feedback-box"], | |
| ) | |
# The three overseer actions.
| with gr.Row(): | |
| pass_btn = gr.Button("โ PASS (no error)", variant="secondary") | |
| with gr.Column(scale=2): | |
| flag_btn = gr.Button("โ FLAG (error found)", variant="stop") | |
| question_btn = gr.Button("โ QUESTION", variant="secondary") | |
# Inputs read by do_flag; the accordion starts collapsed.
| with gr.Accordion("FLAG details", open=False): | |
| error_type = gr.Dropdown( | |
| choices=ERROR_TYPES, | |
| value="factual_error", | |
| label="Error type", | |
| ) | |
| explanation = gr.Textbox( | |
| label="Explanation (optional, +0.2 bonus if good)", | |
| placeholder="Describe what was wrong...", | |
| lines=2, | |
| ) | |
# Input read by do_question; the accordion starts collapsed.
| with gr.Accordion("QUESTION", open=False): | |
| question_text = gr.Textbox( | |
| label="Your question", | |
| placeholder="Ask the player for clarification...", | |
| lines=2, | |
| ) | |
# All four handlers write the same five outputs in the same order:
# state, conversation, current turn, feedback, status.
| # Event handlers | |
| start_btn.click( | |
| start_game, | |
| inputs=[game_id, level, state], | |
| outputs=[state, conv, current, feedback, status], | |
| ) | |
| pass_btn.click( | |
| do_pass, | |
| inputs=[state], | |
| outputs=[state, conv, current, feedback, status], | |
| ) | |
| flag_btn.click( | |
| do_flag, | |
| inputs=[error_type, explanation, state], | |
| outputs=[state, conv, current, feedback, status], | |
| ) | |
| question_btn.click( | |
| do_question, | |
| inputs=[question_text, state], | |
| outputs=[state, conv, current, feedback, status], | |
| ) | |
# Footer with reference links for the three games.
| gr.Markdown( | |
| """ | |
| --- | |
| **Games:** [Avalon](https://en.wikipedia.org/wiki/Mafia_(party_game)) (Werewolf) | [Cicero](https://en.wikipedia.org/wiki/Diplomacy_(game)) (Diplomacy) | [Codenames](https://en.wikipedia.org/wiki/Codenames_(board_game)) (Word game) | |
| """ | |
| ) | |
| return demo | |