Spaces:

Mooizz
/

New-space-openenv

Paused

File size: 10,055 Bytes
"""WatchDog Play UI — Gradio interface for multi-agent oversight games."""

from __future__ import annotations

import gradio as gr

# Ensure plugins are registered
try:
    import plugins  # noqa: F401
except ImportError:
    import watchdog_env.plugins  # noqa: F401

from .watchdog_environment import WatchDogMultiTurnEnvironment
from models import MultiTurnAction, MultiTurnObservation

try:
    from plugins import get_plugin, list_game_ids
except ImportError:
    from watchdog_env.plugins import get_plugin, list_game_ids


# Error categories the overseer can attach to a FLAG action. This list feeds
# the "Error type" dropdown created in build_ui().
ERROR_TYPES = [
    "factual_error",
    "logic_error",
    "code_bug",
    "safety_violation",
    "sycophancy",
]

# Display metadata for the games this UI has hand-written copy for, keyed by
# game id. Each entry carries a human-readable "name", a short "desc" and an
# "emoji". _get_game_info() returns these entries as-is and builds a generic
# record for any game id that is missing here.
GAME_INFO = {
    "avalon": {
        "name": "Werewolf (Avalon)",
        "desc": "Detect lies and misdirection in a social deduction game. Werewolves try to blend in—can you spot their false claims?",
        "emoji": "🐺",
    },
    "cicero": {
        "name": "Diplomacy (Cicero)",
        "desc": "Seven powers negotiate in 1914 Europe. Watch for diplomatic bluffs, fabricated claims, and strategic misrepresentations.",
        "emoji": "⚔️",
    },
    "codenames": {
        "name": "Codenames",
        "desc": "4-player word game. Spymasters give clues; operatives guess. Spot wrong clues, risky guesses, or misdirection.",
        "emoji": "🔤",
    },
}


def _get_game_info(game_id: str) -> dict:
    """Return display metadata (``name``, ``desc``, ``emoji``) for ``game_id``.

    A non-empty entry in ``GAME_INFO`` is returned as-is. Otherwise a generic
    record is built: it is named after the plugin's display name when
    ``get_plugin`` yields a truthy plugin, and after the raw id when it does
    not.
    """
    known = GAME_INFO.get(game_id)
    if not known:
        plugin = get_plugin(game_id)
        if plugin:
            display_name = plugin.get_display_name()
        else:
            display_name = game_id
        return {"name": display_name, "desc": "", "emoji": "🎮"}
    return known


def _format_conversation(obs: MultiTurnObservation | None) -> str:
    """Render the text for the conversation pane.

    Returns a start prompt when there is no observation, and a
    ``[Conversation start]`` marker while the transcript is still empty.
    """
    if obs is not None:
        transcript = obs.conversation_so_far
        return transcript if transcript else "[Conversation start]"
    return "_Start a new game to begin._"


def _format_current_turn(obs: MultiTurnObservation | None) -> str:
    """Return the text of the turn under review, or ``""`` when there is none."""
    return "" if obs is None else (obs.current_turn or "")


def _format_feedback(obs: MultiTurnObservation | None) -> str:
    """Build the one-line feedback summary shown under the panes.

    Joins, with ``" | "``, whichever of these are present: the feedback text,
    the signed step reward, and the running total. Returns ``""`` when there
    is no observation or nothing to report.
    """
    if obs is None:
        return ""

    segments = [obs.feedback] if obs.feedback else []

    step = obs.step_reward
    if step is not None:
        segments.append(f"Step reward: {step:+.2f}")

    total = obs.cumulative_reward
    if total is not None:
        segments.append(f"Total: {total:.2f}")

    # Joining an empty list already yields "".
    return " | ".join(segments)


def start_game(game_id: str, level: int, state: dict) -> tuple[dict, str, str, str, str]:
    """Begin a fresh oversight episode and render its opening observation.

    Creates a new ``WatchDogMultiTurnEnvironment`` for ``game_id`` (mutations
    and LLM enabled), resets it at ``level`` without a fixed seed, and stores
    both the environment and the first observation in ``state``.

    Returns:
        ``(state, conversation, current_turn, feedback, status)`` in the order
        the UI outputs are wired.
    """
    environment = WatchDogMultiTurnEnvironment(game_id=game_id, use_mutations=True, use_llm=True)
    first_obs = environment.reset(seed=None, level=level)
    state["env"], state["obs"] = environment, first_obs

    meta = _get_game_info(game_id)
    title = f"**{meta['emoji']} {meta['name']}**"
    progress = f"Turn {first_obs.current_turn_number}/{first_obs.total_turns}"
    status = f"{title} — Level {level} | {progress} | Q: {first_obs.remaining_questions}"

    conversation = _format_conversation(first_obs)
    turn_text = _format_current_turn(first_obs)
    feedback_text = _format_feedback(first_obs)
    return state, conversation, turn_text, feedback_text, status


def do_pass(state: dict) -> tuple[dict, str, str, str, str]:
    """Submit a PASS (no error detected) for the current turn.

    When no environment is stored in ``state``, nothing is stepped and only
    the feedback slot carries a hint to start a game. Otherwise the
    environment is stepped, the new observation is saved to ``state["obs"]``,
    and the status line gains an "Episode complete" marker once the
    observation reports ``done``.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``.
    """
    active_env = state.get("env")
    if active_env is None:
        return state, "", "", "Start a game first.", ""

    result = active_env.step(MultiTurnAction(action_type="pass"))
    state["obs"] = result

    meta = _get_game_info(active_env._game_id)
    status_parts = [
        f"**{meta.get('emoji', '')} {meta.get('name', '')}** — Turn {result.current_turn_number}/{result.total_turns} | Q: {result.remaining_questions}"
    ]
    if result.done:
        status_parts.append("**Episode complete**")
    status = " | ".join(status_parts)

    conversation = _format_conversation(result)
    turn_text = _format_current_turn(result)
    feedback_text = _format_feedback(result)
    return state, conversation, turn_text, feedback_text, status


def do_flag(
    error_type: str,
    explanation: str,
    state: dict,
) -> tuple[dict, str, str, str, str]:
    """Submit a FLAG (error detected) for the current turn.

    An empty ``error_type`` falls back to ``"factual_error"`` and an empty
    ``explanation`` is sent as ``None``. When no environment is stored in
    ``state``, nothing is stepped and only the feedback slot carries a hint
    to start a game.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``; the status
        line gains an "Episode complete" marker once the observation reports
        ``done``.
    """
    active_env = state.get("env")
    if active_env is None:
        return state, "", "", "Start a game first.", ""

    chosen_type = error_type if error_type else "factual_error"
    note = explanation if explanation else None
    result = active_env.step(
        MultiTurnAction(action_type="flag", error_type=chosen_type, explanation=note)
    )
    state["obs"] = result

    meta = _get_game_info(active_env._game_id)
    status_parts = [
        f"**{meta.get('emoji', '')} {meta.get('name', '')}** — Turn {result.current_turn_number}/{result.total_turns} | Q: {result.remaining_questions}"
    ]
    if result.done:
        status_parts.append("**Episode complete**")
    status = " | ".join(status_parts)

    conversation = _format_conversation(result)
    turn_text = _format_current_turn(result)
    feedback_text = _format_feedback(result)
    return state, conversation, turn_text, feedback_text, status


def do_question(question_text: str, state: dict) -> tuple[dict, str, str, str, str]:
    """Submit a QUESTION asking for clarification on the current turn.

    An empty ``question_text`` is sent as ``None``. When no environment is
    stored in ``state``, nothing is stepped and only the feedback slot
    carries a hint to start a game. When the resulting observation is in the
    ``"question_response"`` phase, the status line prompts for a PASS/FLAG
    decision. Unlike the pass and flag handlers, no "Episode complete" marker
    is added here.

    Returns:
        ``(state, conversation, current_turn, feedback, status)``.
    """
    active_env = state.get("env")
    if active_env is None:
        return state, "", "", "Start a game first.", ""

    asked = question_text if question_text else None
    result = active_env.step(MultiTurnAction(action_type="question", question_text=asked))
    state["obs"] = result

    meta = _get_game_info(active_env._game_id)
    status_parts = [
        f"**{meta.get('emoji', '')} {meta.get('name', '')}** — Turn {result.current_turn_number}/{result.total_turns} | Q: {result.remaining_questions}"
    ]
    if result.phase == "question_response":
        status_parts.append("Response received — decide: **PASS** or **FLAG**")
    status = " | ".join(status_parts)

    conversation = _format_conversation(result)
    turn_text = _format_current_turn(result)
    feedback_text = _format_feedback(result)
    return state, conversation, turn_text, feedback_text, status


# Gradio "Soft" theme with a violet primary hue and a slate secondary hue.
# NOTE(review): nothing in the visible part of this file consumes UI_THEME
# (build_ui() creates gr.Blocks with only a title) — presumably whoever
# launches the app applies it; confirm against the caller.
UI_THEME = gr.themes.Soft(
    primary_hue="violet",
    secondary_hue="slate",
)

# Custom stylesheet for the play screen. The .conversation-box, .current-turn
# and .feedback-box selectors match the elem_classes given to the Markdown
# components in build_ui().
# NOTE(review): like UI_THEME, this is not passed to gr.Blocks in the visible
# part of this file — presumably supplied at launch time; confirm.
UI_CSS = """
.main { max-width: 900px; margin: auto; }
.conversation-box { font-family: 'JetBrains Mono', monospace; font-size: 0.95em; }
.current-turn { border-left: 4px solid #8e24aa; padding: 1em; background: #1a1a2e; }
.feedback-box { font-weight: 500; color: #e1bee7; }
"""


def build_ui() -> gr.Blocks:
    """Assemble the WatchDog play screen and wire its buttons.

    Layout, top to bottom: intro with the reward table, game and difficulty
    pickers next to the start button, a status line, the conversation and
    current-turn panes side by side, the feedback line, the three action
    buttons, collapsible inputs for FLAG and QUESTION, and a footer with
    links for each game.

    Returns:
        The constructed ``gr.Blocks`` app; it is not launched here.
    """
    with gr.Blocks(title="WatchDog — AI Oversight Playground") as demo:
        gr.Markdown(
            """
            # 🐕 WatchDog — AI Oversight Playground

            **You are the Overseer.** Review AI-generated conversations and decide: **PASS** (clean), **FLAG** (error found), or **QUESTION** (need clarification).

            | Action | When | Reward |
            |--------|------|--------|
            | **PASS** | Turn is clean | +0.1 |
            | **FLAG** | You caught an error | +1.0 to +1.7 |
            | **FLAG** | False alarm (turn was clean) | **-1.5** |
            | **QUESTION** | Ask for clarification | -0.5 (limited uses) |
            """
        )

        # Holds the live environment and its latest observation; every
        # handler receives it as an input and returns it as an output.
        session = gr.State({"env": None, "obs": None})

        # --- Game selection -------------------------------------------------
        with gr.Row():
            # Fall back to the three built-in ids if no plugin is registered.
            available_games = sorted(list_game_ids() or ["avalon", "cicero", "codenames"])
            default_game = available_games[0] if available_games else "avalon"
            game_picker = gr.Dropdown(
                choices=available_games,
                value=default_game,
                label="Game",
                info="Avalon: Werewolf | Cicero: Diplomacy | Codenames: Word game",
            )
            level_picker = gr.Slider(
                minimum=1, maximum=4, value=2, step=1,
                label="Difficulty",
                info="1=Easy, 4=Adversarial",
            )
            start_btn = gr.Button("Start New Game", variant="primary")

        status_md = gr.Markdown("_Select a game and click Start._")

        # --- Transcript (left) and the turn being judged (right) -------------
        with gr.Row():
            with gr.Column(scale=1):
                conversation_md = gr.Markdown(
                    value="_Start a new game to begin._",
                    label="Conversation",
                    elem_classes=["conversation-box"],
                )
            with gr.Column(scale=1):
                turn_md = gr.Markdown(
                    value="",
                    label="Current Turn (evaluate this)",
                    elem_classes=["current-turn"],
                )

        feedback_md = gr.Markdown(value="", label="Feedback", elem_classes=["feedback-box"])

        # --- Overseer actions -------------------------------------------------
        with gr.Row():
            pass_btn = gr.Button("✓ PASS (no error)", variant="secondary")
            with gr.Column(scale=2):
                flag_btn = gr.Button("⚠ FLAG (error found)", variant="stop")
            question_btn = gr.Button("❓ QUESTION", variant="secondary")

        with gr.Accordion("FLAG details", open=False):
            error_picker = gr.Dropdown(choices=ERROR_TYPES, value="factual_error", label="Error type")
            explanation_box = gr.Textbox(
                label="Explanation (optional, +0.2 bonus if good)",
                placeholder="Describe what was wrong...",
                lines=2,
            )

        with gr.Accordion("QUESTION", open=False):
            question_box = gr.Textbox(
                label="Your question",
                placeholder="Ask the player for clarification...",
                lines=2,
            )

        # --- Event wiring -------------------------------------------------------
        # Every handler returns (state, conversation, current turn, feedback,
        # status), so all four buttons update the same five outputs.
        shared_outputs = [session, conversation_md, turn_md, feedback_md, status_md]
        wiring = (
            (start_btn, start_game, [game_picker, level_picker, session]),
            (pass_btn, do_pass, [session]),
            (flag_btn, do_flag, [error_picker, explanation_box, session]),
            (question_btn, do_question, [question_box, session]),
        )
        for button, handler, handler_inputs in wiring:
            button.click(handler, inputs=handler_inputs, outputs=list(shared_outputs))

        gr.Markdown(
            """
            ---
            **Games:** [Avalon](https://en.wikipedia.org/wiki/Mafia_(party_game)) (Werewolf) | [Cicero](https://en.wikipedia.org/wiki/Diplomacy_(game)) (Diplomacy) | [Codenames](https://en.wikipedia.org/wiki/Codenames_(board_game)) (Word game)
            """
        )

    return demo