"""
Desktop Computer-Use OpenEnv Environment.

Exposes a cloud desktop sandbox (E2B) with tools designed to mirror the action
schemas of the major frontier computer-use models — so a model's native tool
output can drive the env with minimal token-level rewriting.

Action surface (modelled on Anthropic's `computer_20251124` since it's the
broadest superset of OpenAI Operator and Qwen3-VL ComputerUse):

  Observation:
    screenshot()                         -> image (PNG)
    cursor_position()                    -> "x,y"
    get_screen_size()                    -> "WxH"

  Mouse — all coordinate args are `[x, y]` arrays (matches Anthropic + Qwen):
    left_click(coordinate, text=None)
    right_click(coordinate, text=None)
    middle_click(coordinate, text=None)
    double_click(coordinate, text=None)
    triple_click(coordinate, text=None)
    mouse_move(coordinate)
    left_click_drag(start_coordinate, coordinate, text=None)
    left_mouse_down(coordinate=None)
    left_mouse_up(coordinate=None)
    scroll(coordinate, scroll_direction, scroll_amount, text=None)

  Keyboard:
    type(text)
    key(keys)                  e.g. "ctrl+s" or "enter"
    hold_key(keys, duration)

  Control:
    wait(duration)
    terminate(status)          status="success"|"failure"; sets done=True
    run_command(command)       bash escape hatch (outside the model spec)

The `text` modifier on click/scroll holds shift/ctrl/alt/super while clicking,
matching Anthropic's spec exactly. Coordinates are in **pixel space** at the
configured `display_width_px` × `display_height_px`. If the model emits
0–1000 normalized coords (Qwen2.5-VL), the rollout adapter must rescale.
"""

import base64
import os
import time
from typing import Any, List, Optional, Tuple
from uuid import uuid4

from dotenv import load_dotenv
from e2b_desktop import Sandbox
from fastmcp import FastMCP
from fastmcp.utilities.types import Image
from openenv.core.env_server.mcp_environment import MCPEnvironment
from openenv.core.env_server.types import Action, Observation

load_dotenv()


def _apt_install(package: str) -> List[str]:
    """Return the shell commands that apt-install *package* non-interactively."""
    return [
        "sudo apt-get update -qq",
        f"sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq {package}",
    ]


# Pre-built app configs, keyed by app name.
# Each value is a tuple: (install_commands, launch_command, wait_ms).
# A launch_command of None means nothing is launched (bare desktop).
APP_PRESETS = {
    "libreoffice-calc": (_apt_install("libreoffice-calc"), "libreoffice --calc", 5000),
    "libreoffice-writer": (_apt_install("libreoffice-writer"), "libreoffice --writer", 5000),
    "libreoffice-impress": (_apt_install("libreoffice-impress"), "libreoffice --impress", 5000),
    "firefox": (_apt_install("firefox"), "firefox", 5000),
    "blender": (_apt_install("blender"), "blender", 8000),
    "terminal": ([], "xfce4-terminal", 2000),
    "gimp": (_apt_install("gimp"), "gimp", 6000),
    "desktop": ([], None, 1000),
}


# Maps every accepted modifier spelling to the canonical key name.
# Built from (canonical, spellings) groups so each alias family reads as a unit.
_MODIFIER_ALIAS = {
    spelling: canonical
    for canonical, spellings in (
        ("shift", ("shift",)),
        ("ctrl", ("ctrl", "control")),
        ("alt", ("alt", "option")),
        ("super", ("super", "cmd", "command", "win", "meta")),
    )
    for spelling in spellings
}


def _coerce_coord(coord: Any) -> Tuple[int, int]:
    """Normalize a coordinate into an ``(x, y)`` tuple of ints.

    Accepts ``[x, y]`` / ``(x, y)`` sequences or a string such as ``"x,y"``,
    ``"(x, y)"`` or ``"[x, y]"``. Components may be ints, floats or numeric
    strings; fractional values are truncated toward zero, in both the
    sequence and the string form.

    Args:
        coord: The coordinate in any of the accepted shapes.

    Returns:
        The coordinate as a pair of ints.

    Raises:
        ValueError: If there are not exactly two components, or a component
            is not numeric.
        TypeError: If ``coord`` is not iterable (e.g. ``None``).
    """

    def _to_int(value: Any) -> int:
        if isinstance(value, str):
            try:
                return int(value)
            except ValueError:
                # Models sometimes emit decimals ("10.5"); go through float so
                # strings truncate exactly like float components do.
                return int(float(value))
        return int(value)

    if isinstance(coord, str):
        # Drop any bracket characters, then split on the comma.
        coord = coord.translate(str.maketrans("", "", "()[]")).split(",")

    try:
        x, y = coord
    except ValueError as err:
        raise ValueError(
            f"coordinate must have exactly two components [x, y], got {coord!r}"
        ) from err
    return _to_int(x), _to_int(y)


def _split_modifiers(mod_text: Optional[str]) -> List[str]:
    """Split modifier text such as ``"shift"`` or ``"ctrl+shift"`` into keys.

    Each ``+``-separated segment is stripped, lower-cased and mapped through
    ``_MODIFIER_ALIAS``; spellings not in the alias table pass through
    unchanged. Empty segments (from input like ``"ctrl+"`` or ``" + "``) are
    dropped so an empty key name is never sent to the sandbox.

    Args:
        mod_text: The modifier text, or ``None`` / ``""`` for no modifiers.

    Returns:
        The normalized modifier key names, in input order.
    """
    if not mod_text:
        return []
    tokens = (segment.strip().lower() for segment in mod_text.split("+"))
    return [_MODIFIER_ALIAS.get(token, token) for token in tokens if token]


class DesktopEnvironment(MCPEnvironment):
    """Cloud desktop environment backed by an E2B Desktop sandbox.

    Construction registers one MCP tool per computer-use action (observation,
    mouse, keyboard, control) on a ``FastMCP`` server and hands that server to
    the ``MCPEnvironment`` base class. ``reset()`` provisions a fresh sandbox;
    ``step()`` / ``step_async()`` delegate to the base class and report
    ``done=True`` once the ``terminate`` tool has been called.
    """

    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(self):
        """Read the API key, create the initial state and register MCP tools.

        No sandbox is created here; every sandbox-backed tool raises
        ``RuntimeError`` until ``reset()`` has been called.

        Raises:
            KeyError: If the ``E2B_API_KEY`` environment variable is not set.
        """
        self._api_key = os.environ["E2B_API_KEY"]
        self._sandbox: Optional[Sandbox] = None
        self._resolution = (1024, 768)  # safe default for vision-model coord scaling
        self._timeout = 600

        # Support both the package-relative layout and a flat, script-style one.
        try:
            from ..models import DesktopState, ScreenAction
        except ImportError:
            from models import DesktopState, ScreenAction

        self._DesktopState = DesktopState
        self._ScreenAction = ScreenAction
        self._state = DesktopState(episode_id=str(uuid4()))
        self._terminated = False
        self._terminate_status: Optional[str] = None

        # ── Register MCP tools ──────────────────────────────────────────
        mcp = FastMCP("desktop_env")

        # ----- Observation ------------------------------------------------

        @mcp.tool
        def screenshot() -> Image:
            """Capture the current screen state.

            Returns the screen as a PNG image content block — the model sees
            the actual pixels, not a base64 string.
            """
            self._require_sandbox()
            data = self._sandbox.screenshot()
            # Keep a base64 copy in the state alongside the image returned to the model.
            self._state.last_screenshot_b64 = base64.b64encode(data).decode("utf-8")
            self._record("screenshot", "Captured screenshot")
            return Image(data=data, format="png")

        @mcp.tool
        def cursor_position() -> str:
            """Return current mouse cursor position as 'x,y'."""
            self._require_sandbox()
            x, y = self._sandbox.get_cursor_position()
            return f"{x},{y}"

        @mcp.tool
        def get_screen_size() -> str:
            """Return screen dimensions as 'WxH'."""
            self._require_sandbox()
            w, h = self._sandbox.get_screen_size()
            return f"{w}x{h}"

        # ----- Mouse: clicks --------------------------------------------

        @mcp.tool
        def left_click(coordinate: List[int], text: Optional[str] = None) -> str:
            """Left-click at `coordinate=[x, y]`.

            Optional `text` holds modifier keys ("shift", "ctrl", "alt",
            "super", or combinations like "ctrl+shift") for the duration
            of the click.
            """
            return self._click("left", coordinate, text)

        @mcp.tool
        def right_click(coordinate: List[int], text: Optional[str] = None) -> str:
            """Right-click at `coordinate=[x, y]`. Optional modifier `text`."""
            return self._click("right", coordinate, text)

        @mcp.tool
        def middle_click(coordinate: List[int], text: Optional[str] = None) -> str:
            """Middle-click at `coordinate=[x, y]`. Optional modifier `text`."""
            return self._click("middle", coordinate, text)

        @mcp.tool
        def double_click(coordinate: List[int], text: Optional[str] = None) -> str:
            """Double-click at `coordinate=[x, y]`. Optional modifier `text`."""
            self._require_sandbox()
            x, y = _coerce_coord(coordinate)
            with self._held(_split_modifiers(text)):
                self._sandbox.double_click(x, y)
            self._record("double_click", f"Double click at ({x},{y}) mods={text or ''}")
            return f"Double-clicked at ({x},{y})"

        @mcp.tool
        def triple_click(coordinate: List[int], text: Optional[str] = None) -> str:
            """Triple-click at `coordinate=[x, y]`. Selects line/word in most apps."""
            self._require_sandbox()
            x, y = _coerce_coord(coordinate)
            with self._held(_split_modifiers(text)):
                # E2B has no triple_click — emulate with three rapid left clicks
                for _ in range(3):
                    self._sandbox.left_click(x, y)
            # Record the modifiers too, like every other click action does.
            self._record("triple_click", f"Triple click at ({x},{y}) mods={text or ''}")
            return f"Triple-clicked at ({x},{y})"

        # ----- Mouse: motion --------------------------------------------

        @mcp.tool
        def mouse_move(coordinate: List[int]) -> str:
            """Move the mouse cursor to `coordinate=[x, y]` without clicking."""
            self._require_sandbox()
            x, y = _coerce_coord(coordinate)
            self._sandbox.move_mouse(x, y)
            self._record("mouse_move", f"Moved mouse to ({x},{y})")
            return f"Moved cursor to ({x},{y})"

        @mcp.tool
        def left_click_drag(
            start_coordinate: List[int],
            coordinate: List[int],
            text: Optional[str] = None,
        ) -> str:
            """Press at `start_coordinate`, drag to `coordinate`, then release."""
            self._require_sandbox()
            sx, sy = _coerce_coord(start_coordinate)
            ex, ey = _coerce_coord(coordinate)
            with self._held(_split_modifiers(text)):
                self._sandbox.drag((sx, sy), (ex, ey))
            self._record("left_click_drag", f"Drag ({sx},{sy})→({ex},{ey}) mods={text or ''}")
            return f"Dragged from ({sx},{sy}) to ({ex},{ey})"

        @mcp.tool
        def left_mouse_down(coordinate: Optional[List[int]] = None) -> str:
            """Press the left mouse button (without releasing). Optionally move first."""
            return self._left_button(True, coordinate)

        @mcp.tool
        def left_mouse_up(coordinate: Optional[List[int]] = None) -> str:
            """Release the left mouse button. Optionally move first."""
            return self._left_button(False, coordinate)

        @mcp.tool
        def scroll(
            coordinate: List[int],
            scroll_direction: str,
            scroll_amount: int,
            text: Optional[str] = None,
        ) -> str:
            """Scroll at `coordinate=[x, y]` in `scroll_direction` ("up"/"down"/"left"/"right").

            `scroll_amount` is the number of clicks of the scroll wheel.
            Optional `text` modifier (e.g. "shift" for horizontal scrolling).
            """
            self._require_sandbox()
            x, y = _coerce_coord(coordinate)
            self._sandbox.move_mouse(x, y)
            with self._held(_split_modifiers(text)):
                # NOTE(review): the direction is forwarded verbatim — confirm the
                # installed e2b_desktop `scroll` really handles "left"/"right".
                self._sandbox.scroll(direction=scroll_direction, amount=int(scroll_amount))
            self._record("scroll", f"Scrolled {scroll_direction} {scroll_amount} at ({x},{y})")
            return f"Scrolled {scroll_direction} {scroll_amount} clicks at ({x},{y})"

        # ----- Keyboard --------------------------------------------------

        @mcp.tool(name="type")
        def type_text(text: str) -> str:
            """Type `text` at the current cursor position (character-by-character)."""
            self._require_sandbox()
            self._sandbox.write(text)
            # Only a short preview goes into the action log.
            preview = text[:80] + ("..." if len(text) > 80 else "")
            self._record("type", f'Typed: "{preview}"')
            return f"Typed {len(text)} chars"

        @mcp.tool
        def key(keys: str) -> str:
            """Press a key or key combo using xdotool syntax.

            Examples: "enter", "ctrl+s", "ctrl+shift+t", "alt+F4".
            """
            self._require_sandbox()
            if "+" in keys:
                # A combo is passed to the sandbox as a list of key names.
                self._sandbox.press([k.strip() for k in keys.split("+")])
            else:
                self._sandbox.press(keys)
            self._record("key", f"Pressed: {keys}")
            return f"Pressed {keys}"

        @mcp.tool
        def hold_key(keys: str, duration: float) -> str:
            """Hold `keys` (e.g. "shift") for `duration` seconds."""
            self._require_sandbox()
            parts = [k.strip() for k in keys.split("+")]
            try:
                for p in parts:
                    self._sandbox.key_press(p)
                time.sleep(float(duration))
            finally:
                # Always attempt to release, in reverse order, so a failure
                # part-way through does not leave keys stuck down.
                for p in reversed(parts):
                    try:
                        self._sandbox.key_release(p)
                    except Exception:
                        pass
            self._record("hold_key", f"Held {keys} for {duration}s")
            return f"Held {keys} for {duration}s"

        # ----- Control ---------------------------------------------------

        @mcp.tool
        def wait(duration: float) -> str:
            """Pause for `duration` seconds. Useful while UI animations settle."""
            time.sleep(float(duration))
            self._record("wait", f"Waited {duration}s")
            return f"Waited {duration}s"

        @mcp.tool
        def terminate(status: str) -> str:
            """End the episode with `status` ("success" or "failure")."""
            # Only sets flags; step()/step_async() turn them into done=True.
            self._terminated = True
            self._terminate_status = status
            self._record("terminate", f"Terminated: {status}")
            return f"Episode terminated with status={status}"

        @mcp.tool
        def run_command(command: str) -> str:
            """Run a shell command in the sandbox (escape hatch / grading hook)."""
            self._require_sandbox()
            result = self._sandbox.commands.run(command, timeout=60)
            output = result.stdout or ""
            if result.exit_code != 0 and result.stderr:
                output += f"\nSTDERR: {result.stderr}"
            self._record("command", f"$ {command}")
            return output if output else "(no output)"

        super().__init__(mcp)

    # ── Internal helpers ───────────────────────────────────────────────

    def _require_sandbox(self):
        """Raise ``RuntimeError`` unless a sandbox is currently attached."""
        if not self._sandbox:
            raise RuntimeError("Environment not reset — call reset() first.")

    def _record(self, action_type: str, detail: str):
        """Append an action entry, tagged with the current step, to the state log."""
        self._state.actions.append(self._ScreenAction(
            action_type=action_type,
            detail=detail,
            step=self._state.step_count,
        ))

    def _click(self, button: str, coordinate, modifier_text: Optional[str]) -> str:
        """Perform a single click with ``button`` while holding optional modifiers.

        Args:
            button: ``"left"``, ``"right"`` or ``"middle"``.
            coordinate: Target position in any shape ``_coerce_coord`` accepts.
            modifier_text: Modifier keys to hold, e.g. ``"ctrl+shift"``, or ``None``.

        Returns:
            A short human-readable confirmation.

        Raises:
            RuntimeError: If no sandbox is attached.
            KeyError: If ``button`` is not one of the three supported names.
        """
        self._require_sandbox()
        x, y = _coerce_coord(coordinate)
        click_fn = {
            "left": self._sandbox.left_click,
            "right": self._sandbox.right_click,
            # Falls back to a left click when the sandbox has no middle_click.
            "middle": getattr(self._sandbox, "middle_click", self._sandbox.left_click),
        }[button]
        with self._held(_split_modifiers(modifier_text)):
            click_fn(x, y)
        self._record(f"{button}_click", f"{button} click at ({x},{y}) mods={modifier_text or ''}")
        return f"{button.title()}-clicked at ({x},{y})"

    def _left_button(self, pressed: bool, coordinate) -> str:
        """Press or release the left mouse button, optionally moving first.

        If the sandbox object has no ``mouse_press`` / ``mouse_release``
        method, the button state is left untouched and the returned message
        says so, rather than reporting a press that never happened.

        Args:
            pressed: ``True`` to press the button, ``False`` to release it.
            coordinate: Optional position to move to before acting.

        Returns:
            A short human-readable status message.

        Raises:
            RuntimeError: If no sandbox is attached.
        """
        self._require_sandbox()
        if coordinate is not None:
            x, y = _coerce_coord(coordinate)
            self._sandbox.move_mouse(x, y)

        tool_name = "left_mouse_down" if pressed else "left_mouse_up"
        sdk_method = getattr(
            self._sandbox, "mouse_press" if pressed else "mouse_release", None
        )
        if sdk_method is None:
            self._record(tool_name, f"Unsupported by sandbox SDK (no-op) at {coordinate}")
            return (
                f"{tool_name} is not supported by the installed e2b_desktop SDK; "
                "the button state was not changed"
            )

        sdk_method("left")
        if pressed:
            self._record(tool_name, f"Pressed left at {coordinate}")
            return "Left mouse pressed"
        self._record(tool_name, f"Released left at {coordinate}")
        return "Left mouse released"

    class _Held:
        """Context manager that holds modifier keys down around an action.

        Keys are pressed in order on entry and released in reverse order on
        exit. Failures of individual press/release calls are ignored
        (best effort); exceptions raised inside the ``with`` body propagate.
        """

        def __init__(self, sandbox, mods: List[str]):
            self._sandbox = sandbox
            self._mods = mods or []

        def __enter__(self):
            for m in self._mods:
                try:
                    self._sandbox.key_press(m)
                except Exception:
                    pass
            return self

        def __exit__(self, *exc):
            for m in reversed(self._mods):
                try:
                    self._sandbox.key_release(m)
                except Exception:
                    pass

    def _held(self, mods: List[str]):
        """Return a ``_Held`` context manager bound to the current sandbox."""
        return self._Held(self._sandbox, mods)

    def _apply_termination(self, obs: Observation) -> Observation:
        """Return ``obs`` unchanged, or a terminal observation once terminated.

        After the ``terminate`` tool has run, the result is a new
        ``Observation`` with ``done=True``, reward ``1.0`` for status
        ``"success"`` (``0.0`` otherwise), and the original metadata plus a
        ``terminate_status`` entry.
        """
        if not self._terminated:
            return obs
        return Observation(
            done=True,
            reward=1.0 if self._terminate_status == "success" else 0.0,
            metadata={**(obs.metadata or {}), "terminate_status": self._terminate_status},
        )

    # ── OpenEnv lifecycle ──────────────────────────────────────────────

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> Observation:
        """Kill any existing sandbox and provision a fresh one.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Episode identifier; a random UUID is used when omitted.
            **kwargs: Optional settings:
                ``app`` — a key of ``APP_PRESETS`` or a custom launch command
                (default ``"desktop"``);
                ``resolution`` — ``(width, height)`` (default ``(1024, 768)``);
                ``timeout`` — sandbox timeout passed to ``Sandbox.create``
                (default ``600``);
                ``install_commands`` — shell commands run before launch, used
                only when ``app`` is not a preset.
                Passing ``None`` for ``resolution``, ``timeout`` or
                ``install_commands`` selects the default.

        Returns:
            An ``Observation`` with ``done=False`` and metadata describing the
            new sandbox (id, stream URL, app, resolution, usage hint).

        Raises:
            Exception: Whatever sandbox creation or setup raises. In that case
                no sandbox stays attached and a partially set-up sandbox is
                killed before the error propagates.
        """
        # Detach the old handle before killing it, so a failure further down
        # can never leave the tools pointing at a dead sandbox.
        previous, self._sandbox = self._sandbox, None
        if previous is not None:
            try:
                previous.kill()
            except Exception:
                pass  # best effort — the old sandbox may already be gone

        app = kwargs.get("app", "desktop")
        resolution = kwargs.get("resolution")
        resolution = (1024, 768) if resolution is None else tuple(resolution)
        timeout = kwargs.get("timeout")
        timeout = 600 if timeout is None else int(timeout)
        custom_install = kwargs.get("install_commands") or []

        self._resolution = resolution
        self._timeout = timeout
        self._terminated = False
        self._terminate_status = None

        if app in APP_PRESETS:
            install_cmds, launch_cmd, wait_ms = APP_PRESETS[app]
        else:
            # Unknown app names are treated as the launch command itself.
            install_cmds = custom_install
            launch_cmd = app
            wait_ms = 3000

        sandbox = Sandbox.create(
            resolution=resolution,
            dpi=96,
            timeout=timeout,
            api_key=self._api_key,
        )
        try:
            for cmd in install_cmds:
                sandbox.commands.run(cmd, timeout=300)

            if launch_cmd:
                sandbox.commands.run(launch_cmd, background=True)
                sandbox.wait(wait_ms)

            sandbox.stream.start()
            stream_url = sandbox.stream.get_url()
        except Exception:
            # Do not leave a half-configured sandbox running after a failed setup.
            try:
                sandbox.kill()
            except Exception:
                pass
            raise

        # Attach only once setup has fully succeeded.
        self._sandbox = sandbox

        self._state = self._DesktopState(
            episode_id=episode_id or str(uuid4()),
            sandbox_id=sandbox.sandbox_id,
            stream_url=stream_url,
            app=app,
            screen_width=resolution[0],
            screen_height=resolution[1],
            step_count=0,
        )

        return Observation(
            done=False,
            reward=None,
            metadata={
                "status": "ready",
                "sandbox_id": sandbox.sandbox_id,
                "stream_url": stream_url,
                "app": app,
                "resolution": f"{resolution[0]}x{resolution[1]}",
                "message": (
                    f"Desktop ready ({app}, {resolution[0]}x{resolution[1]}). "
                    "Call screenshot to see the screen, then drive the mouse / "
                    "keyboard with coordinate arrays in pixel space. Coordinates "
                    "are absolute pixels in this resolution."
                ),
            },
        )

    def _step_impl(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Observation:
        """Return an error observation naming the unsupported action type.

        The returned metadata points callers at ``ListToolsAction`` /
        ``CallToolAction`` for MCP interactions.
        """
        return Observation(
            done=False,
            reward=None,
            metadata={
                "error": f"Unknown action type: {type(action).__name__}. "
                "Use ListToolsAction or CallToolAction for MCP interactions."
            },
        )

    def step(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Observation:
        """Run one step synchronously and apply the termination state.

        The step counter is incremented before delegating, so actions recorded
        during this call carry the new step index.
        """
        self._state.step_count += 1
        obs = super().step(action, timeout_s=timeout_s, **kwargs)
        return self._apply_termination(obs)

    async def step_async(
        self,
        action: Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Observation:
        """Async counterpart of ``step()`` with identical bookkeeping."""
        self._state.step_count += 1
        obs = await super().step_async(action, timeout_s=timeout_s, **kwargs)
        return self._apply_termination(obs)

    @property
    def state(self):
        """The current episode state object."""
        return self._state