File size: 2,696 Bytes
eca7752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Desktop Computer-Use Environment Client.

Connects to a running desktop-openenv server and exposes
desktop interaction tools via the MCP tool-calling interface.

Example:
    >>> with DesktopEnv(base_url="http://localhost:8000") as env:
    ...     obs = env.reset(app="libreoffice-calc")
    ...     print("Stream:", obs["metadata"]["stream_url"])
    ...
    ...     # Discover tools
    ...     tools = env.list_tools()
    ...     print([t.name for t in tools])
    ...     # ['screenshot', 'click', 'double_click', 'right_click',
    ...     #  'type_text', 'press_key', 'scroll', 'drag',
    ...     #  'run_command', 'get_cursor_position', 'get_screen_size']
    ...
    ...     # Take screenshot
    ...     img_b64 = env.call_tool("screenshot")
    ...
    ...     # Click on something
    ...     env.call_tool("click", x=500, y=300)
    ...
    ...     # Type text
    ...     env.call_tool("type_text", text="Hello world")
    ...
    ...     # Press key combo
    ...     env.call_tool("press_key", key="ctrl+s")
"""

from openenv.core.mcp_client import MCPToolClient


class DesktopEnv(MCPToolClient):
    """
    MCP client for the Desktop Computer-Use environment.

    The class body defines nothing of its own; all functionality is
    inherited from ``MCPToolClient``:

    - ``list_tools()``              — discover available desktop tools
    - ``call_tool(name, **kwargs)`` — call a tool by name
    - ``reset(**kwargs)``           — start new episode with chosen app
    - ``step(action)``              — low-level action execution

    Keyword arguments accepted by ``reset()``:

    - ``app``: preset name ("libreoffice-calc", "firefox", "blender", etc.)
      or a raw shell command to launch.
    - ``resolution``: tuple ``(width, height)``. Default ``(1920, 1080)``.
    - ``timeout``: sandbox timeout in seconds. Default 600.
    - ``install_commands``: list of shell commands to run before launch.

    Tools exposed by the server:

    - ``screenshot()``                          — capture screen as base64 PNG
    - ``click(x, y)``                           — left click
    - ``double_click(x, y)``                    — double click
    - ``right_click(x, y)``                     — right click
    - ``type_text(text)``                       — type at cursor
    - ``press_key(key)``                        — press key/combo ("enter", "ctrl+s")
    - ``scroll(direction, amount)``             — scroll up/down
    - ``drag(start_x, start_y, end_x, end_y)``  — drag mouse
    - ``run_command(command)``                  — shell command
    - ``get_cursor_position()``                 — current mouse position
    - ``get_screen_size()``                     — screen dimensions
    """