File size: 4,760 Bytes
446b0a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
OpenApp Environment HTTP Client.

This module provides the client for connecting to an OpenApp Environment server
over HTTP.
"""

from typing import Any, Dict

# Support both in-repo and standalone imports
try:
    # In-repo imports (when running from OpenEnv repository)
    from core.client_types import StepResult
    from core.env_server.types import State
    from core.http_env_client import HTTPEnvClient

    from .models import OpenAppAction, OpenAppObservation
except ImportError:
    # Standalone imports (when environment is standalone with openenv-core from pip)
    from openenv_core.client_types import StepResult
    from openenv_core.env_server.types import State
    from openenv_core.http_env_client import HTTPEnvClient

    from openapp_env.models import OpenAppAction, OpenAppObservation


class OpenAppEnv(HTTPEnvClient[OpenAppAction, OpenAppObservation]):
    """
    HTTP client for the OpenApp Environment.

    This client connects to an OpenAppEnvironment HTTP server and provides
    methods to interact with it: reset(), step(), and state access.

    The OpenApp environment simulates web applications (calendar, todo, messenger, maps)
    and allows agents to interact with them using browser-based actions.

    Example:
        >>> # Connect to a running server
        >>> client = OpenAppEnv(base_url="http://localhost:8000")
        >>> result = client.reset()
        >>> print(result.observation.url)
        >>>
        >>> # Click on an element
        >>> result = client.step(OpenAppAction(action_type="click", bid="123"))
        >>> print(result.observation.html)
        >>> print(result.reward)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = OpenAppEnv.from_docker_image("openapp-env:latest")
        >>> result = client.reset()
        >>> # Fill a text field
        >>> result = client.step(OpenAppAction(
        ...     action_type="fill",
        ...     bid="456",
        ...     text="Meeting with team"
        ... ))
    """

    def _step_payload(self, action: OpenAppAction) -> Dict[str, Any]:
        """
        Convert OpenAppAction to JSON payload for step request.

        ``action_type`` is always included. The optional fields ``bid``,
        ``text``, ``value``, ``url`` and ``direction`` are included only when
        they are not ``None``; ``metadata`` is included only when it is truthy
        (so an empty mapping is omitted).

        Args:
            action: OpenAppAction instance

        Returns:
            Dictionary representation suitable for JSON encoding
        """
        payload: Dict[str, Any] = {"action_type": action.action_type}

        # Optional scalar fields: forward only the ones that were explicitly
        # set, so falsy-but-meaningful values such as "" are still sent.
        for field_name in ("bid", "text", "value", "url", "direction"):
            field_value = getattr(action, field_name)
            if field_value is not None:
                payload[field_name] = field_value

        # Metadata uses a truthiness check on purpose: both None and an empty
        # container are left out of the request.
        if action.metadata:
            payload["metadata"] = action.metadata

        return payload

    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[OpenAppObservation]:
        """
        Parse server response into StepResult[OpenAppObservation].

        Missing observation fields fall back to neutral defaults (empty
        string / list / dict, index 0, or ``None``). ``reward`` and ``done``
        are read from the top level of the payload and copied onto both the
        observation and the returned StepResult.

        Args:
            payload: JSON response from server

        Returns:
            StepResult with OpenAppObservation
        """
        # dict.get's default only applies when the key is absent; a JSON
        # ``null`` observation would yield None and break the lookups below,
        # so treat any falsy value as "no observation data".
        obs_data = payload.get("observation") or {}

        # Read once so the observation and the StepResult always agree.
        reward = payload.get("reward")
        done = payload.get("done", False)

        observation = OpenAppObservation(
            html=obs_data.get("html", ""),
            url=obs_data.get("url", ""),
            open_pages_urls=obs_data.get("open_pages_urls", []),
            active_page_index=obs_data.get("active_page_index", 0),
            screenshot=obs_data.get("screenshot"),
            axtree_txt=obs_data.get("axtree_txt", ""),
            app_state=obs_data.get("app_state", {}),
            task_info=obs_data.get("task_info"),
            last_action_error=obs_data.get("last_action_error"),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    def _parse_state(self, payload: Dict[str, Any]) -> State:
        """
        Parse server response into State object.

        Args:
            payload: JSON response from /state endpoint

        Returns:
            State object with episode_id (``None`` when absent) and
            step_count (``0`` when absent)
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )