Spaces:
Runtime error
Runtime error
File size: 4,688 Bytes
42cc6d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Android Environment HTTP Client.
This module provides the client for connecting to an Android Environment server
over HTTP.
"""
from typing import Any, Dict
from core.client_types import StepResult
from core.env_server.types import State
from core.http_env_client import HTTPEnvClient
from .models import AndroidAction, AndroidObservation
class AndroidEnv(HTTPEnvClient[AndroidAction, AndroidObservation]):
    """
    HTTP client for the Android Environment.

    This client connects to an AndroidEnvironment HTTP server running in a
    container with an Android emulator. It provides methods to interact with
    Android applications through touchscreen gestures.

    Example:
        >>> # Connect to a running server
        >>> client = AndroidEnv(base_url="http://localhost:8000")
        >>> result = client.reset()
        >>> print(result.observation.screen_width, result.observation.screen_height)
        >>>
        >>> # Tap on the screen
        >>> result = client.step(
        ...     AndroidAction(tool_name="tap", parameters={"x": 0.5, "y": 0.3})
        ... )
        >>> print(result.reward, result.done)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = AndroidEnv.from_docker_image(
        ...     "android-env:latest",
        ...     environment={
        ...         "ANDROID_AVD_NAME": "Pixel_6_API_33",
        ...         "ANDROID_TASK_PATH": "/workspace/tasks/my_task.textproto"
        ...     }
        ... )
        >>> result = client.reset()
        >>> result = client.step(
        ...     AndroidAction(tool_name="tap", parameters={"x": 0.5, "y": 0.5})
        ... )
        >>> # View screen image (base64)
        >>> print(result.observation.screen_image[:50])  # First 50 chars
        >>> client.close()

    Example with high-level gestures:
        >>> # Swipe gesture
        >>> result = client.step(AndroidAction(
        ...     tool_name="swipe",
        ...     parameters={"x1": 0.5, "y1": 0.8, "x2": 0.5, "y2": 0.2}
        ... ))
        >>>
        >>> # Type text (if supported by task)
        >>> result = client.step(AndroidAction(
        ...     tool_name="type_text",
        ...     parameters={"text": "Hello Android"}
        ... ))
        >>>
        >>> # Press system button
        >>> result = client.step(AndroidAction(
        ...     tool_name="press_button",
        ...     parameters={"button": "HOME"}
        ... ))
    """

    def _step_payload(self, action: AndroidAction) -> Dict[str, Any]:
        """
        Convert AndroidAction to JSON payload for step request.

        Args:
            action: AndroidAction instance with tool_name and parameters.

        Returns:
            Dictionary with the keys "tool_name", "parameters" and "metadata",
            suitable for JSON encoding.
        """
        return {
            "tool_name": action.tool_name,
            "parameters": action.parameters,
            "metadata": action.metadata,
        }

    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[AndroidObservation]:
        """
        Parse server response into StepResult[AndroidObservation].

        Args:
            payload: JSON response from server.

        Returns:
            StepResult with AndroidObservation containing screen state. Fields
            missing from the response fall back to empty/zero defaults.
        """
        # `or {}` also covers an explicit `"observation": null` in the
        # response, which would otherwise break every `.get` call below.
        obs_data = payload.get("observation") or {}

        # NOTE(review): the OpenEnv HTTP server is expected to put `reward`
        # and `done` at the top level of the response rather than inside
        # `observation` -- confirm against the server in use. Top-level values
        # take precedence; the nested values are kept as a fallback so
        # responses that embed them in the observation still parse as before.
        if "reward" in payload:
            reward = payload["reward"]
        else:
            reward = obs_data.get("reward")
        if "done" in payload:
            done = payload["done"]
        else:
            done = obs_data.get("done", False)

        observation = AndroidObservation(
            screen_image=obs_data.get("screen_image", ""),
            screen_width=obs_data.get("screen_width", 0),
            screen_height=obs_data.get("screen_height", 0),
            timestamp_ms=obs_data.get("timestamp_ms", 0),
            orientation=obs_data.get("orientation", 0),
            extras=obs_data.get("extras", {}),
            pixels_shape=obs_data.get("pixels_shape"),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )
        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    def _parse_state(self, payload: Dict[str, Any]) -> State:
        """
        Parse server response into State object.

        Args:
            payload: JSON response from /state endpoint.

        Returns:
            State object with episode_id (None when absent from the response)
            and step_count (0 when absent).
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
|