File size: 2,978 Bytes
cf6c0e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""HTTPEnvClient for the VisionCoder OpenEnv environment."""
from __future__ import annotations

import base64
import io
from typing import Optional

import httpx
from PIL import Image

from openenv.models import Action, Observation, RenderRequest, RenderResponse, State


class VisionCoderClient:
    """Synchronous HTTP client for the VisionCoder OpenEnv server.

    Wraps a single ``httpx.Client`` bound to ``base_url``. ``reset``, ``step``,
    ``render`` and ``state`` call ``raise_for_status()`` on the reply, so an
    HTTP error status surfaces as an ``httpx`` exception instead of being
    parsed into a model.

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self, base_url: str = "http://localhost:8080", timeout: float = 120.0):
        """Create the underlying HTTP client.

        Args:
            base_url: Root URL of the server. Trailing slashes are stripped.
            timeout: Timeout handed to ``httpx.Client`` for every request.
        """
        self._base_url = base_url.rstrip("/")
        self._client = httpx.Client(base_url=self._base_url, timeout=timeout)

    # ------------------------------------------------------------------
    # Core OpenEnv interface
    # ------------------------------------------------------------------

    def reset(self, difficulty: str = "mixed") -> Observation:
        """Start a new episode. Returns Observation with session_id and reference screenshot.

        Args:
            difficulty: Sent to ``POST /reset`` as the ``difficulty`` query parameter.
        """
        resp = self._client.post("/reset", params={"difficulty": difficulty})
        resp.raise_for_status()
        return Observation(**resp.json())

    def step(self, action: Action) -> Observation:
        """Submit HTML. Returns reward, render_low, render_full, done.

        Args:
            action: Serialized with ``model_dump()`` and sent as the JSON body
                of ``POST /step``.
        """
        resp = self._client.post("/step", json=action.model_dump())
        resp.raise_for_status()
        return Observation(**resp.json())

    def render(self, html: str) -> RenderResponse:
        """Render HTML to images without scoring (Developer tool call).

        Args:
            html: Markup wrapped in a ``RenderRequest`` and sent to ``POST /render``.
        """
        resp = self._client.post("/render", json=RenderRequest(html=html).model_dump())
        resp.raise_for_status()
        return RenderResponse(**resp.json())

    def state(self) -> State:
        """Return current episode metadata from the server (``GET /state``)."""
        resp = self._client.get("/state")
        resp.raise_for_status()
        return State(**resp.json())

    def close(self) -> None:
        """Signal session end and release HTTP resources.

        Sends ``DELETE /close`` and then closes the underlying HTTP client,
        even when the DELETE request raises (the exception still propagates).
        The DELETE reply's status code is not checked.

        Calling this more than once is a no-op after the first call.
        """
        # A closed httpx client refuses to send requests, so without this guard
        # an explicit close() followed by the context manager's __exit__ (or any
        # second close()) would raise instead of doing nothing.
        if self._client.is_closed:
            return
        try:
            self._client.delete("/close")
        finally:
            self._client.close()

    # ------------------------------------------------------------------
    # Convenience helpers
    # ------------------------------------------------------------------

    def decode_image(self, b64: Optional[str]) -> Optional[Image.Image]:
        """Decode a base64 PNG string into a PIL Image.

        Args:
            b64: Base64-encoded image data, or ``None``.

        Returns:
            The decoded image converted to ``"RGB"`` mode, or ``None`` when
            ``b64`` is ``None``.
        """
        if b64 is None:
            return None
        return Image.open(io.BytesIO(base64.b64decode(b64))).convert("RGB")

    def decode_screenshot(self, obs: Observation) -> Optional[Image.Image]:
        """Decode the reference screenshot from a reset() Observation.

        Reads ``obs.screenshot_b64`` and passes it to :meth:`decode_image`.
        """
        return self.decode_image(obs.screenshot_b64)

    # ------------------------------------------------------------------
    # Context manager support
    # ------------------------------------------------------------------

    def __enter__(self) -> VisionCoderClient:
        """Return this client for use as the ``with`` target."""
        return self

    def __exit__(self, *args: object) -> None:
        """Close the client on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()