| """BrowserGym Environment implementation for OpenEnv. |
| |
| This module wraps the BrowserGym framework to provide a compatible interface |
| with OpenEnv's Environment ABC. BrowserGym includes multiple benchmarks: |
| - MiniWoB++: Training environment with 100+ simple web tasks |
| - WebArena: Realistic evaluation with 812 complex tasks |
| - VisualWebArena: Visual web navigation tasks |
| - WorkArena: Enterprise task automation |
| """ |
|
|
| import importlib |
| import logging |
| from typing import Any, Dict, Optional |
| from uuid import uuid4 |
|
|
| import gymnasium as gym |
|
|
| from openenv.core.env_server.interfaces import Environment |
| from browsergym_env.models import ( |
| BrowserGymAction, |
| BrowserGymObservation, |
| BrowserGymState, |
| ) |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
| def _get_axtree_txt(obs: Dict[str, Any]) -> str: |
| """Extract accessibility tree text from BrowserGym observation. |
| |
| BrowserGym returns raw `axtree_object` which needs to be converted to text |
| using the `flatten_axtree_to_str` utility function. |
| """ |
| |
| if "axtree_txt" in obs and obs["axtree_txt"]: |
| return obs["axtree_txt"] |
|
|
| |
| if "axtree_object" in obs and obs["axtree_object"]: |
| try: |
| from browsergym.utils.obs import flatten_axtree_to_str |
|
|
| return flatten_axtree_to_str(obs["axtree_object"]) |
| except ImportError: |
| logger.warning("browsergym.utils.obs not available, cannot convert axtree_object to text") |
| except Exception as e: |
| logger.warning(f"Failed to convert axtree_object to text: {e}") |
|
|
| return "" |
|
|
|
|
| def _get_pruned_html(obs: Dict[str, Any]) -> str: |
| """Extract pruned HTML from BrowserGym observation. |
| |
| BrowserGym returns raw `dom_object` which needs to be converted to text |
| and then pruned using the `flatten_dom_to_str` and `prune_html` utilities. |
| """ |
| |
| if "pruned_html" in obs and obs["pruned_html"]: |
| return obs["pruned_html"] |
|
|
| |
| if "dom_object" in obs and obs["dom_object"]: |
| try: |
| from browsergym.utils.obs import flatten_dom_to_str, prune_html |
|
|
| dom_str = flatten_dom_to_str(obs["dom_object"]) |
| return prune_html(dom_str) |
| except ImportError: |
| logger.warning("browsergym.utils.obs not available, cannot convert dom_object to pruned_html") |
| except Exception as e: |
| logger.warning(f"Failed to convert dom_object to pruned_html: {e}") |
|
|
| return "" |
|
|
|
|
| _MINIWOB_LOAD_HELP = ( |
| "MiniWoB tasks require the MiniWoB HTML bundle to be served over HTTP. " |
| "The official BrowserGym Docker image handles this automatically by " |
| "serving the bundle on port 8888. For custom or non-Docker deployments, " |
| "clone the MiniWoB++ repository, start a static server inside " |
| "`miniwob-plusplus/miniwob/html` (e.g. `python -m http.server 8888`), and " |
| "set the MINIWOB_URL environment variable to the served base URL such as " |
| "`http://localhost:8888/miniwob/`." |
| ) |
|
|
|
|
| class BrowserGymEnvironment(Environment): |
| """BrowserGym environment wrapper for OpenEnv. |
| |
| This environment wraps BrowserGym's Gymnasium-compatible environments to |
| provide unified access to multiple web navigation benchmarks. |
| """ |
| |
| SUPPORTS_CONCURRENT_SESSIONS: bool = False |
|
|
| def __init__( |
| self, |
| benchmark: str = "miniwob", |
| task_name: Optional[str] = None, |
| headless: bool = True, |
| viewport_width: int = 1280, |
| viewport_height: int = 720, |
| timeout: float = 10000.0, |
| **gym_kwargs: Any, |
| ): |
| """Initialize the BrowserGym environment. |
| |
| Args: |
| benchmark: Benchmark to use ('miniwob', 'webarena', 'visualwebarena', etc.) |
| task_name: Specific task within the benchmark (e.g., 'click-test', 'click-button') |
| If None, will use first available task |
| headless: Whether to run browser in headless mode |
| viewport_width: Browser viewport width |
| viewport_height: Browser viewport height |
| timeout: Action timeout in milliseconds |
| **gym_kwargs: Additional arguments passed to gym.make() |
| """ |
| super().__init__() |
|
|
| self.benchmark = benchmark |
| self.task_name = task_name |
| self.headless = headless |
| self.viewport_width = viewport_width |
| self.viewport_height = viewport_height |
| self.timeout = timeout |
| self.gym_kwargs = dict(gym_kwargs) |
|
|
| |
| if task_name: |
| self.env_id = f"browsergym/{benchmark}.{task_name}" |
| else: |
| self.env_id = f"browsergym/{benchmark}" |
|
|
| |
| benchmark_modules = { |
| "miniwob": "browsergym.miniwob", |
| "webarena": "browsergym.webarena", |
| "visualwebarena": "browsergym.visualwebarena", |
| "workarena": "browsergym.workarena", |
| } |
| module_path = benchmark_modules.get(benchmark) |
| try: |
| if module_path: |
| importlib.import_module(module_path) |
| else: |
| importlib.import_module("browsergym") |
| except ModuleNotFoundError as import_error: |
| message = ( |
| "Failed to import BrowserGym benchmark " |
| f"'{benchmark}': {import_error}\n" |
| "Install the matching browsergym package " |
| f"(e.g., browsergym-{benchmark})." |
| ) |
| raise ValueError(message) from import_error |
|
|
| |
| try: |
| self.gym_env = gym.make( |
| self.env_id, |
| headless=headless, |
| viewport={"width": viewport_width, "height": viewport_height}, |
| timeout=timeout, |
| **self.gym_kwargs, |
| ) |
| except Exception as e: |
| message = ( |
| "Failed to create BrowserGym environment " |
| f"'{self.env_id}': {e}\n" |
| "Make sure the benchmark package is installed " |
| f"(e.g., pip install browsergym-{benchmark})." |
| ) |
| raise ValueError(message) from e |
|
|
| |
| self._state = BrowserGymState( |
| episode_id=str(uuid4()), |
| step_count=0, |
| benchmark=benchmark, |
| task_name=task_name or "", |
| ) |
|
|
| self._last_obs: Optional[Dict[str, Any]] = None |
| self._last_info: Optional[Dict[str, Any]] = None |
|
|
| def reset( |
| self, |
| seed: Optional[int] = None, |
| task_name: Optional[str] = None, |
| ) -> BrowserGymObservation: |
| """Reset the environment with a specific task. |
| |
| Args: |
| seed: Random seed for reproducibility |
| task_name: Override task name for this episode |
| |
| Returns: |
| Initial observation for the task |
| """ |
| |
| self._state = BrowserGymState( |
| episode_id=str(uuid4()), |
| step_count=0, |
| benchmark=self.benchmark, |
| task_name=task_name or self.task_name or "", |
| ) |
|
|
| |
| reset_options = {} |
| if seed is not None: |
| reset_options["seed"] = seed |
|
|
| |
| try: |
| obs, info = self.gym_env.reset(**reset_options) |
| except AttributeError as err: |
| if "context" in str(err) and hasattr(self.gym_env, "close"): |
| |
| |
| self.gym_env.close() |
| obs, info = self.gym_env.reset(**reset_options) |
| else: |
| raise |
| except Exception as err: |
| message = str(err) |
| if self.benchmark == "miniwob" and "core is not defined" in message: |
| raise ValueError(_MINIWOB_LOAD_HELP) from err |
| raise |
|
|
| self._last_obs = obs |
| self._last_info = info |
|
|
| |
| return self._create_observation(obs, info, done=False, reward=0.0) |
|
|
| def step(self, action: BrowserGymAction) -> BrowserGymObservation: |
| """Execute an action in the environment. |
| |
| Args: |
| action: The action to execute |
| |
| Returns: |
| Observation after executing the action |
| """ |
| self._state.step_count += 1 |
|
|
| |
| try: |
| obs, reward, terminated, truncated, info = self.gym_env.step(action.action_str) |
|
|
| self._last_obs = obs |
| self._last_info = info |
|
|
| |
| done = terminated or truncated |
| self._state.cum_reward += float(reward) |
|
|
| |
| if "goal" in info: |
| self._state.goal = str(info["goal"]) |
|
|
| return self._create_observation(obs, info, done=done, reward=float(reward)) |
|
|
| except Exception as e: |
| |
| error_msg = str(e) |
| return BrowserGymObservation( |
| text=self._last_obs.get("text", "") if self._last_obs else "", |
| url=self._last_obs.get("url", "") if self._last_obs else "", |
| goal=self._state.goal, |
| error=error_msg, |
| last_action_error=True, |
| done=False, |
| reward=0.0, |
| ) |
|
|
| def _create_observation( |
| self, |
| obs: Dict[str, Any], |
| info: Dict[str, Any], |
| done: bool, |
| reward: float, |
| ) -> BrowserGymObservation: |
| """Convert BrowserGym observation to OpenEnv format. |
| |
| Args: |
| obs: BrowserGym observation dict |
| info: BrowserGym info dict |
| done: Whether episode is done |
| reward: Reward for the step |
| |
| Returns: |
| BrowserGymObservation |
| """ |
| |
| |
| axtree_txt = _get_axtree_txt(obs) if isinstance(obs, dict) else "" |
| pruned_html = _get_pruned_html(obs) if isinstance(obs, dict) else "" |
|
|
| |
| text = axtree_txt or pruned_html |
| if not text and isinstance(obs, str): |
| text = obs |
|
|
| |
| url = "" |
| if isinstance(obs, dict): |
| url = obs.get("url", "") |
|
|
| |
| goal = "" |
| if isinstance(obs, dict): |
| |
| goal_object = obs.get("goal_object", []) |
| if goal_object: |
| |
| goal_texts = [] |
| for msg in goal_object: |
| if isinstance(msg, dict): |
| content = msg.get("content", "") |
| if isinstance(content, str): |
| goal_texts.append(content) |
| elif isinstance(content, list): |
| for item in content: |
| if isinstance(item, dict) and item.get("type") == "text": |
| goal_texts.append(item.get("text", "")) |
| goal = " ".join(goal_texts) |
| |
| if not goal: |
| goal = obs.get("goal", "") |
|
|
| |
| self._state.current_url = url |
| self._state.goal = goal |
|
|
| |
| screenshot = obs.get("screenshot") if isinstance(obs, dict) else None |
|
|
| |
| last_action_error = False |
| if isinstance(obs, dict): |
| last_action_error = bool(obs.get("last_action_error")) |
|
|
| |
| |
| |
| browsergym_metadata = {} |
| if isinstance(obs, dict): |
| |
| browsergym_metadata["browsergym_obs"] = { |
| k: v for k, v in obs.items() if k not in ("dom_object", "axtree_object", "screenshot") |
| } |
| browsergym_metadata["browsergym_info"] = info |
|
|
| return BrowserGymObservation( |
| text=text, |
| url=url, |
| screenshot=screenshot, |
| goal=goal, |
| axtree_txt=axtree_txt, |
| pruned_html=pruned_html, |
| error="", |
| last_action_error=last_action_error, |
| done=done, |
| reward=reward, |
| metadata=browsergym_metadata, |
| ) |
|
|
| @property |
| def state(self) -> BrowserGymState: |
| """Get the current environment state.""" |
| return self._state |
|
|
| def close(self) -> None: |
| """Clean up environment resources.""" |
| if hasattr(self, "gym_env"): |
| self.gym_env.close() |
|
|