Spaces:

openenv
/

browsergym_env

Running

App Files Files Community

sergiopaniego HF Staff committed on 25 days ago

Commit

4dfae67

verified ·

1 Parent(s): 9dfa3bc

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

Dockerfile +156 -0
README.md +556 -4
__init__.py +72 -0
client.py +122 -0
models.py +77 -0
openenv.yaml +5 -0
openenv_browsergym_env.egg-info/PKG-INFO +21 -0
openenv_browsergym_env.egg-info/SOURCES.txt +23 -0
openenv_browsergym_env.egg-info/dependency_links.txt +1 -0
openenv_browsergym_env.egg-info/entry_points.txt +2 -0
openenv_browsergym_env.egg-info/requires.txt +17 -0
openenv_browsergym_env.egg-info/top_level.txt +1 -0
pyproject.toml +48 -0
server/__init__.py +1 -0
server/app.py +50 -0
server/browsergym_environment.py +375 -0
server/requirements.txt +10 -0
server/start.sh +29 -0
uv.lock +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,156 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local src/core)
+# - Standalone environments (with openenv-core from pip)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+# BASE_IMAGE is declared before the first FROM so that both stages below can
+# reference it in their FROM lines.
+# NOTE(review): the default tracks the mutable `latest` tag; pin a version or
+# digest if reproducible builds are required.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+# Stage 1 ("builder"): build tooling, Python dependencies, browsers and tasks.
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Build argument to control whether we're building standalone or in-repo
+# NOTE(review): no later instruction in this Dockerfile reads BUILD_MODE.
+ARG BUILD_MODE=in-repo
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv-core is already in the pyproject.toml dependencies
+# For standalone builds, openenv-core will be installed from pip via pyproject.toml
+# The remaining RUN steps of this stage execute from the copied project directory.
+WORKDIR /app/env
+# Install git, download tools and system dependencies for Playwright browsers.
+# This layer must come before the uv bootstrap below: that step downloads the
+# installer with curl, which is only guaranteed to exist once it has been
+# installed here.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    wget \
+    curl \
+    # Playwright browser dependencies
+    libnss3 \
+    libnspr4 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libdbus-1-3 \
+    libxkbcommon0 \
+    libatspi2.0-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    libgbm1 \
+    libpango-1.0-0 \
+    libcairo2 \
+    libasound2 \
+    libxshmfence1 \
+    fonts-unifont \
+    fonts-noto-color-emoji \
+    && rm -rf /var/lib/apt/lists/*
+# Ensure uv is available (for local builds where base image lacks it).
+# The installer places uv/uvx under /root/.local/bin; they are moved to
+# /usr/local/bin so the later `uv sync` steps can find them on PATH.
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# First pass: install dependencies without the project (for better caching)
+# Second pass: install the project itself
+# Both passes mount /root/.cache/uv as a BuildKit cache, so that directory is
+# reused between builds and is not written into an image layer.
+# When uv.lock is present, --frozen is passed so the lockfile is used as-is;
+# otherwise uv resolves the dependencies from pyproject.toml.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Install Playwright browsers (Chromium by default)
+# Runs with the interpreter of the project's .venv (relative to /app/env).
+RUN .venv/bin/python -m playwright install chromium
+# Install MiniWoB++ tasks
+# Shallow clone (--depth 1) of the repository's default branch.
+# NOTE(review): no tag or commit is pinned, so the task files may differ between builds.
+RUN git clone --depth 1 https://github.com/Farama-Foundation/miniwob-plusplus.git /app/miniwob-plusplus
+# Final runtime stage
+# Starts again from the same base image; only the artifacts explicitly copied
+# from the builder stage further down are carried over.
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Install runtime system libraries for Playwright
+# Mirrors the Playwright library and font packages installed in the builder
+# stage; the builder's download tools (git, wget, curl) are not installed here.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libnss3 \
+    libnspr4 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libdbus-1-3 \
+    libxkbcommon0 \
+    libatspi2.0-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    libgbm1 \
+    libpango-1.0-0 \
+    libcairo2 \
+    libasound2 \
+    libxshmfence1 \
+    fonts-unifont \
+    fonts-noto-color-emoji \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+# NOTE(review): the builder's /app/env still contains .venv, so this copy puts
+# a second copy of the virtual environment at /app/env/.venv. Presumably the
+# scripts inside the venv reference that build-time path — confirm before
+# excluding it.
+COPY --from=builder /app/env /app/env
+# Copy MiniWoB++ tasks
+COPY --from=builder /app/miniwob-plusplus /app/miniwob-plusplus
+# Copy Playwright browsers from builder
+# (assumes `playwright install` stored them under root's cache directory in
+# the builder stage — TODO confirm if the builder user ever changes)
+COPY --from=builder /root/.cache/ms-playwright /root/.cache/ms-playwright
+# Set PATH to use the virtual environment (its executables take precedence)
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly.
+# The inherited value is appended only when the base image defines one:
+# expanding an unset PYTHONPATH would leave a trailing ":" — an empty
+# search-path entry — and triggers BuildKit's UndefinedVar build check.
+ENV PYTHONPATH="/app/env${PYTHONPATH:+:${PYTHONPATH}}"
+# Default runtime configuration, grouped by the component that reads it
+# Python
+ENV PYTHONUNBUFFERED=1
+# BrowserGym server
+ENV BROWSERGYM_BENCHMARK=miniwob \
+    BROWSERGYM_TASK_NAME="click-test" \
+    BROWSERGYM_HEADLESS=true \
+    BROWSERGYM_VIEWPORT_WIDTH=1280 \
+    BROWSERGYM_VIEWPORT_HEIGHT=720 \
+    BROWSERGYM_TIMEOUT=10000 \
+    BROWSERGYM_PORT=8000
+# MiniWoB++ task files and the URL they are served from
+ENV MINIWOB_HTML_DIR=/app/miniwob-plusplus/miniwob/html \
+    MINIWOB_HTTP_PORT=8888 \
+    MINIWOB_URL=http://127.0.0.1:8888/miniwob/
+# Web interface toggle
+ENV ENABLE_WEB_INTERFACE=true
+# Documented ports: 8000 and 8888
+EXPOSE 8000 8888
+# Health check using Python (more portable than curl/wget)
+# Requests /health on port 8000; a failed request makes the probe exit
+# non-zero, and the container is reported unhealthy after 3 consecutive failures.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
+# Run the server using start script or directly.
+# `exec` replaces the wrapper shell with the start script / uvicorn, so the
+# server runs as PID 1 and receives the stop signal from `docker stop`
+# directly instead of it being swallowed by the intermediate `sh -c` process.
+CMD ["sh", "-c", "cd /app/env && if [ -f server/start.sh ]; then chmod +x server/start.sh && exec ./server/start.sh; else exec uvicorn server.app:app --host 0.0.0.0 --port 8000; fi"]

README.md CHANGED Viewed

@@ -1,10 +1,562 @@
 ---
-title: Browsergym Env
-emoji: 🔥
 colorFrom: blue
-colorTo: red
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: BrowserGym Environment Server
+emoji: 🌐
 colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
+  - browsergym
+  - web-automation
+  - reinforcement-learning
 ---
+# BrowserGym Environment
+BrowserGym is a unified framework for web-based agent tasks that provides access to multiple benchmarks under a single Gymnasium-compatible API. This integration brings the complete training-to-evaluation pipeline for web agents into OpenEnv.
+## Why BrowserGym?
+BrowserGym provides a complete pipeline for developing web agents: train on simple tasks, then evaluate on realistic websites.
+**What are these benchmarks?**
+- **MiniWoB++ (Training)**: 100+ synthetic web tasks like "click this button", "fill out this form", "select from dropdown". Each task is a simple webpage with a clear objective. Fast resets, randomized variations, dense rewards. Perfect for learning basic web navigation skills. **No external setup needed** - tasks run in isolated browser sessions.
+- **WebArena (Evaluation)**: 812 tasks on real websites (e-commerce, forums, GitLab, Wikipedia). Tasks like "find the cheapest laptop and add to cart" or "create a merge request for bug #123". Multi-step, requires reasoning, sparse rewards. Tests if your agent can handle actual websites. **Requires running 7 backend services** (shopping site, GitLab instance, etc.).
+- **VisualWebArena**: Similar to WebArena but requires visual understanding - agents need to interpret images, identify UI elements visually, handle multimodal content.
+- **WorkArena**: Enterprise software tasks (CRM, project management, business workflows). Tests automation on corporate-style applications.
+**The training → evaluation pipeline:**
+1. Train on MiniWoB (simple, controlled, fast iterations)
+2. Evaluate on WebArena (complex, realistic, measures real-world capability)
+**Key advantage**: You can start training immediately with MiniWoB. No need to set up infrastructure just to test if your code works.
+## Quick Start - Training (MiniWoB)
+### No Setup Required! 🎉
+```python
+from browsergym_env import BrowserGymEnv, BrowserGymAction
+# Create environment for MiniWoB training task
+env = BrowserGymEnv.from_docker_image(
+    "ghcr.io/openenv/browsergym-env:latest",
+    environment={
+        "BROWSERGYM_BENCHMARK": "miniwob",
+        "BROWSERGYM_TASK_NAME": "click-test",  # or "click-button", "click-dialog", etc.
+    }
+)
+# Train your agent!
+for episode in range(1000):
+    result = env.reset()
+    print(f"Goal: {result.observation.goal}")
+    done = False
+    while not done:
+        # Your agent decides what to do
+        action_str = agent.get_action(result.observation.text)
+        action = BrowserGymAction(action_str=action_str)
+        result = env.step(action)
+        done = result.done
+        print(f"Reward: {result.reward}")
+env.close()
+```
+### Available Tasks by Benchmark
+#### MiniWoB++ Tasks (Training - 100+ tasks)
+MiniWoB tasks are organized by difficulty and type. Here are the main categories:
+**Click Tasks** (Basic interaction)
+| Task Name | Description | Difficulty |
+|-----------|-------------|------------|
+| `click-test` | Click a single button | ⭐ Easy |
+| `click-button` | Click button with specific text | ⭐ Easy |
+| `click-button-sequence` | Click buttons in order | ⭐⭐ Medium |
+| `click-checkboxes` | Select specific checkboxes | ⭐⭐ Medium |
+| `click-checkboxes-soft` | Select checkboxes (multiple valid) | ⭐⭐ Medium |
+| `click-checkboxes-large` | Many checkboxes to select from | ⭐⭐ Medium |
+| `click-checkboxes-transfer` | Transfer learning variation | ⭐⭐ Medium |
+| `click-dialog` | Click correct button in dialog | ⭐ Easy |
+| `click-dialog-2` | More complex dialog | ⭐⭐ Medium |
+| `click-link` | Click on a link | ⭐ Easy |
+| `click-option` | Select from dropdown | ⭐⭐ Medium |
+| `click-pie` | Click on pie chart slice | ⭐⭐ Medium |
+| `click-scroll-list` | Click item in scrollable list | ⭐⭐⭐ Hard |
+| `click-shades` | Click on specific color shade | ⭐⭐ Medium |
+| `click-shape` | Click on specific shape | ⭐⭐ Medium |
+| `click-tab` | Switch between tabs | ⭐⭐ Medium |
+| `click-tab-2` | More complex tab switching | ⭐⭐⭐ Hard |
+| `click-widget` | Click on UI widget | ⭐⭐ Medium |
+**Text Entry Tasks** (Typing and forms)
+| Task Name | Description | Difficulty |
+|-----------|-------------|------------|
+| `enter-text` | Type text into input field | ⭐ Easy |
+| `enter-text-dynamic` | Dynamic text entry | ⭐⭐ Medium |
+| `enter-text-2` | Multiple text fields | ⭐⭐ Medium |
+| `enter-password` | Fill password field | ⭐ Easy |
+| `enter-date` | Enter a date | ⭐⭐ Medium |
+| `enter-time` | Enter a time | ⭐⭐ Medium |
+| `login-user` | Complete login form | ⭐⭐ Medium |
+| `login-user-popup` | Login via popup | ⭐⭐⭐ Hard |
+**Navigation Tasks** (Multi-step interaction)
+| Task Name | Description | Difficulty |
+|-----------|-------------|------------|
+| `navigate-tree` | Navigate through tree structure | ⭐⭐⭐ Hard |
+| `search-engine` | Use search interface | ⭐⭐ Medium |
+| `use-autocomplete` | Interact with autocomplete | ⭐⭐⭐ Hard |
+| `book-flight` | Book a flight (complex form) | ⭐⭐⭐⭐ Very Hard |
+| `choose-date` | Pick date from calendar | ⭐⭐⭐ Hard |
+| `choose-date-easy` | Simplified date picker | ⭐⭐ Medium |
+| `choose-date-medium` | Medium difficulty date picker | ⭐⭐⭐ Hard |
+| `choose-list` | Select from long list | ⭐⭐ Medium |
+**Visual/Spatial Tasks** (Requires visual understanding)
+| Task Name | Description | Difficulty |
+|-----------|-------------|------------|
+| `count-sides` | Count sides of shape | ⭐⭐ Medium |
+| `count-shape` | Count specific shapes | ⭐⭐ Medium |
+| `find-word` | Find word in text | ⭐⭐ Medium |
+| `focus-text` | Focus on text element | ⭐ Easy |
+| `focus-text-2` | More complex focus task | ⭐⭐ Medium |
+| `grid-coordinate` | Click grid coordinate | ⭐⭐ Medium |
+| `guess-number` | Guess a number game | ⭐⭐⭐ Hard |
+| `identify-shape` | Identify shape type | ⭐⭐ Medium |
+| `read-table` | Extract info from table | ⭐⭐⭐ Hard |
+| `read-table-2` | More complex table reading | ⭐⭐⭐ Hard |
+**Email/Social Tasks** (Realistic scenarios)
+| Task Name | Description | Difficulty |
+|-----------|-------------|------------|
+| `email-inbox` | Manage email inbox | ⭐⭐⭐⭐ Very Hard |
+| `email-inbox-forward` | Forward emails | ⭐⭐⭐⭐ Very Hard |
+| `email-inbox-nl` | Natural language email task | ⭐⭐⭐⭐ Very Hard |
+| `email-inbox-star-reply` | Star and reply to emails | ⭐⭐⭐⭐ Very Hard |
+| `social-media` | Social media interaction | ⭐⭐⭐⭐ Very Hard |
+| `social-media-some` | Partial social media task | ⭐⭐⭐ Hard |
+**Total:** 100+ tasks across all categories
+**Usage:**
+```python
+# Easy task for quick testing
+env = BrowserGymEnv(environment={"BROWSERGYM_TASK_NAME": "click-test"})
+# Medium difficulty for training
+env = BrowserGymEnv(environment={"BROWSERGYM_TASK_NAME": "click-checkboxes"})
+# Hard task for evaluation
+env = BrowserGymEnv(environment={"BROWSERGYM_TASK_NAME": "email-inbox"})
+```
+#### WebArena Tasks (Evaluation - 812 tasks)
+WebArena tasks are organized by website and difficulty. Tasks are numbered 0-811.
+**By Website:**
+| Website | Task Count | Description | Example Tasks |
+|---------|------------|-------------|---------------|
+| Shopping | ~200 | E-commerce site | Search products, add to cart, checkout |
+| Shopping Admin | ~150 | Admin panel | Manage products, orders, customers |
+| Reddit | ~150 | Forum/social | Post, comment, search discussions |
+| GitLab | ~200 | Code repository | Create issues, merge requests, review code |
+| Wikipedia | ~100 | Knowledge base | Search, read, extract information |
+| Map | ~12 | Location service | Find places, get directions |
+**By Difficulty:**
+| Difficulty | Task Count | Steps Required | Example |
+|------------|------------|----------------|---------|
+| Easy | ~200 | 1-5 steps | "Find the price of product X" |
+| Medium | ~400 | 5-15 steps | "Add cheapest laptop to cart" |
+| Hard | ~212 | 15+ steps | "Create merge request for bug fix" |
+**Usage:**
+```python
+# Task 0 (usually easy)
+env = BrowserGymEnv(environment={
+    "BROWSERGYM_BENCHMARK": "webarena",
+    "BROWSERGYM_TASK_NAME": "0",
+    "SHOPPING": "http://your-server:7770",
+    # ... other URLs
+})
+# Task 156 (GitLab merge request)
+env = BrowserGymEnv(environment={
+    "BROWSERGYM_BENCHMARK": "webarena",
+    "BROWSERGYM_TASK_NAME": "156",
+    # ... URLs
+})
+```
+**Note:** WebArena tasks require the full backend infrastructure. See [WebArena setup guide](https://github.com/web-arena-x/webarena/tree/main/environment_docker).
+#### VisualWebArena Tasks (910 tasks)
+Similar to WebArena but requires visual understanding. Tasks involve:
+- Image-based reasoning
+- Visual element identification
+- Multimodal interaction (text + images)
+#### WorkArena Tasks
+Enterprise software automation tasks:
+- CRM operations
+- Project management
+- Business workflows
+**Full task lists:**
+- [MiniWoB++ tasks](https://github.com/Farama-Foundation/miniwob-plusplus/tree/master/miniwob/environment)
+- [WebArena tasks](https://github.com/web-arena-x/webarena/blob/main/config_files/)
+- [BrowserGym documentation](https://github.com/ServiceNow/BrowserGym)
+## Evaluation (WebArena)
+### Prerequisites
+WebArena requires setting up backend infrastructure. See the [WebArena documentation](https://github.com/web-arena-x/webarena/tree/main/environment_docker).
+### Usage
+```python
+from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+# Create environment for WebArena evaluation
+env = BrowserGymEnv.from_docker_image(
+    "ghcr.io/openenv/browsergym-env:latest",
+    environment={
+        "BROWSERGYM_BENCHMARK": "webarena",
+        "BROWSERGYM_TASK_NAME": "0",  # Task ID
+        # WebArena backend URLs (required)
+        "SHOPPING": "http://your-server:7770",
+        "SHOPPING_ADMIN": "http://your-server:7780/admin",
+        "REDDIT": "http://your-server:9999",
+        "GITLAB": "http://your-server:8023",
+        "MAP": "http://your-server:3000",
+        "WIKIPEDIA": "http://your-server:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing",
+        "HOMEPAGE": "http://your-server:4399",
+    }
+)
+# Evaluate your trained agent
+result = env.reset()
+while not result.done:
+    action_str = agent.get_action(result.observation)
+    action = BrowserGymAction(action_str=action_str)
+    result = env.step(action)
+print(f"Success: {result.reward}")
+env.close()
+```
+## Building the Docker Image
+### Prerequisites
+1. **Base Image**: Build the OpenEnv base image first:
+```bash
+# From the OpenEnv repository root
+docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
+```
+### Build the BrowserGym Environment
+```bash
+# From the OpenEnv repository root
+docker build -t browsergym-env:latest -f envs/browsergym_env/server/Dockerfile .
+```
+### Run the Server
+#### For MiniWoB (Training):
+```bash
+docker run -p 8000:8000 \
+  -e BROWSERGYM_BENCHMARK="miniwob" \
+  -e BROWSERGYM_TASK_NAME="click-test" \
+  browsergym-env:latest
+```
+#### For WebArena (Evaluation):
+```bash
+docker run -p 8000:8000 \
+  -e BROWSERGYM_BENCHMARK="webarena" \
+  -e BROWSERGYM_TASK_NAME="0" \
+  -e SHOPPING="http://your-server:7770" \
+  -e SHOPPING_ADMIN="http://your-server:7780/admin" \
+  -e REDDIT="http://your-server:9999" \
+  -e GITLAB="http://your-server:8023" \
+  -e MAP="http://your-server:3000" \
+  -e WIKIPEDIA="http://your-server:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" \
+  -e HOMEPAGE="http://your-server:4399" \
+  browsergym-env:latest
+```
+## Environment Details
+### Action
+Actions in BrowserGym are natural language strings that describe browser operations:
+```python
+from envs.browsergym_env import BrowserGymAction
+# Click actions
+action = BrowserGymAction(action_str="click('Submit button')")
+action = BrowserGymAction(action_str="click('element_id_123')")
+# Type actions
+action = BrowserGymAction(action_str="fill('username', 'john@example.com')")
+action = BrowserGymAction(action_str="fill('password', 'secret123')")
+# Navigate actions
+action = BrowserGymAction(action_str="goto('https://example.com')")
+# Keyboard actions
+action = BrowserGymAction(action_str="press('Enter')")
+action = BrowserGymAction(action_str="press('Tab')")
+# Scroll actions
+action = BrowserGymAction(action_str="scroll('down')")
+```
+### Observation
+Observations contain multiple modalities:
+```python
+result = env.step(action)
+obs = result.observation
+# Text observations
+print(obs.text)          # Primary text representation (AXTree or DOM)
+print(obs.axtree_txt)    # Accessibility tree
+print(obs.pruned_html)   # Pruned HTML (interactive elements only)
+# Page metadata
+print(obs.url)           # Current URL
+print(obs.goal)          # Task goal/instruction
+# Visual (if enabled)
+if obs.screenshot is not None:
+    print(obs.screenshot.shape)  # [height, width, channels]
+# Error handling
+if obs.last_action_error:
+    print(f"Action failed: {obs.error}")
+# Episode status
+print(obs.done)          # True if episode ended
+print(obs.reward)        # Reward for the step
+# Access full BrowserGym data (includes timestamps, etc.)
+print(obs.metadata["browsergym_obs"])  # Full observation dict from BrowserGym
+print(obs.metadata["browsergym_info"]) # Full info dict (timestamps, page state, etc.)
+```
+#### Advanced: Accessing Raw BrowserGym Data
+For VisualWebArena or custom training, you may need additional data like timestamps or browser state. The full BrowserGym observation and info dicts are preserved in `metadata`:
+```python
+result = env.step(action)
+# Access timestamps (if available)
+info = result.observation.metadata["browsergym_info"]
+if "timestamp" in info:
+    print(f"Action timestamp: {info['timestamp']}")
+# Access additional observation fields
+obs_dict = result.observation.metadata["browsergym_obs"]
+if "dom_object" in obs_dict:
+    dom = obs_dict["dom_object"]
+    # Work with raw DOM object
+# Access page performance data
+if "performance" in info:
+    print(f"Page load time: {info['performance']}")
+```
+### State
+The environment state tracks progress:
+```python
+state = env.state()
+print(f"Benchmark: {state.benchmark}")     # 'miniwob', 'webarena', etc.
+print(f"Task: {state.task_name}")          # Task name/ID
+print(f"Episode: {state.episode_id}")      # Unique episode ID
+print(f"Steps: {state.step_count}")        # Number of steps taken
+print(f"Total Reward: {state.cum_reward}") # Cumulative reward
+print(f"Goal: {state.goal}")               # Task instruction
+print(f"URL: {state.current_url}")         # Current page URL
+```
+## Configuration
+Environment variables:
+### Common Settings
+- `BROWSERGYM_BENCHMARK`: Benchmark to use (`miniwob`, `webarena`, `visualwebarena`, `workarena`)
+- `BROWSERGYM_TASK_NAME`: Specific task name (optional, will use first available if not set)
+- `BROWSERGYM_HEADLESS`: Run browser in headless mode (default: `true`)
+- `BROWSERGYM_VIEWPORT_WIDTH`: Browser viewport width (default: `1280`)
+- `BROWSERGYM_VIEWPORT_HEIGHT`: Browser viewport height (default: `720`)
+- `BROWSERGYM_TIMEOUT`: Action timeout in milliseconds (default: `10000`)
+### WebArena-Specific (only needed for WebArena benchmark)
+- `SHOPPING`: Shopping website URL
+- `SHOPPING_ADMIN`: Shopping admin panel URL
+- `REDDIT`: Reddit-like forum URL
+- `GITLAB`: GitLab instance URL
+- `MAP`: Map service URL
+- `WIKIPEDIA`: Wikipedia instance URL
+- `HOMEPAGE`: Homepage URL
+## Supported Benchmarks
+### 1. MiniWoB++ (Training) ✅ Recommended for Training
+- **100+ tasks** ranging from simple (click buttons) to complex (form filling, navigation)
+- **Fast**: Instant resets, quick episodes
+- **Randomized**: Task variations for generalization
+- **No setup**: Works out-of-the-box
+- **Dense rewards**: Immediate feedback for learning
+**Use Case**: Train agents on fundamental web navigation skills
+### 2. WebArena (Evaluation) 📊 Benchmark
+- **812 realistic tasks** across 6 websites
+- **Complex**: Multi-step reasoning, real web interfaces
+- **Requires setup**: Need to run 7 backend services
+- **Sparse rewards**: Binary success/failure
+- **Evaluation-focused**: Test real-world performance
+**Use Case**: Evaluate agents on realistic web tasks
+### 3. VisualWebArena (Evaluation) 👁️ Visual Benchmark
+- **910 tasks** requiring visual understanding
+- **Multimodal**: Both text and visual observations
+- **Requires setup**: Similar to WebArena
+- **Challenging**: Requires visual reasoning
+**Use Case**: Test visual web navigation capabilities
+### 4. WorkArena (Evaluation) 💼 Enterprise Benchmark
+- **Enterprise tasks**: CRM, project management, etc.
+- **Realistic workflows**: Real enterprise software
+- **Requires setup**: Enterprise software instances
+**Use Case**: Evaluate on business automation tasks
+## Typical Training Pipeline
+```python
+from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+# Stage 1: Train on MiniWoB (simple tasks, fast)
+train_env = BrowserGymEnv.from_docker_image(
+    "browsergym-env:latest",
+    environment={
+        "BROWSERGYM_BENCHMARK": "miniwob",
+        "BROWSERGYM_TASK_NAME": "click-button",
+    }
+)
+# Train your agent (RL, imitation learning, etc.)
+agent.train(train_env, num_episodes=10000)
+train_env.close()
+# Stage 2: Evaluate on WebArena (complex tasks, realistic)
+eval_env = BrowserGymEnv.from_docker_image(
+    "browsergym-env:latest",
+    environment={
+        "BROWSERGYM_BENCHMARK": "webarena",
+        "BROWSERGYM_TASK_NAME": "0",
+        # ... WebArena URLs
+    }
+)
+# Test performance
+success_rate = agent.evaluate(eval_env, num_tasks=812)
+print(f"WebArena Success Rate: {success_rate:.2%}")
+eval_env.close()
+```
+## Development & Testing
+### Running Tests
+```bash
+# From the OpenEnv repository root
+pytest tests/envs/test_browsergym_env.py
+```
+### Local Development
+```bash
+# Install in development mode
+cd /path/to/OpenEnv
+pip install -e .
+# Install BrowserGym
+pip install browsergym browsergym-miniwob browsergym-webarena
+# Run the server locally
+cd envs/browsergym_env/server
+export BROWSERGYM_BENCHMARK=miniwob
+export BROWSERGYM_TASK_NAME=click-test
+python app.py
+```
+## Project Structure
+```
+browsergym_env/
+├── __init__.py              # Module exports
+├── models.py                # Action, Observation, State dataclasses
+├── client.py                # EnvClient implementation
+├── README.md                # This file
+└── server/
+    ├── __init__.py
+    ├── app.py               # FastAPI application
+    ├── browsergym_environment.py  # Environment implementation
+    ├── Dockerfile           # Container specification
+    └── requirements.txt     # Python dependencies
+```
+## References
+- [BrowserGym GitHub](https://github.com/ServiceNow/BrowserGym)
+- [MiniWoB++ Paper](https://arxiv.org/abs/1802.08802)
+- [WebArena Paper](https://arxiv.org/abs/2307.13854)
+- [WebArena Website](https://webarena.dev/)
+- [VisualWebArena Project Page](https://jykoh.com/vwa)
+- [OpenEnv Documentation](https://github.com/meta-pytorch/OpenEnv)

__init__.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""BrowserGym Environment for OpenEnv.
+BrowserGym is a unified framework for web-based agent tasks that provides
+access to multiple benchmarks under a single Gymnasium-compatible API.
+Included Benchmarks:
+- **MiniWoB++**: 100+ simple web tasks for training (no external infrastructure!)
+- **WebArena**: 812 realistic evaluation tasks (requires backend setup)
+- **VisualWebArena**: Visual web navigation tasks
+- **WorkArena**: Enterprise task automation
+Key Features:
+- Unified API across all benchmarks
+- Gymnasium-compatible interface
+- Support for multiple observation types (text, visual, DOM)
+- Action spaces for natural language commands
+- Perfect for training (MiniWoB) and evaluation (WebArena)
+Training Example (MiniWoB - works immediately):
+    ```python
+    from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+    # Create training environment - no backend setup needed!
+    env = BrowserGymEnv.from_docker_image(
+        "browsergym-env:latest",
+        environment={
+            "BROWSERGYM_BENCHMARK": "miniwob",
+            "BROWSERGYM_TASK_NAME": "click-test",
+        }
+    )
+    # Train your agent
+    for episode in range(1000):
+        result = env.reset()
+        while not result.done:
+            action = agent.get_action(result.observation)
+            result = env.step(action)
+    env.close()
+    ```
+Evaluation Example (WebArena - requires backend):
+    ```python
+    from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+    # Create evaluation environment
+    env = BrowserGymEnv.from_docker_image(
+        "browsergym-env:latest",
+        environment={
+            "BROWSERGYM_BENCHMARK": "webarena",
+            "BROWSERGYM_TASK_NAME": "0",
+            "SHOPPING": "http://your-server:7770",
+            # ... other backend URLs
+        }
+    )
+    # Evaluate your trained agent
+    result = env.reset()
+    # ... run evaluation
+    env.close()
+    ```
+"""
+from .client import BrowserGymEnv
+from .models import BrowserGymAction, BrowserGymObservation, BrowserGymState
+# Public API of the package: the client class plus the action, observation
+# and state models imported above.
+__all__ = [
+    "BrowserGymEnv",
+    "BrowserGymAction",
+    "BrowserGymObservation",
+    "BrowserGymState",
+]

client.py ADDED Viewed

	@@ -0,0 +1,122 @@

+"""Client for the BrowserGym environment."""
+from typing import Any, Dict
+from openenv.core.client_types import StepResult
+from openenv.core.env_client import EnvClient
+from .models import (
+    BrowserGymAction,
+    BrowserGymObservation,
+    BrowserGymState,
+)
+class BrowserGymEnv(EnvClient[BrowserGymAction, BrowserGymObservation, BrowserGymState]):
+    """Client for interacting with the BrowserGym environment.
+    BrowserGym provides unified access to multiple web navigation benchmarks:
+    - MiniWoB++: 100+ training tasks (no external infrastructure needed!)
+    - WebArena: 812 evaluation tasks (requires backend setup)
+    - VisualWebArena: Visual navigation tasks
+    - WorkArena: Enterprise automation tasks
+    Example usage for TRAINING (MiniWoB - works out of the box):
+        ```python
+        from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+        # Create environment for MiniWoB training task
+        env = BrowserGymEnv.from_docker_image(
+            "browsergym-env:latest",
+            environment={
+                "BROWSERGYM_BENCHMARK": "miniwob",
+                "BROWSERGYM_TASK_NAME": "click-test",
+            }
+        )
+        # Reset and get initial observation
+        result = env.reset()
+        print(f"Task: {result.observation.goal}")
+        print(f"Page: {result.observation.text[:200]}")
+        # Take actions
+        action = BrowserGymAction(action_str="click('Submit button')")
+        result = env.step(action)
+        print(f"Reward: {result.reward}")
+        print(f"Done: {result.done}")
+        env.close()
+        ```
+    Example usage for EVALUATION (WebArena - requires backend):
+        ```python
+        from envs.browsergym_env import BrowserGymEnv, BrowserGymAction
+        # Create environment for WebArena evaluation
+        env = BrowserGymEnv.from_docker_image(
+            "browsergym-env:latest",
+            environment={
+                "BROWSERGYM_BENCHMARK": "webarena",
+                "BROWSERGYM_TASK_NAME": "0",  # Task 0
+                # WebArena backend URLs
+                "SHOPPING": "http://your-server:7770",
+                "GITLAB": "http://your-server:8023",
+                # ... other URLs
+            }
+        )
+        result = env.reset()
+        # ... interact with environment
+        env.close()
+        ```
+    Available benchmarks:
+        - miniwob: MiniWoB++ tasks (training, no setup required)
+        - webarena: WebArena tasks (evaluation, requires backend)
+        - visualwebarena: Visual WebArena tasks (evaluation, requires backend)
+        - workarena: WorkArena tasks (evaluation, requires backend)
+    """
+    def _step_payload(self, action: BrowserGymAction) -> Dict[str, Any]:
+        """Convert a BrowserGymAction to the JSON payload for the server."""
+        return {
+            "action_str": action.action_str,
+            "metadata": action.metadata,
+        }
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[BrowserGymObservation]:
+        """Parse the server response into a StepResult."""
+        obs_data = payload.get("observation", {})
+        observation = BrowserGymObservation(
+            text=obs_data.get("text", ""),
+            url=obs_data.get("url", ""),
+            screenshot=obs_data.get("screenshot"),
+            goal=obs_data.get("goal", ""),
+            axtree_txt=obs_data.get("axtree_txt", ""),
+            pruned_html=obs_data.get("pruned_html", ""),
+            error=obs_data.get("error", ""),
+            last_action_error=obs_data.get("last_action_error", False),
+            done=payload.get("done", False),
+            reward=payload.get("reward"),
+            metadata=obs_data.get("metadata", {}),
+        )
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict[str, Any]) -> BrowserGymState:
+        """Parse the server state response into a BrowserGymState object."""
+        return BrowserGymState(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+            benchmark=payload.get("benchmark", ""),
+            task_name=payload.get("task_name", ""),
+            task_id=payload.get("task_id"),
+            goal=payload.get("goal", ""),
+            current_url=payload.get("current_url", ""),
+            max_steps=payload.get("max_steps"),
+            cum_reward=payload.get("cum_reward", 0.0),
+        )

models.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""Data models for the BrowserGym environment.
+BrowserGym is a unified framework for web-based agent tasks, combining multiple
+benchmarks including MiniWoB (training), WebArena (evaluation), VisualWebArena,
+and more under a single Gymnasium-compatible API.
+"""
+from typing import List, Optional
+from pydantic import Field
+from openenv.core.env_server.types import Action, Observation, State
+class BrowserGymAction(Action):
+    """Action to be executed in the BrowserGym environment.
+    BrowserGym supports high-level natural language actions that can be parsed
+    into browser operations.
+    Example actions:
+    - "click('Submit button')"
+    - "fill('username', 'john@example.com')"
+    - "goto('https://example.com')"
+    - "scroll(down)"
+    - "send_keys('Enter')"
+    """
+    action_str: str = Field(..., description="Natural language action string (e.g., \"click('Submit')\")")
+class BrowserGymObservation(Observation):
+    """Observation returned from the BrowserGym environment.
+    Contains multiple observation modalities including text (accessibility tree
+    or DOM), visual (screenshot), and page metadata.
+    """
+    text: str = Field(default="", description="Text representation of the page (accessibility tree or DOM)")
+    url: str = Field(default="", description="Current URL of the page")
+    screenshot: Optional[List[List[List[int]]]] = Field(
+        default=None,
+        description="Screenshot as numpy array [height, width, channels] (if visual observation enabled)"
+    )
+    goal: str = Field(default="", description="Task goal/instruction for the current episode")
+    axtree_txt: str = Field(default="", description="Full accessibility tree as text")
+    pruned_html: str = Field(default="", description="Pruned HTML content (interactive elements only)")
+    error: str = Field(default="", description="Error message if action execution failed")
+    last_action_error: bool = Field(default=False, description="Whether the last action resulted in an error")
+class BrowserGymState(State):
+    """State of the BrowserGym environment.
+    Tracks the current benchmark, task, and progress through an episode.
+    """
+    benchmark: str = Field(default="", description="Benchmark name (e.g., 'miniwob', 'webarena', 'visualwebarena')")
+    task_name: str = Field(default="", description="Specific task within the benchmark (e.g., 'click-test', 'click-button')")
+    task_id: Optional[str] = Field(default=None, description="Task ID for evaluation benchmarks (e.g., WebArena task number)")
+    goal: str = Field(default="", description="Task goal/instruction")
+    current_url: str = Field(default="", description="Current URL of the active page")
+    max_steps: Optional[int] = Field(default=None, description="Maximum steps allowed for this task")
+    cum_reward: float = Field(default=0.0, description="Cumulative reward for the current episode")

openenv.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+name: browsergym_env
+version: "0.1.0"
+description: "BrowserGym environment for web automation tasks using Playwright"
+action: BrowserGymAction
+observation: BrowserGymObservation

openenv_browsergym_env.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,21 @@

+Metadata-Version: 2.4
+Name: openenv-browsergym_env
+Version: 0.1.0
+Summary: BrowserGym Environment for OpenEnv - Web automation using Playwright
+Requires-Python: >=3.10
+Requires-Dist: openenv-core[core]>=0.2.0
+Requires-Dist: fastapi>=0.104.0
+Requires-Dist: uvicorn[standard]>=0.24.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: requests>=2.25.0
+Requires-Dist: browsergym-core>=0.2.0
+Requires-Dist: browsergym-miniwob>=0.2.0
+Requires-Dist: browsergym-webarena>=0.2.0
+Requires-Dist: gymnasium>=0.29.0
+Requires-Dist: playwright>=1.40.0
+Requires-Dist: greenlet>=3.1.0
+Requires-Dist: Pillow>=10.0.0
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: ipykernel>=6.29.5; extra == "dev"

openenv_browsergym_env.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+README.md
+__init__.py
+client.py
+models.py
+openenv.yaml
+pyproject.toml
+./README.md
+./__init__.py
+./client.py
+./models.py
+./openenv.yaml
+./server/__init__.py
+./server/app.py
+./server/browsergym_environment.py
+openenv_browsergym_env.egg-info/PKG-INFO
+openenv_browsergym_env.egg-info/SOURCES.txt
+openenv_browsergym_env.egg-info/dependency_links.txt
+openenv_browsergym_env.egg-info/entry_points.txt
+openenv_browsergym_env.egg-info/requires.txt
+openenv_browsergym_env.egg-info/top_level.txt
+server/__init__.py
+server/app.py
+server/browsergym_environment.py

openenv_browsergym_env.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

openenv_browsergym_env.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [console_scripts]
2	+ server = browsergym_env.server.app:main

openenv_browsergym_env.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+openenv-core[core]>=0.2.0
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+pydantic>=2.0.0
+requests>=2.25.0
+browsergym-core>=0.2.0
+browsergym-miniwob>=0.2.0
+browsergym-webarena>=0.2.0
+gymnasium>=0.29.0
+playwright>=1.40.0
+greenlet>=3.1.0
+Pillow>=10.0.0
+[dev]
+pytest>=8.0.0
+pytest-cov>=4.0.0
+ipykernel>=6.29.5

openenv_browsergym_env.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ browsergym_env

pyproject.toml ADDED Viewed

	@@ -0,0 +1,48 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-browsergym"
+version = "0.1.0"
+description = "BrowserGym Environment for OpenEnv - Web automation using Playwright"
+requires-python = ">=3.10"
+dependencies = [
+    # Core OpenEnv dependencies (required for server functionality)
+    "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main",
+    "fastapi>=0.115.0",
+    "uvicorn[standard]>=0.24.0",
+    "pydantic>=2.0.0",
+    "requests>=2.31.0",
+    # Environment-specific dependencies
+    "browsergym-core>=0.2.0",
+    "browsergym-miniwob>=0.2.0",
+    "browsergym-webarena>=0.2.0",
+    "gymnasium>=0.29.0",
+    "playwright>=1.40.0",
+    "greenlet>=3.1.0",  # Required for Python 3.13 compatibility
+    "Pillow>=10.0.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+    "ipykernel>=6.29.5",
+]
+[project.scripts]
+server = "browsergym_env.server.app:main"
+[tool.setuptools]
+packages = ["browsergym_env", "browsergym_env.server"]
+package-dir = { "browsergym_env" = ".", "browsergym_env.server" = "server" }
+[tool.setuptools.package-data]
+browsergym_env = ["**/*.yaml", "**/*.yml", "**/*.md"]

server/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """BrowserGym environment server module."""

server/app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+"""FastAPI server for the BrowserGym environment."""
+import os
+from openenv.core.env_server.http_server import create_app
+from browsergym_env.models import BrowserGymAction, BrowserGymObservation
+from browsergym_env.server.browsergym_environment import BrowserGymEnvironment
+# Get configuration from environment variables
+benchmark = os.environ.get("BROWSERGYM_BENCHMARK", "miniwob")
+task_name = os.environ.get("BROWSERGYM_TASK_NAME")  # Optional, can be None
+headless = os.environ.get("BROWSERGYM_HEADLESS", "true").lower() == "true"
+viewport_width = int(os.environ.get("BROWSERGYM_VIEWPORT_WIDTH", "1280"))
+viewport_height = int(os.environ.get("BROWSERGYM_VIEWPORT_HEIGHT", "720"))
+timeout = float(os.environ.get("BROWSERGYM_TIMEOUT", "10000"))
+port = int(os.environ.get("BROWSERGYM_PORT", "8000"))
+# Factory function to create BrowserGymEnvironment instances
+def create_browsergym_environment():
+    """Factory function that creates BrowserGymEnvironment with config."""
+    return BrowserGymEnvironment(
+        benchmark=benchmark,
+        task_name=task_name,
+        headless=headless,
+        viewport_width=viewport_width,
+        viewport_height=viewport_height,
+        timeout=timeout,
+    )
+# Create the FastAPI app.
+# create_app receives the factory function rather than an environment instance;
+# this is done for WebSocket session support.
+app = create_app(
+    create_browsergym_environment,
+    BrowserGymAction,
+    BrowserGymObservation,
+    env_name="browsergym_env",
+)
+def main():
+    """Main entry point for running the server."""
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=port)
+if __name__ == "__main__":
+    main()

server/browsergym_environment.py ADDED Viewed

	@@ -0,0 +1,375 @@

+"""BrowserGym Environment implementation for OpenEnv.
+This module wraps the BrowserGym framework to provide a compatible interface
+with OpenEnv's Environment ABC. BrowserGym includes multiple benchmarks:
+- MiniWoB++: Training environment with 100+ simple web tasks
+- WebArena: Realistic evaluation with 812 complex tasks
+- VisualWebArena: Visual web navigation tasks
+- WorkArena: Enterprise task automation
+"""
+import importlib
+import logging
+from typing import Any, Dict, Optional
+from uuid import uuid4
+import gymnasium as gym
+from openenv.core.env_server.interfaces import Environment
+from browsergym_env.models import (
+    BrowserGymAction,
+    BrowserGymObservation,
+    BrowserGymState,
+)
+logger = logging.getLogger(__name__)
+def _get_axtree_txt(obs: Dict[str, Any]) -> str:
+    """Extract accessibility tree text from BrowserGym observation.
+    BrowserGym returns raw `axtree_object` which needs to be converted to text
+    using the `flatten_axtree_to_str` utility function.
+    """
+    # If already processed as text, return directly
+    if "axtree_txt" in obs and obs["axtree_txt"]:
+        return obs["axtree_txt"]
+    # Try to convert from raw axtree_object
+    if "axtree_object" in obs and obs["axtree_object"]:
+        try:
+            from browsergym.utils.obs import flatten_axtree_to_str
+            return flatten_axtree_to_str(obs["axtree_object"])
+        except ImportError:
+            logger.warning("browsergym.utils.obs not available, cannot convert axtree_object to text")
+        except Exception as e:
+            logger.warning(f"Failed to convert axtree_object to text: {e}")
+    return ""
+def _get_pruned_html(obs: Dict[str, Any]) -> str:
+    """Extract pruned HTML from BrowserGym observation.
+    BrowserGym returns raw `dom_object` which needs to be converted to text
+    and then pruned using the `flatten_dom_to_str` and `prune_html` utilities.
+    """
+    # If already processed as pruned_html, return directly
+    if "pruned_html" in obs and obs["pruned_html"]:
+        return obs["pruned_html"]
+    # Try to convert from raw dom_object
+    if "dom_object" in obs and obs["dom_object"]:
+        try:
+            from browsergym.utils.obs import flatten_dom_to_str, prune_html
+            dom_str = flatten_dom_to_str(obs["dom_object"])
+            return prune_html(dom_str)
+        except ImportError:
+            logger.warning("browsergym.utils.obs not available, cannot convert dom_object to pruned_html")
+        except Exception as e:
+            logger.warning(f"Failed to convert dom_object to pruned_html: {e}")
+    return ""
+# Help text raised as a ValueError by reset() when a MiniWoB reset fails with
+# "core is not defined"; it explains how to serve the MiniWoB HTML bundle.
+_MINIWOB_LOAD_HELP = (
+    "MiniWoB tasks require the MiniWoB HTML bundle to be served over HTTP. "
+    "The official BrowserGym Docker image handles this automatically by "
+    "serving the bundle on port 8888. For custom or non-Docker deployments, "
+    "clone the MiniWoB++ repository, start a static server inside "
+    "`miniwob-plusplus/miniwob/html` (e.g. `python -m http.server 8888`), and "
+    "set the MINIWOB_URL environment variable to the served base URL such as "
+    "`http://localhost:8888/miniwob/`."
+)
+class BrowserGymEnvironment(Environment):
+    """BrowserGym environment wrapper for OpenEnv.
+    This environment wraps BrowserGym's Gymnasium-compatible environments to
+    provide unified access to multiple web navigation benchmarks.
+    """
+    def __init__(
+        self,
+        benchmark: str = "miniwob",
+        task_name: Optional[str] = None,
+        headless: bool = True,
+        viewport_width: int = 1280,
+        viewport_height: int = 720,
+        timeout: float = 10000.0,
+        **gym_kwargs: Any,
+    ):
+        """Initialize the BrowserGym environment.
+        Args:
+            benchmark: Benchmark to use ('miniwob', 'webarena', 'visualwebarena', etc.)
+            task_name: Specific task within the benchmark (e.g., 'click-test', 'click-button')
+                      If None, will use first available task
+            headless: Whether to run browser in headless mode
+            viewport_width: Browser viewport width
+            viewport_height: Browser viewport height
+            timeout: Action timeout in milliseconds
+            **gym_kwargs: Additional arguments passed to gym.make()
+        """
+        super().__init__()
+        self.benchmark = benchmark
+        self.task_name = task_name
+        self.headless = headless
+        self.viewport_width = viewport_width
+        self.viewport_height = viewport_height
+        self.timeout = timeout
+        self.gym_kwargs = dict(gym_kwargs)
+        # Build environment ID
+        if task_name:
+            self.env_id = f"browsergym/{benchmark}.{task_name}"
+        else:
+            self.env_id = f"browsergym/{benchmark}"
+        # force import the benchmark module
+        benchmark_modules = {
+            "miniwob": "browsergym.miniwob",
+            "webarena": "browsergym.webarena",
+            "visualwebarena": "browsergym.visualwebarena",
+            "workarena": "browsergym.workarena",
+        }
+        module_path = benchmark_modules.get(benchmark)
+        try:
+            if module_path:
+                importlib.import_module(module_path)
+            else:
+                importlib.import_module("browsergym")
+        except ModuleNotFoundError as import_error:
+            message = (
+                "Failed to import BrowserGym benchmark "
+                f"'{benchmark}': {import_error}\n"
+                "Install the matching browsergym package "
+                f"(e.g., browsergym-{benchmark})."
+            )
+            raise ValueError(message) from import_error
+        # Create the BrowserGym environment
+        try:
+            self.gym_env = gym.make(
+                self.env_id,
+                headless=headless,
+                viewport={"width": viewport_width, "height": viewport_height},
+                timeout=timeout,
+                **self.gym_kwargs,
+            )
+        except Exception as e:  # noqa: BLE001 - gym.make
+            message = (
+                "Failed to create BrowserGym environment "
+                f"'{self.env_id}': {e}\n"
+                "Make sure the benchmark package is installed "
+                f"(e.g., pip install browsergym-{benchmark})."
+            )
+            raise ValueError(message) from e
+        # State tracking
+        self._state = BrowserGymState(
+            episode_id=str(uuid4()),
+            step_count=0,
+            benchmark=benchmark,
+            task_name=task_name or "",
+        )
+        self._last_obs: Optional[Dict[str, Any]] = None
+        self._last_info: Optional[Dict[str, Any]] = None
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        task_name: Optional[str] = None,
+    ) -> BrowserGymObservation:
+        """Reset the environment with a specific task.
+        Args:
+            seed: Random seed for reproducibility
+            task_name: Override task name for this episode
+        Returns:
+            Initial observation for the task
+        """
+        # Generate new episode ID
+        self._state = BrowserGymState(
+            episode_id=str(uuid4()),
+            step_count=0,
+            benchmark=self.benchmark,
+            task_name=task_name or self.task_name or "",
+        )
+        # Reset options
+        reset_options = {}
+        if seed is not None:
+            reset_options["seed"] = seed
+        # Reset the gym environment
+        try:
+            obs, info = self.gym_env.reset(**reset_options)
+        except AttributeError as err:
+            if "context" in str(err) and hasattr(self.gym_env, "close"):
+                # BrowserGym can leave partially initialized state after a
+                # failed reset. Close the hanging resources and try once more.
+                self.gym_env.close()
+                obs, info = self.gym_env.reset(**reset_options)
+            else:
+                raise
+        except Exception as err:  # noqa: BLE001 - browsergym
+            message = str(err)
+            if self.benchmark == "miniwob" and "core is not defined" in message:
+                raise ValueError(_MINIWOB_LOAD_HELP) from err
+            raise
+        self._last_obs = obs
+        self._last_info = info
+        # Extract observation details
+        return self._create_observation(obs, info, done=False, reward=0.0)
+    def step(self, action: BrowserGymAction) -> BrowserGymObservation:
+        """Execute an action in the environment.
+        Args:
+            action: The action to execute
+        Returns:
+            Observation after executing the action
+        """
+        self._state.step_count += 1
+        # Execute action in gym environment
+        try:
+            obs, reward, terminated, truncated, info = self.gym_env.step(action.action_str)
+            self._last_obs = obs
+            self._last_info = info
+            # Update state
+            done = terminated or truncated
+            self._state.cum_reward += float(reward)
+            # Extract goal from info if available
+            if "goal" in info:
+                self._state.goal = str(info["goal"])
+            return self._create_observation(obs, info, done=done, reward=float(reward))
+        except Exception as e:
+            # Handle action execution errors
+            error_msg = str(e)
+            return BrowserGymObservation(
+                text=self._last_obs.get("text", "") if self._last_obs else "",
+                url=self._last_obs.get("url", "") if self._last_obs else "",
+                goal=self._state.goal,
+                error=error_msg,
+                last_action_error=True,
+                done=False,
+                reward=0.0,
+            )
+    def _create_observation(
+        self,
+        obs: Dict[str, Any],
+        info: Dict[str, Any],
+        done: bool,
+        reward: float,
+    ) -> BrowserGymObservation:
+        """Convert BrowserGym observation to OpenEnv format.
+        Args:
+            obs: BrowserGym observation dict
+            info: BrowserGym info dict
+            done: Whether episode is done
+            reward: Reward for the step
+        Returns:
+            BrowserGymObservation
+        """
+        # Generate text representations from raw BrowserGym objects
+        # BrowserGym returns axtree_object and dom_object which need conversion
+        axtree_txt = _get_axtree_txt(obs) if isinstance(obs, dict) else ""
+        pruned_html = _get_pruned_html(obs) if isinstance(obs, dict) else ""
+        # Extract text observation - prefer axtree_txt, fallback to pruned_html
+        text = axtree_txt or pruned_html
+        if not text and isinstance(obs, str):
+            text = obs
+        # Extract URL from obs (BrowserGym stores it there)
+        url = ""
+        if isinstance(obs, dict):
+            url = obs.get("url", "")
+        # Extract goal/instruction from goal_object or legacy goal field
+        goal = ""
+        if isinstance(obs, dict):
+            # New format: goal_object is a list of messages
+            goal_object = obs.get("goal_object", [])
+            if goal_object:
+                # Extract text content from goal messages
+                goal_texts = []
+                for msg in goal_object:
+                    if isinstance(msg, dict):
+                        content = msg.get("content", "")
+                        if isinstance(content, str):
+                            goal_texts.append(content)
+                        elif isinstance(content, list):
+                            for item in content:
+                                if isinstance(item, dict) and item.get("type") == "text":
+                                    goal_texts.append(item.get("text", ""))
+                goal = " ".join(goal_texts)
+            # Fallback to legacy goal field
+            if not goal:
+                goal = obs.get("goal", "")
+        # Update state
+        self._state.current_url = url
+        self._state.goal = goal
+        # Extract additional observation modalities
+        screenshot = obs.get("screenshot") if isinstance(obs, dict) else None
+        # Extract last_action_error from obs (BrowserGym includes this)
+        last_action_error = False
+        if isinstance(obs, dict):
+            last_action_error = bool(obs.get("last_action_error"))
+        # Store full BrowserGym observation and info in metadata
+        # This preserves timestamps, additional fields, and any future extensions
+        # Note: We exclude large objects (dom_object, axtree_object) to reduce payload size
+        browsergym_metadata = {}
+        if isinstance(obs, dict):
+            # Include useful fields but exclude large raw objects
+            browsergym_metadata["browsergym_obs"] = {
+                k: v for k, v in obs.items() if k not in ("dom_object", "axtree_object", "screenshot")
+            }
+        browsergym_metadata["browsergym_info"] = info
+        return BrowserGymObservation(
+            text=text,
+            url=url,
+            screenshot=screenshot,
+            goal=goal,
+            axtree_txt=axtree_txt,
+            pruned_html=pruned_html,
+            error="",
+            last_action_error=last_action_error,
+            done=done,
+            reward=reward,
+            metadata=browsergym_metadata,
+        )
+    @property
+    def state(self) -> BrowserGymState:
+        """Return the BrowserGymState object tracked for the current episode."""
+        return self._state
+    def close(self) -> None:
+        """Clean up environment resources."""
+        if hasattr(self, "gym_env"):
+            self.gym_env.close()

server/requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+browsergym>=0.10.0
+browsergym-core>=0.10.0
+browsergym-miniwob>=0.10.0
+browsergym-webarena>=0.10.0
+gymnasium>=0.29.0
+playwright>=1.40.0
+Pillow>=10.0.0
+beautifulsoup4>=4.12.0
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0

server/start.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+#!/usr/bin/env bash
+# Starts the MiniWoB static file server and the BrowserGym API server, and
+# stops both together when either the API server exits or a signal arrives.
+set -euo pipefail
+MINIWOB_HTML_DIR=${MINIWOB_HTML_DIR:-/app/miniwob-plusplus/miniwob/html}
+MINIWOB_HTTP_PORT=${MINIWOB_HTTP_PORT:-8888}
+BROWSERGYM_PORT=${BROWSERGYM_PORT:-8000}
+if [ ! -d "${MINIWOB_HTML_DIR}" ]; then
+    echo "MiniWoB HTML directory not found at ${MINIWOB_HTML_DIR}" >&2
+    exit 1
+fi
+HTTP_SERVER_PID=""
+UVICORN_PID=""
+# Stop whichever children were started, then wait for them to finish.
+cleanup() {
+    trap - EXIT INT TERM
+    for pid in "${UVICORN_PID}" "${HTTP_SERVER_PID}"; do
+        if [ -n "${pid}" ]; then
+            kill "${pid}" 2>/dev/null || true
+        fi
+    done
+    wait 2>/dev/null || true
+}
+# Install the handlers before any child is started so every exit path cleans up.
+# INT/TERM exit with the conventional 128+signal status, which runs the EXIT trap.
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+python -m http.server "${MINIWOB_HTTP_PORT}" --bind 0.0.0.0 --directory "${MINIWOB_HTML_DIR}" &
+HTTP_SERVER_PID=$!
+# Give the static server a moment to fail (e.g. port already in use)
+sleep 1
+if ! kill -0 "${HTTP_SERVER_PID}" 2>/dev/null; then
+    echo "Failed to start MiniWoB static server on port ${MINIWOB_HTTP_PORT}" >&2
+    exit 1
+fi
+# uvicorn runs as a child rather than via exec: exec would replace this shell,
+# so the traps above would never run and the static server would be left behind.
+uvicorn server.app:app --host 0.0.0.0 --port "${BROWSERGYM_PORT}" &
+UVICORN_PID=$!
+# The script exits with uvicorn's exit status (set -e propagates a non-zero one).
+wait "${UVICORN_PID}"

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff