ankitdhiman committed on
Commit
c39ecaf
·
verified ·
1 Parent(s): 2856862

Initial browser-use RL environment

Browse files
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim AS builder
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system deps for building
6
+ RUN apt-get update && \
7
+ apt-get install -y --no-install-recommends git curl && \
8
+ rm -rf /var/lib/apt/lists/*
9
+
10
+ # Install uv
11
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
12
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
13
+ mv /root/.local/bin/uvx /usr/local/bin/uvx
14
+
15
+ # Copy project files
16
+ COPY pyproject.toml uv.lock ./
17
+ COPY __init__.py client.py models.py openenv.yaml ./
18
+ COPY server/ ./server/
19
+ COPY mock_sites/ ./mock_sites/
20
+ COPY README.md ./
21
+
22
+ # Install dependencies
23
+ RUN uv sync --frozen --no-editable || uv sync --no-editable
24
+
25
+ # Install Playwright Chromium
26
+ RUN /app/.venv/bin/playwright install chromium && \
27
+ /app/.venv/bin/playwright install-deps chromium
28
+
29
+ # ── Runtime stage ────────────────────────────────────────────────────
30
+ FROM python:3.12-slim
31
+
32
+ WORKDIR /app
33
+
34
+ # Install Chromium runtime dependencies + curl for healthcheck
35
+ RUN apt-get update && \
36
+ apt-get install -y --no-install-recommends \
37
+ libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 \
38
+ libcups2 libdrm2 libdbus-1-3 libxkbcommon0 \
39
+ libatspi2.0-0 libxcomposite1 libxdamage1 libxfixes3 \
40
+ libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 \
41
+ libwayland-client0 fonts-noto-color-emoji curl && \
42
+ rm -rf /var/lib/apt/lists/*
43
+
44
+ # Create non-root user (HF Spaces runs as uid 1000)
45
+ RUN useradd -m -u 1000 appuser
46
+
47
+ # Copy venv and app code from builder
48
+ COPY --from=builder /app/.venv /app/.venv
49
+ COPY --from=builder /app /app
50
+ COPY --from=builder /root/.cache/ms-playwright /home/appuser/.cache/ms-playwright
51
+
52
+ # Fix permissions
53
+ RUN chown -R appuser:appuser /app /home/appuser/.cache
54
+
55
+ ENV PATH="/app/.venv/bin:$PATH"
56
+ ENV PYTHONPATH="/app:$PYTHONPATH"
57
+ ENV PLAYWRIGHT_BROWSERS_PATH="/home/appuser/.cache/ms-playwright"
58
+
59
+ USER appuser
60
+
61
+ EXPOSE 7860
62
+
63
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=15s --retries=3 \
64
+ CMD curl -f http://localhost:7860/health || exit 1
65
+
66
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,84 @@
1
- ---
2
- title: Dalaal Env
3
- emoji: 📚
4
- colorFrom: pink
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dalaal-env: Browser-Use RL Environment
2
+
3
+ An OpenEnv-compatible reinforcement learning environment where agents learn to interact with web pages through an **accessibility tree** interface.
4
+
5
+ The agent observes a structured text representation of the page (like a screen reader) and takes discrete actions — click, type, scroll, select — to complete browser tasks.
6
+
7
+ ## Architecture
8
+
9
+ ```
10
+ Agent (LLM) ←→ OpenEnv Client ←→ WebSocket ←→ OpenEnv Server ←→ Playwright (headless Chromium)
11
+ ```
12
+
13
+ ## Observation Space
14
+
15
+ The agent sees the page as a numbered accessibility tree:
16
+
17
+ ```
18
+ [1] heading "My Todo List" level=1
19
+ [2] textbox "Add a new todo"
20
+ [3] button "Add"
21
+ [4] checkbox "Complete: Walk the dog" checked=false
22
+ [5] button "Delete: Walk the dog"
23
+ ```
24
+
25
+ ## Action Space
26
+
27
+ | Action | Parameters | Description |
28
+ |--------|-----------|-------------|
29
+ | `click` | `element_id` | Click an element by its tree ID |
30
+ | `type` | `element_id`, `text` | Clear and type text into an input |
31
+ | `select_option` | `element_id`, `text` | Select a dropdown option by label |
32
+ | `press_key` | `key` | Press a keyboard key (Enter, Tab, etc.) |
33
+ | `scroll` | `direction` | Scroll up or down |
34
+ | `go_back` | — | Browser back |
35
+ | `done` | — | Signal task completion |
36
+
37
+ ## Available Tasks
38
+
39
+ | Task ID | Description | Max Steps |
40
+ |---------|-------------|-----------|
41
+ | `todo_add` | Add "Buy milk" to a todo list | 10 |
42
+ | `todo_add_and_complete` | Add "Buy milk" and mark complete | 15 |
43
+ | `login` | Log in with credentials | 10 |
44
+ | `search_and_click` | Search and click first result | 10 |
45
+ | `add_to_cart` | Add headphones to cart | 10 |
46
+ | `add_to_cart_and_checkout` | Add to cart and checkout | 15 |
47
+ | `fill_registration` | Fill a multi-field registration form | 15 |
48
+
49
+ ## Reward
50
+
51
+ - **+1.0** on task success (verified by DOM-based success criteria)
52
+ - **-0.01** per step (encourages efficiency)
53
+ - **0.0** on failure or timeout
54
+ - Final score clamped to [0, 1]
55
+
56
+ ## Quick Start
57
+
58
+ ```bash
59
+ # Install
60
+ uv sync
61
+
62
+ # Install Playwright browser
63
+ uv run playwright install chromium
64
+
65
+ # Run server
66
+ uv run uvicorn server.app:app --host 0.0.0.0 --port 8000
67
+
68
+ # Run inference
69
+ export HF_TOKEN=your_token
70
+ export MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
71
+ uv run python inference.py
72
+ ```
73
+
74
+ ## Docker
75
+
76
+ ```bash
77
+ cd server && docker build -t dalaal-env .
78
+ ```
79
+
80
+ ## Validation
81
+
82
+ ```bash
83
+ uv run openenv validate
84
+ ```
__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dalaal Browser-Use Environment."""
2
+
3
+ from .client import DalaalEnvEnv
4
+ from .models import DalaalEnvAction, DalaalEnvObservation
5
+
6
+ __all__ = [
7
+ "DalaalEnvAction",
8
+ "DalaalEnvObservation",
9
+ "DalaalEnvEnv",
10
+ ]
client.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dalaal Browser-Use Environment Client."""
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ from openenv.core import EnvClient
6
+ from openenv.core.client_types import StepResult
7
+ from openenv.core.env_server.types import State
8
+
9
+ from .models import DalaalEnvAction, DalaalEnvObservation
10
+
11
+
12
+ class DalaalEnvEnv(EnvClient[DalaalEnvAction, DalaalEnvObservation, State]):
13
+ """
14
+ Client for the Dalaal Browser-Use Environment.
15
+
16
+ This client maintains a persistent WebSocket connection to the environment
17
+ server, enabling efficient multi-step browser interactions.
18
+
19
+ Example:
20
+ >>> async with DalaalEnvEnv(base_url="http://localhost:8000") as env:
21
+ ... result = await env.reset(task="todo_add")
22
+ ... print(result.observation.accessibility_tree)
23
+ ... result = await env.step(DalaalEnvAction(action_type="click", element_id=3))
24
+
25
+ Example with Docker:
26
+ >>> env = await DalaalEnvEnv.from_docker_image("dalaal-env:latest")
27
+ >>> result = await env.reset(task="login")
28
+ """
29
+
30
+ def _step_payload(self, action: DalaalEnvAction) -> Dict[str, Any]:
31
+ """Convert DalaalEnvAction to JSON payload."""
32
+ payload: Dict[str, Any] = {"action_type": action.action_type}
33
+ if action.element_id is not None:
34
+ payload["element_id"] = action.element_id
35
+ if action.text is not None:
36
+ payload["text"] = action.text
37
+ if action.key is not None:
38
+ payload["key"] = action.key
39
+ if action.direction is not None:
40
+ payload["direction"] = action.direction
41
+ return payload
42
+
43
+ def _parse_result(self, payload: Dict) -> StepResult[DalaalEnvObservation]:
44
+ """Parse server response into StepResult."""
45
+ obs_data = payload.get("observation", {})
46
+ observation = DalaalEnvObservation(
47
+ url=obs_data.get("url", ""),
48
+ title=obs_data.get("title", ""),
49
+ accessibility_tree=obs_data.get("accessibility_tree", ""),
50
+ task_description=obs_data.get("task_description", ""),
51
+ last_action_error=obs_data.get("last_action_error"),
52
+ step_count=obs_data.get("step_count", 0),
53
+ max_steps=obs_data.get("max_steps", 20),
54
+ done=payload.get("done", False),
55
+ reward=payload.get("reward"),
56
+ metadata=obs_data.get("metadata", {}),
57
+ )
58
+ return StepResult(
59
+ observation=observation,
60
+ reward=payload.get("reward"),
61
+ done=payload.get("done", False),
62
+ )
63
+
64
+ def _parse_state(self, payload: Dict) -> State:
65
+ """Parse server response into State."""
66
+ return State(
67
+ episode_id=payload.get("episode_id"),
68
+ step_count=payload.get("step_count", 0),
69
+ )
inference.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Inference Script — Dalaal Browser-Use Environment
3
+ ===================================
4
+ MANDATORY
5
+ - Before submitting, ensure the following variables are defined in your environment configuration:
6
+ API_BASE_URL The API endpoint for the LLM.
7
+ MODEL_NAME The model identifier to use for inference.
8
+ HF_TOKEN Your Hugging Face / API key.
9
+ LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
10
+
11
+ - The inference script must be named `inference.py` and placed in the root directory of the project
12
+ - Participants must use OpenAI Client for all LLM calls using above variables
13
+
14
+ STDOUT FORMAT
15
+ - The script must emit exactly three line types to stdout, in this order:
16
+
17
+ [START] task=<task_name> env=<benchmark> model=<model_name>
18
+ [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
19
+ [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
20
+ """
21
+
22
+ import asyncio
23
+ import json
24
+ import os
25
+ import textwrap
26
+ from typing import List, Optional
27
+
28
+ from openai import OpenAI
29
+
30
+ from dalaal_env import DalaalEnvAction, DalaalEnvEnv
31
+
32
+ IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
33
+ API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
34
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
35
+ MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
36
+
37
+ TASK_NAME = os.getenv("DALAAL_TASK", "todo_add")
38
+ BENCHMARK = "dalaal_env"
39
+ MAX_STEPS = 15
40
+ TEMPERATURE = 0.0
41
+ MAX_TOKENS = 300
42
+
43
+ SYSTEM_PROMPT = textwrap.dedent("""\
44
+ You are a browser automation agent. You interact with web pages by reading
45
+ an accessibility tree and issuing actions.
46
+
47
+ ACCESSIBILITY TREE FORMAT:
48
+ Each element has an [ID] followed by its role and properties:
49
+ [1] heading "Page Title"
50
+ [2] textbox "Search" value=""
51
+ [3] button "Submit"
52
+ [4] checkbox "Accept terms" checked=false
53
+
54
+ AVAILABLE ACTIONS (respond with exactly one JSON object):
55
+ - Click an element: {"action_type": "click", "element_id": <id>}
56
+ - Type into an element: {"action_type": "type", "element_id": <id>, "text": "<text>"}
57
+ - Select a dropdown option: {"action_type": "select_option", "element_id": <id>, "text": "<option label>"}
58
+ - Press a key: {"action_type": "press_key", "key": "<key name>"}
59
+ - Scroll: {"action_type": "scroll", "direction": "up" or "down"}
60
+ - Go back: {"action_type": "go_back"}
61
+ - Signal task complete: {"action_type": "done"}
62
+
63
+ RULES:
64
+ - Respond with ONLY a JSON object. No explanation, no markdown, no extra text.
65
+ - Use element IDs from the accessibility tree.
66
+ - When you believe the task is complete, use {"action_type": "done"}.
67
+ - If you see an error, try a different approach.
68
+ """)
69
+
70
+
71
+ def log_start(task: str, env: str, model: str) -> None:
72
+ print(f"[START] task={task} env={env} model={model}", flush=True)
73
+
74
+
75
+ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
76
+ error_val = error if error else "null"
77
+ done_val = str(done).lower()
78
+ print(
79
+ f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
80
+ flush=True,
81
+ )
82
+
83
+
84
+ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
85
+ rewards_str = ",".join(f"{r:.2f}" for r in rewards)
86
+ print(
87
+ f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
88
+ flush=True,
89
+ )
90
+
91
+
92
+ def parse_action(text: str) -> DalaalEnvAction:
93
+ """Parse LLM response into a DalaalEnvAction."""
94
+ text = text.strip()
95
+ # Strip markdown code fences if present
96
+ if text.startswith("```"):
97
+ lines = text.split("\n")
98
+ lines = [l for l in lines if not l.startswith("```")]
99
+ text = "\n".join(lines).strip()
100
+
101
+ data = json.loads(text)
102
+ return DalaalEnvAction(**data)
103
+
104
+
105
+ def build_user_prompt(
106
+ task: str,
107
+ tree: str,
108
+ url: str,
109
+ step: int,
110
+ max_steps: int,
111
+ last_error: Optional[str],
112
+ ) -> str:
113
+ parts = [
114
+ f"TASK: {task}",
115
+ f"STEP: {step}/{max_steps}",
116
+ f"URL: {url}",
117
+ ]
118
+ if last_error:
119
+ parts.append(f"LAST ACTION ERROR: {last_error}")
120
+ parts.append(f"ACCESSIBILITY TREE:\n{tree}")
121
+ parts.append("Respond with your next action as a JSON object.")
122
+ return "\n\n".join(parts)
123
+
124
+
125
+ def get_action_from_llm(
126
+ client: OpenAI,
127
+ task: str,
128
+ tree: str,
129
+ url: str,
130
+ step: int,
131
+ max_steps: int,
132
+ last_error: Optional[str],
133
+ ) -> DalaalEnvAction:
134
+ """Call the LLM and parse the response into an action."""
135
+ user_prompt = build_user_prompt(task, tree, url, step, max_steps, last_error)
136
+
137
+ try:
138
+ completion = client.chat.completions.create(
139
+ model=MODEL_NAME,
140
+ messages=[
141
+ {"role": "system", "content": SYSTEM_PROMPT},
142
+ {"role": "user", "content": user_prompt},
143
+ ],
144
+ temperature=TEMPERATURE,
145
+ max_tokens=MAX_TOKENS,
146
+ stream=False,
147
+ )
148
+ text = (completion.choices[0].message.content or "").strip()
149
+ return parse_action(text)
150
+ except Exception as exc:
151
+ print(f"[DEBUG] LLM/parse error: {exc}", flush=True)
152
+ # Fallback: signal done to avoid infinite loops
153
+ return DalaalEnvAction(action_type="done")
154
+
155
+
156
+ async def main() -> None:
157
+ client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
158
+
159
+ if IMAGE_NAME:
160
+ env = await DalaalEnvEnv.from_docker_image(IMAGE_NAME)
161
+ else:
162
+ env = DalaalEnvEnv(base_url=os.getenv("DALAAL_ENV_URL", "http://localhost:8000"))
163
+ await env.connect()
164
+
165
+ rewards: List[float] = []
166
+ steps_taken = 0
167
+ score = 0.0
168
+ success = False
169
+
170
+ log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
171
+
172
+ try:
173
+ result = await env.reset(task=TASK_NAME)
174
+ obs = result.observation
175
+
176
+ for step in range(1, MAX_STEPS + 1):
177
+ if result.done:
178
+ break
179
+
180
+ action = get_action_from_llm(
181
+ client=client,
182
+ task=obs.task_description,
183
+ tree=obs.accessibility_tree,
184
+ url=obs.url,
185
+ step=step,
186
+ max_steps=obs.max_steps,
187
+ last_error=obs.last_action_error,
188
+ )
189
+
190
+ result = await env.step(action)
191
+ obs = result.observation
192
+
193
+ reward = result.reward or 0.0
194
+ done = result.done
195
+ error = obs.last_action_error
196
+
197
+ rewards.append(reward)
198
+ steps_taken = step
199
+
200
+ action_str = f"{action.action_type}({action.element_id or action.text or action.key or ''})"
201
+ log_step(step=step, action=action_str, reward=reward, done=done, error=error)
202
+
203
+ if done:
204
+ break
205
+
206
+ # Final reward is the last reward (which encodes success)
207
+ if rewards and rewards[-1] > 0:
208
+ score = rewards[-1]
209
+ success = True
210
+ else:
211
+ score = 0.0
212
+ success = False
213
+
214
+ score = min(max(score, 0.0), 1.0)
215
+
216
+ finally:
217
+ try:
218
+ await env.close()
219
+ except Exception as e:
220
+ print(f"[DEBUG] env.close() error: {e}", flush=True)
221
+ log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
222
+
223
+
224
+ if __name__ == "__main__":
225
+ asyncio.run(main())
mock_sites/ecommerce/index.html ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>ShopEasy - Online Store</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { font-family: system-ui, sans-serif; background: #f5f5f5; }
10
+ header { background: #1a1a2e; color: white; padding: 16px 24px; display: flex; justify-content: space-between; align-items: center; }
11
+ header h1 { font-size: 22px; }
12
+ .cart-btn { background: #e94560; padding: 8px 16px; border-radius: 20px; color: white; border: none; cursor: pointer; font-size: 14px; }
13
+ .products { display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 20px; padding: 24px; max-width: 1000px; margin: 0 auto; }
14
+ .product-card { background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.08); }
15
+ .product-card .img-placeholder { height: 160px; background: #e0e0e0; display: flex; align-items: center; justify-content: center; font-size: 48px; }
16
+ .product-info { padding: 16px; }
17
+ .product-info h3 { margin-bottom: 8px; }
18
+ .product-info .price { color: #e94560; font-size: 20px; font-weight: bold; margin-bottom: 12px; }
19
+ .product-info button { width: 100%; padding: 10px; background: #1a1a2e; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 14px; }
20
+ .product-info button.added { background: #4CAF50; }
21
+ #checkout-page { display: none; padding: 40px; text-align: center; }
22
+ #checkout-page h2 { color: #4CAF50; margin-bottom: 16px; }
23
+ .cart-summary { display: none; background: white; padding: 20px; margin: 20px auto; max-width: 600px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.08); }
24
+ .cart-summary h2 { margin-bottom: 16px; }
25
+ .cart-summary .cart-item { display: flex; justify-content: space-between; padding: 8px 0; border-bottom: 1px solid #eee; }
26
+ .checkout-btn { padding: 12px 24px; background: #e94560; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 16px; margin-top: 16px; }
27
+ </style>
28
+ </head>
29
+ <body>
30
+ <div id="shop-page">
31
+ <header>
32
+ <h1>ShopEasy</h1>
33
+ <button class="cart-btn" onclick="showCart()" aria-label="Shopping Cart">Cart (<span id="cart-count">0</span>)</button>
34
+ </header>
35
+
36
+ <div class="products" id="products">
37
+ <div class="product-card">
38
+ <div class="img-placeholder">🎧</div>
39
+ <div class="product-info">
40
+ <h3>Wireless Headphones</h3>
41
+ <div class="price">$79.99</div>
42
+ <button onclick="addToCart(this, 'Wireless Headphones', 79.99)" aria-label="Add to cart: Wireless Headphones">Add to Cart</button>
43
+ </div>
44
+ </div>
45
+ <div class="product-card">
46
+ <div class="img-placeholder">⌨️</div>
47
+ <div class="product-info">
48
+ <h3>Mechanical Keyboard</h3>
49
+ <div class="price">$129.99</div>
50
+ <button onclick="addToCart(this, 'Mechanical Keyboard', 129.99)" aria-label="Add to cart: Mechanical Keyboard">Add to Cart</button>
51
+ </div>
52
+ </div>
53
+ <div class="product-card">
54
+ <div class="img-placeholder">🖱️</div>
55
+ <div class="product-info">
56
+ <h3>Ergonomic Mouse</h3>
57
+ <div class="price">$49.99</div>
58
+ <button onclick="addToCart(this, 'Ergonomic Mouse', 49.99)" aria-label="Add to cart: Ergonomic Mouse">Add to Cart</button>
59
+ </div>
60
+ </div>
61
+ <div class="product-card">
62
+ <div class="img-placeholder">📱</div>
63
+ <div class="product-info">
64
+ <h3>Phone Stand</h3>
65
+ <div class="price">$19.99</div>
66
+ <button onclick="addToCart(this, 'Phone Stand', 19.99)" aria-label="Add to cart: Phone Stand">Add to Cart</button>
67
+ </div>
68
+ </div>
69
+ </div>
70
+
71
+ <div class="cart-summary" id="cart-summary">
72
+ <h2>Your Cart</h2>
73
+ <div id="cart-items"></div>
74
+ <button class="checkout-btn" onclick="checkout()" aria-label="Proceed to Checkout">Proceed to Checkout</button>
75
+ </div>
76
+ </div>
77
+
78
+ <div id="checkout-page">
79
+ <h2>Checkout Complete!</h2>
80
+ <p>Thank you for your order. Your items will be shipped soon.</p>
81
+ </div>
82
+
83
+ <script>
84
+ const cart = [];
85
+
86
+ function addToCart(btn, name, price) {
87
+ cart.push({ name, price });
88
+ document.getElementById('cart-count').textContent = cart.length;
89
+ btn.textContent = 'Added!';
90
+ btn.classList.add('added');
91
+ setTimeout(() => {
92
+ btn.textContent = 'Add to Cart';
93
+ btn.classList.remove('added');
94
+ }, 1500);
95
+ }
96
+
97
+ function showCart() {
98
+ const summary = document.getElementById('cart-summary');
99
+ if (cart.length === 0) return;
100
+ summary.style.display = 'block';
101
+ const itemsHtml = cart.map(item => `
102
+ <div class="cart-item">
103
+ <span>${item.name}</span>
104
+ <span>$${item.price.toFixed(2)}</span>
105
+ </div>
106
+ `).join('');
107
+ const total = cart.reduce((s, i) => s + i.price, 0);
108
+ document.getElementById('cart-items').innerHTML = itemsHtml + `
109
+ <div class="cart-item" style="font-weight:bold; border-top: 2px solid #333; margin-top: 8px; padding-top: 8px;">
110
+ <span>Total</span>
111
+ <span>$${total.toFixed(2)}</span>
112
+ </div>
113
+ `;
114
+ }
115
+
116
+ function checkout() {
117
+ document.getElementById('shop-page').style.display = 'none';
118
+ document.getElementById('checkout-page').style.display = 'block';
119
+ }
120
+ </script>
121
+ </body>
122
+ </html>
mock_sites/login_form/index.html ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Login</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { font-family: system-ui, sans-serif; display: flex; justify-content: center; align-items: center; min-height: 100vh; background: #f0f2f5; }
10
+ .login-card { background: white; padding: 40px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); width: 100%; max-width: 400px; }
11
+ h1 { margin-bottom: 24px; color: #333; text-align: center; }
12
+ .form-group { margin-bottom: 16px; }
13
+ label { display: block; margin-bottom: 6px; color: #555; font-weight: 500; }
14
+ input[type="text"], input[type="password"] { width: 100%; padding: 10px; border: 1px solid #ddd; border-radius: 4px; font-size: 16px; }
15
+ button[type="submit"] { width: 100%; padding: 12px; background: #1976D2; color: white; border: none; border-radius: 4px; font-size: 16px; cursor: pointer; margin-top: 8px; }
16
+ .error { color: #f44336; font-size: 14px; margin-top: 8px; display: none; }
17
+ #success-message { text-align: center; display: none; }
18
+ #success-message h2 { color: #4CAF50; margin-bottom: 10px; }
19
+ </style>
20
+ </head>
21
+ <body>
22
+ <div class="login-card">
23
+ <div id="login-form-container">
24
+ <h1>Sign In</h1>
25
+ <form onsubmit="handleLogin(event)">
26
+ <div class="form-group">
27
+ <label for="username">Username</label>
28
+ <input type="text" id="username" name="username" placeholder="Enter username" required aria-label="Username">
29
+ </div>
30
+ <div class="form-group">
31
+ <label for="password">Password</label>
32
+ <input type="password" id="password" name="password" placeholder="Enter password" required aria-label="Password">
33
+ </div>
34
+ <div class="error" id="error-msg">Invalid username or password.</div>
35
+ <button type="submit">Log In</button>
36
+ </form>
37
+ </div>
38
+ <div id="success-message">
39
+ <h2>Welcome, admin!</h2>
40
+ <p>You have successfully logged in.</p>
41
+ </div>
42
+ </div>
43
+
44
+ <script>
45
+ function handleLogin(e) {
46
+ e.preventDefault();
47
+ const user = document.getElementById('username').value;
48
+ const pass = document.getElementById('password').value;
49
+
50
+ if (user === 'admin' && pass === 'secret123') {
51
+ document.getElementById('login-form-container').style.display = 'none';
52
+ document.getElementById('success-message').style.display = 'block';
53
+ } else {
54
+ document.getElementById('error-msg').style.display = 'block';
55
+ }
56
+ }
57
+ </script>
58
+ </body>
59
+ </html>
mock_sites/registration_form/index.html ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Create Account</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { font-family: system-ui, sans-serif; display: flex; justify-content: center; align-items: center; min-height: 100vh; background: #f0f2f5; }
10
+ .card { background: white; padding: 40px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); width: 100%; max-width: 450px; }
11
+ h1 { margin-bottom: 24px; color: #333; text-align: center; }
12
+ .form-group { margin-bottom: 16px; }
13
+ label { display: block; margin-bottom: 6px; color: #555; font-weight: 500; }
14
+ input[type="text"], input[type="email"], input[type="tel"], select { width: 100%; padding: 10px; border: 1px solid #ddd; border-radius: 4px; font-size: 16px; }
15
+ button[type="submit"] { width: 100%; padding: 12px; background: #4CAF50; color: white; border: none; border-radius: 4px; font-size: 16px; cursor: pointer; margin-top: 8px; }
16
+ .error { color: #f44336; font-size: 13px; margin-top: 4px; display: none; }
17
+ #success-message { display: none; text-align: center; }
18
+ #success-message h2 { color: #4CAF50; margin-bottom: 12px; }
19
+ #success-message .details { background: #f5f5f5; padding: 16px; border-radius: 4px; text-align: left; margin-top: 16px; }
20
+ #success-message .details p { margin: 4px 0; }
21
+ </style>
22
+ </head>
23
+ <body>
24
+ <div class="card">
25
+ <div id="form-container">
26
+ <h1>Create Account</h1>
27
+ <form onsubmit="handleSubmit(event)">
28
+ <div class="form-group">
29
+ <label for="fullname">Full Name</label>
30
+ <input type="text" id="fullname" name="fullname" placeholder="Enter your full name" required aria-label="Full Name">
31
+ <div class="error" id="name-error">Please enter your name</div>
32
+ </div>
33
+ <div class="form-group">
34
+ <label for="email">Email Address</label>
35
+ <input type="email" id="email" name="email" placeholder="Enter your email" required aria-label="Email Address">
36
+ <div class="error" id="email-error">Please enter a valid email</div>
37
+ </div>
38
+ <div class="form-group">
39
+ <label for="phone">Phone Number</label>
40
+ <input type="tel" id="phone" name="phone" placeholder="Enter phone number" aria-label="Phone Number">
41
+ </div>
42
+ <div class="form-group">
43
+ <label for="country">Country</label>
44
+ <select id="country" name="country" required aria-label="Country">
45
+ <option value="">Select a country</option>
46
+ <option value="US">United States</option>
47
+ <option value="UK">United Kingdom</option>
48
+ <option value="CA">Canada</option>
49
+ <option value="DE">Germany</option>
50
+ <option value="IN">India</option>
51
+ <option value="JP">Japan</option>
52
+ </select>
53
+ </div>
54
+ <button type="submit">Create Account</button>
55
+ </form>
56
+ </div>
57
+ <div id="success-message">
58
+ <h2>Account Created!</h2>
59
+ <p>Your account has been successfully created.</p>
60
+ <div class="details">
61
+ <p><strong>Name:</strong> <span id="show-name"></span></p>
62
+ <p><strong>Email:</strong> <span id="show-email"></span></p>
63
+ <p><strong>Country:</strong> <span id="show-country"></span></p>
64
+ </div>
65
+ </div>
66
+ </div>
67
+
68
+ <script>
69
+ function handleSubmit(e) {
70
+ e.preventDefault();
71
+ const name = document.getElementById('fullname').value.trim();
72
+ const email = document.getElementById('email').value.trim();
73
+ const country = document.getElementById('country').value;
74
+
75
+ let valid = true;
76
+ if (!name) { document.getElementById('name-error').style.display = 'block'; valid = false; }
77
+ else { document.getElementById('name-error').style.display = 'none'; }
78
+ if (!email || !email.includes('@')) { document.getElementById('email-error').style.display = 'block'; valid = false; }
79
+ else { document.getElementById('email-error').style.display = 'none'; }
80
+
81
+ if (!valid || !country) return;
82
+
83
+ document.getElementById('show-name').textContent = name;
84
+ document.getElementById('show-email').textContent = email;
85
+ document.getElementById('show-country').textContent = document.getElementById('country').options[document.getElementById('country').selectedIndex].text;
86
+
87
+ document.getElementById('form-container').style.display = 'none';
88
+ document.getElementById('success-message').style.display = 'block';
89
+ }
90
+ </script>
91
+ </body>
92
+ </html>
mock_sites/search_engine/index.html ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>SearchIt - Web Search</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { font-family: system-ui, sans-serif; background: #fff; }
10
+ .search-header { padding: 20px; border-bottom: 1px solid #e0e0e0; display: flex; align-items: center; gap: 16px; }
11
+ .logo { font-size: 24px; font-weight: bold; color: #1a73e8; }
12
+ .search-bar { display: flex; gap: 8px; flex: 1; max-width: 600px; }
13
+ .search-bar input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 20px; font-size: 16px; }
14
+ .search-bar button { padding: 10px 20px; background: #1a73e8; color: white; border: none; border-radius: 20px; cursor: pointer; }
15
+ .search-home { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 80vh; }
16
+ .search-home .logo { font-size: 48px; margin-bottom: 24px; }
17
+ .search-home .search-bar { width: 100%; max-width: 580px; }
18
+ .search-home .search-bar input { padding: 14px 20px; }
19
+ .results-page { display: none; }
20
+ .results { padding: 20px 20px 20px 160px; max-width: 800px; }
21
+ .result-item { margin-bottom: 24px; }
22
+ .result-item a { color: #1a0dab; font-size: 18px; text-decoration: none; }
23
+ .result-item a:hover { text-decoration: underline; }
24
+ .result-item .url { color: #006621; font-size: 14px; }
25
+ .result-item .snippet { color: #545454; font-size: 14px; margin-top: 4px; }
26
+ #result-page { display: none; padding: 40px; }
27
+ #result-page h1 { margin-bottom: 16px; }
28
+ #result-page p { line-height: 1.6; color: #333; }
29
+ </style>
30
+ </head>
31
+ <body>
32
+ <div id="home-page" class="search-home">
33
+ <div class="logo">SearchIt</div>
34
+ <div class="search-bar">
35
+ <input type="text" id="search-input" placeholder="Search the web..." aria-label="Search">
36
+ <button onclick="performSearch()" aria-label="Search">Search</button>
37
+ </div>
38
+ </div>
39
+
40
+ <div id="results-container" class="results-page">
41
+ <div class="search-header">
42
+ <div class="logo">SearchIt</div>
43
+ <div class="search-bar">
44
+ <input type="text" id="search-input-2" aria-label="Search">
45
+ <button onclick="performSearch()" aria-label="Search">Search</button>
46
+ </div>
47
+ </div>
48
+ <div class="results" id="results"></div>
49
+ </div>
50
+
51
+ <div id="result-page">
52
+ <h1 id="article-title"></h1>
53
+ <p id="article-content"></p>
54
+ </div>
55
+
56
+ <script>
57
+ const articles = {
58
+ "machine learning": [
59
+ { title: "Introduction to Machine Learning - ML Guide", url: "https://mlguide.org/intro", snippet: "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed." },
60
+ { title: "Machine Learning Course - Stanford Online", url: "https://stanford.edu/ml-course", snippet: "Free online machine learning course covering supervised learning, unsupervised learning, and best practices." },
61
+ { title: "What is Machine Learning? - TechExplained", url: "https://techexplained.com/ml", snippet: "A comprehensive guide to understanding machine learning algorithms, applications, and the future of AI." }
62
+ ],
63
+ "default": [
64
+ { title: "Welcome to the Web - Example", url: "https://example.com", snippet: "This is an example search result for your query." },
65
+ { title: "Learn More - Wikipedia", url: "https://wikipedia.org", snippet: "Wikipedia is a free online encyclopedia with articles on almost every topic." }
66
+ ]
67
+ };
68
+
69
+ function performSearch() {
70
+ const query = (document.getElementById('search-input').value || document.getElementById('search-input-2').value).trim().toLowerCase();
71
+ if (!query) return;
72
+
73
+ document.getElementById('home-page').style.display = 'none';
74
+ document.getElementById('results-container').style.display = 'block';
75
+ document.getElementById('search-input-2').value = query;
76
+
77
+ const results = articles[query] || articles["default"];
78
+ const html = results.map((r, i) => `
79
+ <div class="result-item">
80
+ <div class="url">${r.url}</div>
81
+ <a href="#" onclick="openResult('${r.title}', '${r.snippet}'); return false;">${r.title}</a>
82
+ <div class="snippet">${r.snippet}</div>
83
+ </div>
84
+ `).join('');
85
+ document.getElementById('results').innerHTML = html;
86
+ }
87
+
88
+ function openResult(title, content) {
89
+ document.getElementById('results-container').style.display = 'none';
90
+ document.getElementById('result-page').style.display = 'block';
91
+ document.getElementById('article-title').textContent = title;
92
+ document.getElementById('article-content').textContent = content;
93
+ }
94
+
95
+ document.getElementById('search-input').addEventListener('keydown', function(e) {
96
+ if (e.key === 'Enter') performSearch();
97
+ });
98
+ </script>
99
+ </body>
100
+ </html>
mock_sites/todo_app/index.html ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Todo App</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { font-family: system-ui, sans-serif; max-width: 600px; margin: 40px auto; padding: 20px; background: #f5f5f5; }
10
+ h1 { margin-bottom: 20px; color: #333; }
11
+ .add-form { display: flex; gap: 8px; margin-bottom: 20px; }
12
+ .add-form input { flex: 1; padding: 10px; border: 1px solid #ddd; border-radius: 4px; font-size: 16px; }
13
+ .add-form button { padding: 10px 20px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 16px; }
14
+ .todo-list { list-style: none; }
15
+ .todo-item { display: flex; align-items: center; gap: 10px; padding: 12px; background: white; margin-bottom: 8px; border-radius: 4px; border: 1px solid #e0e0e0; }
16
+ .todo-item.completed .todo-text { text-decoration: line-through; color: #999; }
17
+ .todo-text { flex: 1; font-size: 16px; }
18
+ .todo-item button { padding: 6px 12px; background: #f44336; color: white; border: none; border-radius: 4px; cursor: pointer; }
19
+ .todo-item input[type="checkbox"] { width: 20px; height: 20px; cursor: pointer; }
20
+ </style>
21
+ </head>
22
+ <body>
23
+ <h1>My Todo List</h1>
24
+ <div class="add-form">
25
+ <input type="text" id="todo-input" placeholder="Add a new todo..." aria-label="Add a new todo">
26
+ <button id="add-btn" onclick="addTodo()">Add</button>
27
+ </div>
28
+ <ul class="todo-list" id="todo-list" role="list">
29
+ <li class="todo-item" role="listitem">
30
+ <input type="checkbox" aria-label="Complete: Walk the dog" onchange="toggleTodo(this)">
31
+ <span class="todo-text">Walk the dog</span>
32
+ <button onclick="deleteTodo(this)" aria-label="Delete: Walk the dog">Delete</button>
33
+ </li>
34
+ <li class="todo-item" role="listitem">
35
+ <input type="checkbox" aria-label="Complete: Read a book" onchange="toggleTodo(this)">
36
+ <span class="todo-text">Read a book</span>
37
+ <button onclick="deleteTodo(this)" aria-label="Delete: Read a book">Delete</button>
38
+ </li>
39
+ </ul>
40
+
41
+ <script>
42
/**
 * Append a new row to #todo-list using the trimmed text of #todo-input,
 * then clear the input. Blank input is ignored.
 *
 * The row is assembled from DOM nodes rather than an HTML string so that
 * text containing quotes, `<` or `&` ends up verbatim in the label and the
 * aria-labels instead of corrupting the markup.
 */
function addTodo() {
  const input = document.getElementById('todo-input');
  const text = input.value.trim();
  if (!text) return;

  const li = document.createElement('li');
  li.className = 'todo-item';
  li.setAttribute('role', 'listitem');

  const checkbox = document.createElement('input');
  checkbox.type = 'checkbox';
  checkbox.setAttribute('aria-label', `Complete: ${text}`);
  // Inline handler attributes are kept so dynamically added rows match the
  // markup of the rows that ship in the page.
  checkbox.setAttribute('onchange', 'toggleTodo(this)');

  const label = document.createElement('span');
  label.className = 'todo-text';
  label.textContent = text;

  const removeBtn = document.createElement('button');
  removeBtn.setAttribute('onclick', 'deleteTodo(this)');
  removeBtn.setAttribute('aria-label', `Delete: ${text}`);
  removeBtn.textContent = 'Delete';

  li.append(checkbox, label, removeBtn);
  document.getElementById('todo-list').appendChild(li);
  input.value = '';
}
58
+
59
/**
 * Mirror a checkbox's state onto its row: the enclosing .todo-item carries
 * the `completed` class exactly while the box is checked.
 */
function toggleTodo(checkbox) {
  const row = checkbox.closest('.todo-item');
  if (checkbox.checked) {
    row.classList.add('completed');
  } else {
    row.classList.remove('completed');
  }
}
63
+
64
/** Remove the .todo-item row that contains the clicked delete button. */
function deleteTodo(btn) {
  const row = btn.closest('.todo-item');
  row.remove();
}
67
+
68
// Pressing Enter inside the todo input behaves like clicking "Add".
document.getElementById('todo-input').addEventListener('keydown', (event) => {
  if (event.key !== 'Enter') return;
  addTodo();
});
71
+ </script>
72
+ </body>
73
+ </html>
models.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data models for the Dalaal Browser-Use Environment.
3
+
4
+ The dalaal_env environment provides a browser interaction environment
5
+ where agents learn to navigate and interact with web pages using
6
+ an accessibility tree representation.
7
+ """
8
+
9
+ from typing import Literal, Optional
10
+
11
+ from openenv.core.env_server.types import Action, Observation
12
+ from pydantic import Field
13
+
14
+
15
class DalaalEnvAction(Action):
    """Action for the Browser-Use environment.

    The agent interacts with the browser by specifying an action type
    and relevant parameters. Elements are referenced by their ID in
    the accessibility tree.
    """

    # Required discriminator: ``Field(...)`` declares no default, so every
    # action must name one of the listed action types.
    action_type: Literal[
        "click",
        "type",
        "select_option",
        "press_key",
        "scroll",
        "go_back",
        "done",
    ] = Field(..., description="Type of browser action to perform")

    # The remaining fields are all optional at the model level; this class
    # does not validate which of them a given action_type needs.
    element_id: Optional[int] = Field(
        default=None,
        description="ID of the element in the accessibility tree (for click, type, select_option)",
    )
    text: Optional[str] = Field(
        default=None,
        description="Text to type or option to select",
    )
    key: Optional[str] = Field(
        default=None,
        description="Key to press (e.g. 'Enter', 'Tab', 'Escape')",
    )
    # Only "up" and "down" are accepted; there is no horizontal scrolling.
    direction: Optional[Literal["up", "down"]] = Field(
        default=None,
        description="Scroll direction",
    )
49
+
50
+
51
class DalaalEnvObservation(Observation):
    """Observation from the Browser-Use environment.

    Contains the accessibility tree of the current page, which provides
    a structured text representation of all interactive elements with
    assigned IDs that the agent can reference in actions.
    """

    # Every field has a default, so an observation can be built from any
    # subset of these values.
    url: str = Field(default="", description="Current page URL")
    title: str = Field(default="", description="Current page title")
    accessibility_tree: str = Field(
        default="", description="Text accessibility tree with element IDs"
    )
    task_description: str = Field(
        default="", description="Natural language description of the current task"
    )
    # None means the previous action produced no error message.
    last_action_error: Optional[str] = Field(
        default=None, description="Error message from last action, if any"
    )
    step_count: int = Field(default=0, description="Current step number")
    max_steps: int = Field(default=20, description="Maximum steps for this task")
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: dalaal_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "openenv-dalaal-env"
7
+ version = "0.1.0"
8
+ description = "Browser-Use RL environment where agents learn to interact with web pages through accessibility tree observations"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "openenv-core[core]>=0.2.2",
13
+ "playwright>=1.40.0",
14
+ "fastapi>=0.115.0",
15
+ "uvicorn>=0.24.0",
16
+ "openai>=2.30.0",
17
+ "huggingface-hub>=1.9.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest>=8.0.0",
23
+ "pytest-asyncio>=0.23.0",
24
+ ]
25
+
26
+ [project.scripts]
27
+ server = "dalaal_env.server.app:main"
28
+
29
+ [tool.setuptools]
30
+ include-package-data = true
31
+ packages = ["dalaal_env", "dalaal_env.server"]
32
+ package-dir = { "dalaal_env" = ".", "dalaal_env.server" = "server" }
33
+
34
+ [tool.setuptools.package-data]
35
+ "dalaal_env" = ["mock_sites/**/*.html"]
server/Dockerfile ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
2
+ FROM ${BASE_IMAGE} AS builder
3
+
4
+ WORKDIR /app
5
+
6
+ RUN apt-get update && \
7
+ apt-get install -y --no-install-recommends git && \
8
+ rm -rf /var/lib/apt/lists/*
9
+
10
+ ARG BUILD_MODE=in-repo
11
+ ARG ENV_NAME=dalaal_env
12
+
13
+ COPY . /app/env
14
+
15
+ WORKDIR /app/env
16
+
17
+ RUN if ! command -v uv >/dev/null 2>&1; then \
18
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
19
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
20
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
21
+ fi
22
+
23
+ RUN --mount=type=cache,target=/root/.cache/uv \
24
+ if [ -f uv.lock ]; then \
25
+ uv sync --frozen --no-install-project --no-editable; \
26
+ else \
27
+ uv sync --no-install-project --no-editable; \
28
+ fi
29
+
30
+ RUN --mount=type=cache,target=/root/.cache/uv \
31
+ if [ -f uv.lock ]; then \
32
+ uv sync --frozen --no-editable; \
33
+ else \
34
+ uv sync --no-editable; \
35
+ fi
36
+
37
+ # Install Playwright and Chromium browser
38
+ RUN /app/env/.venv/bin/playwright install chromium && \
39
+ /app/env/.venv/bin/playwright install-deps chromium
40
+
41
+ # Final runtime stage
42
+ FROM ${BASE_IMAGE}
43
+
44
+ WORKDIR /app
45
+
46
+ # Install Chromium runtime dependencies
47
+ RUN apt-get update && \
48
+ apt-get install -y --no-install-recommends \
49
+ libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 \
50
+ libcups2 libdrm2 libdbus-1-3 libxkbcommon0 \
51
+ libatspi2.0-0 libxcomposite1 libxdamage1 libxfixes3 \
52
+ libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 \
53
+ libwayland-client0 fonts-noto-color-emoji curl && \
54
+ rm -rf /var/lib/apt/lists/*
55
+
56
+ # Copy the virtual environment from builder (includes Playwright browsers)
57
+ COPY --from=builder /app/env/.venv /app/.venv
58
+ COPY --from=builder /root/.cache/ms-playwright /root/.cache/ms-playwright
59
+
60
+ # Copy the environment code
61
+ COPY --from=builder /app/env /app/env
62
+
63
+ ENV PATH="/app/.venv/bin:$PATH"
64
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
65
+
66
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
67
+ CMD curl -f http://localhost:8000/health || exit 1
68
+
69
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
server/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Dalaal Browser-Use environment server components."""
2
+
3
+ from .dalaal_env_environment import DalaalEnvEnvironment
4
+
5
+ __all__ = ["DalaalEnvEnvironment"]
server/accessibility.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Accessibility tree extraction from Playwright pages using Chrome DevTools Protocol.
3
+
4
+ Converts the browser DOM into a numbered text representation that LLM agents
5
+ can reason about and reference by element ID.
6
+
7
+ Example output:
8
+ [1] heading "My Todo List"
9
+ [2] textbox "Add a new todo..." value=""
10
+ [3] button "Add"
11
+ [4] checkbox "Buy groceries" checked=false
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional
18
+
19
+ from playwright.async_api import Page
20
+
21
+
22
+ SKIP_ROLES = frozenset({
23
+ "none", "generic", "RootWebArea", "LineBreak",
24
+ "InlineTextBox", "StaticText", "paragraph",
25
+ "MenuListPopup", "group",
26
+ })
27
+
28
+ INTERACTIVE_ROLES = frozenset({
29
+ "button", "link", "textbox", "checkbox", "radio",
30
+ "combobox", "searchbox", "option", "tab", "menuitem",
31
+ "switch", "slider", "spinbutton",
32
+ })
33
+
34
+
35
@dataclass
class AccessibilityNode:
    """One entry of the parsed accessibility tree.

    ``id``, ``role`` and ``name`` are always set; every other attribute is an
    optional state that stays ``None`` when it was not supplied for the node.
    """

    # Identity of the node.
    id: int
    role: str
    name: str

    # Optional state; ``checked`` is kept as a string, the others as
    # booleans / an integer.
    value: Optional[str] = None
    checked: Optional[str] = None
    selected: Optional[bool] = None
    expanded: Optional[bool] = None
    disabled: Optional[bool] = None
    focused: Optional[bool] = None
    level: Optional[int] = None
49
+
50
class AccessibilityTree:
    """Extract a page's accessibility tree and map numeric IDs to its nodes.

    Each call to :meth:`extract` renumbers the kept nodes from 1, so an ID is
    only meaningful for the most recently extracted tree.
    """

    def __init__(self):
        self._nodes: dict[int, AccessibilityNode] = {}
        self._counter = 0

    def clear(self):
        """Forget all nodes and restart ID numbering."""
        self._nodes.clear()
        self._counter = 0

    def get_node(self, element_id: int) -> Optional[AccessibilityNode]:
        """Return the node with ``element_id`` from the last extraction, or ``None``."""
        return self._nodes.get(element_id)

    async def extract(self, page: Page) -> str:
        """Extract accessibility tree from page via CDP and return text representation.

        Nodes are dropped when CDP flags them as ignored, when their role is
        in ``SKIP_ROLES``, or when they have no name and a role outside
        ``INTERACTIVE_ROLES``. Every remaining node gets the next sequential
        ID and one rendered line.

        Returns:
            The rendered lines joined by newlines, or ``"[empty page]"`` when
            no node was kept.
        """
        self.clear()

        cdp = await page.context.new_cdp_session(page)
        try:
            result = await cdp.send("Accessibility.getFullAXTree")
        finally:
            # Always release the CDP session, even if the call failed.
            await cdp.detach()

        lines = []
        for raw in result.get("nodes", []):
            # Ignored nodes are not part of the exposed accessibility tree.
            if raw.get("ignored"):
                continue

            role = raw.get("role", {}).get("value", "")
            if role in SKIP_ROLES:
                continue

            name = raw.get("name", {}).get("value", "")
            # Skip nodes with no name and non-interactive roles
            if not name and role not in INTERACTIVE_ROLES:
                continue

            props = {}
            for p in raw.get("properties", []):
                val = p.get("value", {})
                if "value" in val:
                    props[p["name"]] = val["value"]

            # CDP reports a node's value as a top-level AXNode field rather
            # than inside ``properties``; reading only ``properties`` left
            # ``value`` permanently unset. The old lookup stays as fallback.
            value = raw.get("value", {}).get("value", props.get("value"))

            self._counter += 1
            node = AccessibilityNode(
                id=self._counter,
                role=role,
                name=name,
                value=value,
                checked=props.get("checked"),
                selected=props.get("selected"),
                expanded=props.get("expanded"),
                disabled=props.get("disabled"),
                focused=props.get("focused"),
                level=props.get("level"),
            )
            self._nodes[node.id] = node
            lines.append(self._render_node(node))

        return "\n".join(lines) if lines else "[empty page]"

    def _render_node(self, node: AccessibilityNode) -> str:
        """Render a single node as text, e.g. ``[3] button "Add"``.

        Unset (``None``) attributes are omitted; ``disabled`` and ``focused``
        appear as bare flags and only when true.
        """
        parts = [f"[{node.id}] {node.role}"]

        if node.name:
            parts.append(f'"{node.name}"')
        if node.value is not None:
            parts.append(f'value="{node.value}"')
        if node.checked is not None:
            parts.append(f"checked={node.checked}")
        if node.selected is not None:
            parts.append(f"selected={str(node.selected).lower()}")
        if node.expanded is not None:
            parts.append(f"expanded={str(node.expanded).lower()}")
        if node.disabled:
            parts.append("disabled")
        if node.focused:
            parts.append("focused")
        if node.level is not None:
            parts.append(f"level={node.level}")

        return " ".join(parts)
server/app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI application for the Dalaal Browser-Use Environment.
3
+
4
+ Endpoints:
5
+ - POST /reset: Reset the environment (pass task name in body)
6
+ - POST /step: Execute a browser action
7
+ - GET /state: Get current environment state
8
+ - GET /schema: Get action/observation schemas
9
+ - WS /ws: WebSocket endpoint for persistent sessions
10
+ """
11
+
12
+ try:
13
+ from openenv.core.env_server.http_server import create_app
14
+ except Exception as e:
15
+ raise ImportError(
16
+ "openenv is required. Install with: uv sync"
17
+ ) from e
18
+
19
+ try:
20
+ from ..models import DalaalEnvAction, DalaalEnvObservation
21
+ from .dalaal_env_environment import DalaalEnvEnvironment
22
+ except (ImportError, SystemError):
23
+ try:
24
+ from models import DalaalEnvAction, DalaalEnvObservation
25
+ from server.dalaal_env_environment import DalaalEnvEnvironment
26
+ except ImportError:
27
+ from dalaal_env.models import DalaalEnvAction, DalaalEnvObservation
28
+ from dalaal_env.server.dalaal_env_environment import DalaalEnvEnvironment
29
+
30
+
31
+ app = create_app(
32
+ DalaalEnvEnvironment,
33
+ DalaalEnvAction,
34
+ DalaalEnvObservation,
35
+ env_name="dalaal_env",
36
+ max_concurrent_envs=1,
37
+ )
38
+
39
+
40
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve the module-level ``app`` with uvicorn on ``host``:``port``."""
    # Imported here rather than at module top, so uvicorn is only needed
    # when the server is started through this function.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
45
+
46
+
47
+ if __name__ == "__main__":
48
+ main()
server/dalaal_env_environment.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dalaal Browser-Use Environment Implementation.
3
+
4
+ An RL environment where agents interact with web pages through
5
+ an accessibility-tree interface, learning to perform browser tasks
6
+ like form filling, navigation, and multi-step workflows.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import os
13
+ from typing import Optional
14
+ from uuid import uuid4
15
+
16
+ from openenv.core.env_server.interfaces import Environment
17
+ from openenv.core.env_server.types import State
18
+ from playwright.async_api import async_playwright, Browser, BrowserContext, Page
19
+
20
+ try:
21
+ from ..models import DalaalEnvAction, DalaalEnvObservation
22
+ except (ImportError, SystemError):
23
+ try:
24
+ from models import DalaalEnvAction, DalaalEnvObservation
25
+ except ImportError:
26
+ from dalaal_env.models import DalaalEnvAction, DalaalEnvObservation
27
+
28
+ from .accessibility import AccessibilityTree
29
+ from .tasks import Task, get_task, list_tasks, get_mock_sites_dir
30
+
31
+ # Step penalty to encourage efficiency
32
+ STEP_PENALTY = -0.01
33
+
34
+
35
class DalaalEnvEnvironment(Environment):
    """
    Browser-Use RL environment powered by Playwright.

    The agent observes the page through an accessibility tree and takes
    discrete actions (click, type, scroll, etc.) to complete tasks.

    Each episode loads one task's bundled mock website. Every non-terminal
    step yields ``STEP_PENALTY``. The terminal step (agent sends ``done`` or
    the step limit is reached) yields ``1.0`` plus the accumulated penalties,
    clamped to ``[0, 1]``, when the task's success check passes and ``0.0``
    otherwise.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Step budget reported/used while no task is loaded (matches the fallback
    # already used when building observations).
    _DEFAULT_MAX_STEPS = 20

    # Accessibility roles that are looked up with ``page.get_by_role``; any
    # other role falls back to an exact text match on the node's name.
    _LOCATABLE_ROLES = frozenset({
        "button", "link", "textbox", "checkbox", "radio", "combobox",
        "heading", "listitem", "option", "tab", "menuitem", "searchbox",
        # Listed as interactive by the accessibility module but previously
        # missing here, which sent them down the text-match fallback.
        "switch", "slider", "spinbutton",
    })

    def __init__(self):
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self._page: Optional[Page] = None
        self._a11y = AccessibilityTree()
        self._task: Optional[Task] = None
        self._last_error: Optional[str] = None
        self._total_reward: float = 0.0
        self._mock_sites_dir = get_mock_sites_dir()

    # ── Browser lifecycle ────────────────────────────────────────────

    async def _ensure_browser(self):
        """Launch browser if not already running."""
        if self._browser is None:
            self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )

    async def _new_page(self) -> Page:
        """Create a fresh browser context and page, closing any previous context."""
        if self._context:
            await self._context.close()
        self._context = await self._browser.new_context(
            viewport={"width": 1280, "height": 720},
        )
        self._page = await self._context.new_page()
        return self._page

    async def close(self):
        """Clean up browser resources."""
        if self._context:
            await self._context.close()
            self._context = None
        # The page belonged to the context closed above; drop the stale
        # handle so later calls report "No page open" instead of using it.
        self._page = None
        if self._browser:
            await self._browser.close()
            self._browser = None
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None

    # ── Observation / reward helpers ─────────────────────────────────

    async def _build_observation(self, done: bool = False, reward: float = 0.0) -> DalaalEnvObservation:
        """Build observation from current page state.

        With no page open (``step`` before ``reset``, or after ``close``) the
        observation carries an empty URL/title and the ``"[empty page]"``
        placeholder instead of failing.
        """
        page = self._page
        if page is None:
            tree_text, url, title = "[empty page]", "", ""
        else:
            tree_text = await self._a11y.extract(page)
            url = page.url
            title = await page.title()

        task = self._task
        return DalaalEnvObservation(
            url=url,
            title=title,
            accessibility_tree=tree_text,
            task_description=task.description if task else "",
            last_action_error=self._last_error,
            step_count=self._state.step_count,
            max_steps=task.max_steps if task else self._DEFAULT_MAX_STEPS,
            done=done,
            reward=reward,
        )

    async def _check_success(self) -> bool:
        """Check if the current task's success criteria are met."""
        if self._task is None or self._page is None:
            return False
        try:
            result = await self._page.evaluate(self._task.success_check_js)
            return bool(result)
        except Exception:
            # Best effort: a check that cannot be evaluated counts as failure.
            return False

    async def _terminal_reward(self) -> float:
        """Return the end-of-episode reward: clamped ``1.0 + penalties`` on success, else 0.0."""
        if not await self._check_success():
            return 0.0
        return max(0.0, min(1.0, 1.0 + self._total_reward))

    @staticmethod
    def _run_sync(coro):
        """Run ``coro`` to completion on this thread's event loop.

        ``asyncio.get_event_loop()`` raises ``RuntimeError`` in threads that
        have no loop yet; in that case a loop is created and installed so the
        sync API also works from worker threads.
        """
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        return loop.run_until_complete(coro)

    # ── Public API ───────────────────────────────────────────────────

    async def reset_async(self, seed=None, episode_id=None, **kwargs) -> DalaalEnvObservation:
        """Reset environment with a new task.

        Args:
            seed: Random seed (unused currently)
            episode_id: Custom episode ID
            **kwargs: May include 'task' with a valid task ID.
                      Defaults to 'todo_add' if not specified.

        Returns:
            The first observation of the freshly loaded mock site.
        """
        task_id = kwargs.get("task", "todo_add")
        self._task = get_task(task_id)
        self._state = State(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
        )
        self._last_error = None
        self._total_reward = 0.0

        await self._ensure_browser()
        page = await self._new_page()

        # Load the mock site
        site_path = os.path.join(self._mock_sites_dir, self._task.site_file)
        await page.goto(f"file://{site_path}")
        await page.wait_for_load_state("domcontentloaded")

        return await self._build_observation()

    def reset(self, seed=None, episode_id=None, **kwargs) -> DalaalEnvObservation:
        """Sync reset - delegates to async version."""
        return self._run_sync(
            self.reset_async(seed=seed, episode_id=episode_id, **kwargs)
        )

    async def step_async(self, action: DalaalEnvAction, timeout_s=30, **kwargs) -> DalaalEnvObservation:
        """Execute a browser action and return the new observation.

        A failing action does not raise: its message is reported through
        ``last_action_error`` and the step still counts. ``timeout_s`` is
        accepted for interface compatibility and is not used here.
        """
        self._state.step_count += 1
        self._last_error = None

        try:
            await self._execute_action(action)
        except Exception as e:
            self._last_error = str(e)

        # Small wait for page to settle after action
        await asyncio.sleep(0.3)

        # Without a loaded task (step before reset) fall back to the default
        # budget instead of dereferencing a missing task.
        max_steps = self._task.max_steps if self._task else self._DEFAULT_MAX_STEPS

        # The episode ends when the agent says so or the budget is used up;
        # both cases are scored the same way.
        if action.action_type == "done" or self._state.step_count >= max_steps:
            reward = await self._terminal_reward()
            return await self._build_observation(done=True, reward=reward)

        # Normal step: small penalty
        self._total_reward += STEP_PENALTY
        return await self._build_observation(done=False, reward=STEP_PENALTY)

    def step(self, action: DalaalEnvAction, timeout_s=30, **kwargs) -> DalaalEnvObservation:
        """Sync step - delegates to async version."""
        return self._run_sync(
            self.step_async(action, timeout_s=timeout_s, **kwargs)
        )

    @property
    def state(self) -> State:
        return self._state

    # ── Action execution ─────────────────────────────────────────────

    async def _execute_action(self, action: DalaalEnvAction):
        """Execute the given action on the browser page.

        Raises:
            RuntimeError: If no page is open.
            ValueError: If the action lacks a required parameter or refers to
                an unknown element.
        """
        page = self._page
        if page is None:
            raise RuntimeError("No page open. Call reset() first.")

        if action.action_type == "click":
            await self._action_click(page, action)
        elif action.action_type == "type":
            await self._action_type(page, action)
        elif action.action_type == "select_option":
            await self._action_select(page, action)
        elif action.action_type == "press_key":
            await self._action_press_key(page, action)
        elif action.action_type == "scroll":
            await self._action_scroll(page, action)
        elif action.action_type == "go_back":
            await page.go_back()
        elif action.action_type == "done":
            pass  # handled in step_async

    def _locate(self, page: Page, action: DalaalEnvAction, *, needs_text: bool = False):
        """Validate an element-targeting action and return the element's locator.

        Checks run in a fixed order (element_id, then text, then node lookup)
        so the reported error is the same one the per-action code produced.
        """
        verb = action.action_type
        if action.element_id is None:
            raise ValueError(f"{verb} requires element_id")
        if needs_text and action.text is None:
            raise ValueError(f"{verb} requires text")
        node = self._a11y.get_node(action.element_id)
        if node is None:
            raise ValueError(f"No element with id={action.element_id}")
        return self._get_locator(page, node)

    async def _action_click(self, page: Page, action: DalaalEnvAction):
        locator = self._locate(page, action)
        await locator.click(timeout=5000)

    async def _action_type(self, page: Page, action: DalaalEnvAction):
        locator = self._locate(page, action, needs_text=True)
        # Click before filling, as before; fill() then sets the field's value.
        await locator.click(timeout=5000)
        await locator.fill(action.text, timeout=5000)

    async def _action_select(self, page: Page, action: DalaalEnvAction):
        locator = self._locate(page, action, needs_text=True)
        await locator.select_option(label=action.text, timeout=5000)

    async def _action_press_key(self, page: Page, action: DalaalEnvAction):
        key = action.key or "Enter"
        await page.keyboard.press(key)

    async def _action_scroll(self, page: Page, action: DalaalEnvAction):
        direction = action.direction or "down"
        delta = -300 if direction == "up" else 300
        await page.mouse.wheel(0, delta)

    def _get_locator(self, page: Page, node):
        """Get a Playwright locator for an accessibility tree node.

        Raises:
            ValueError: If the node has neither a supported role nor a name.
        """
        role = node.role
        name = node.name

        if role in self._LOCATABLE_ROLES:
            if name:
                # The name comes verbatim from the accessibility tree, so it
                # is matched exactly; the default substring match would also
                # hit e.g. "Complete: Buy milk 2" when asked for
                # "Complete: Buy milk".
                return page.get_by_role(role, name=name, exact=True)
            return page.get_by_role(role)
        if name:
            return page.get_by_text(name, exact=True)
        raise ValueError(f"Cannot locate element: role={role}, name={name}")
server/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openenv[core]>=0.2.0
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
4
+ playwright>=1.40.0
server/tasks.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task registry for the Dalaal Browser-Use Environment.
3
+
4
+ Each task defines a goal, a mock site to load, and JavaScript-based
5
+ success criteria that are evaluated in the browser context.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from dataclasses import dataclass
12
+
13
+
14
@dataclass
class Task:
    """A browser task together with its success criteria.

    Attributes:
        id: Identifier of the task.
        description: Natural-language goal of the task.
        site_file: Path of the HTML file to load, relative to ``mock_sites/``.
        max_steps: Step budget for the task.
        success_check_js: JavaScript expression that returns true/false.
    """

    id: str
    description: str
    site_file: str
    max_steps: int
    success_check_js: str
23
+
24
+
25
# Absolute path of the mock_sites directory: it lives one level above the
# directory that contains this file.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_MOCK_SITES_DIR = os.path.join(os.path.dirname(_THIS_DIR), "mock_sites")


def get_mock_sites_dir() -> str:
    """Return the absolute path of the ``mock_sites`` directory."""
    return _MOCK_SITES_DIR
34
+
35
+
36
# Registry of all known tasks, keyed by task id.
TASKS: dict[str, Task] = {}


def _register(task: Task):
    """Add ``task`` to the ``TASKS`` registry under ``task.id``.

    Registering the very same object twice is a harmless no-op.

    Raises:
        ValueError: if a *different* task is already registered under the
            same id. Previously the earlier task was silently replaced,
            which hid copy-paste mistakes in the task definitions.
    """
    existing = TASKS.get(task.id, task)
    if existing is not task:
        raise ValueError(f"Duplicate task id '{task.id}'")
    TASKS[task.id] = task
41
+
42
+
43
+ # ── Todo App Tasks ──────────────────────────────────────────────────
44
+
45
# Task: add a "Buy milk" todo.
# Success check: at least one element matching `.todo-text` has text equal to
# 'buy milk' after trimming and lower-casing (so the match is case-insensitive).
_register(Task(
    id="todo_add",
    description='Add a new todo item called "Buy milk" to the todo list.',
    site_file="todo_app/index.html",
    max_steps=10,
    success_check_js="""
        (() => {
            const items = document.querySelectorAll('.todo-text');
            return Array.from(items).some(el => el.textContent.trim().toLowerCase() === 'buy milk');
        })()
    """,
))
57
+
58
# Task: add a "Buy milk" todo and mark it completed.
# Success check: at least one `.todo-item.completed` element contains a
# `.todo-text` descendant whose trimmed, lower-cased text is 'buy milk'.
# Items without a `.todo-text` descendant are skipped via optional chaining.
_register(Task(
    id="todo_add_and_complete",
    description='Add a todo item called "Buy milk" and mark it as completed.',
    site_file="todo_app/index.html",
    max_steps=15,
    success_check_js="""
        (() => {
            const items = document.querySelectorAll('.todo-item.completed');
            return Array.from(items).some(el => el.querySelector('.todo-text')?.textContent.trim().toLowerCase() === 'buy milk');
        })()
    """,
))
70
+
71
+ # ── Login Form Tasks ────────────────────────────────────────────────
72
+
73
# Task: log in with the given credentials.
# Success check: an element with id `success-message` exists and its inline
# `style.display` is not 'none'.
# NOTE(review): only the inline style is inspected; if the mock page hides the
# element through a stylesheet rule instead, this check would pass before any
# login happens - confirm against login_form/index.html.
# NOTE(review): the check itself does not look at the entered credentials;
# presumably the page only reveals the message for the right ones - verify.
_register(Task(
    id="login",
    description='Log in with username "admin" and password "secret123".',
    site_file="login_form/index.html",
    max_steps=10,
    success_check_js="""
        document.getElementById('success-message') !== null &&
        document.getElementById('success-message').style.display !== 'none'
    """,
))
83
+
84
+ # ── Search Engine Tasks ─────────────────────────────────────────────
85
+
86
# Task: search for "machine learning" and open the first result.
# Success check: an element with id `result-page` exists and its inline
# `style.display` is not 'none'.
# NOTE(review): the check does not inspect the query text or which result was
# clicked; presumably the mock page only shows `result-page` after a result
# link is followed - confirm against search_engine/index.html.
_register(Task(
    id="search_and_click",
    description='Search for "machine learning" and click on the first result link.',
    site_file="search_engine/index.html",
    max_steps=10,
    success_check_js="""
        document.getElementById('result-page') !== null &&
        document.getElementById('result-page').style.display !== 'none'
    """,
))
96
+
97
+ # ── E-commerce Tasks ────────────────────────────────────────────────
98
+
99
# Task: put "Wireless Headphones" into the cart.
# Success check: the element with id `cart-count` exists and its text, parsed
# as an integer, is greater than 0. A missing element yields a falsy result.
# NOTE(review): the check only looks at the cart counter, not at which product
# was added, so adding any product would satisfy it - confirm whether that is
# intended for this task.
_register(Task(
    id="add_to_cart",
    description='Add the "Wireless Headphones" product to your shopping cart.',
    site_file="ecommerce/index.html",
    max_steps=10,
    success_check_js="""
        (() => {
            const cartCount = document.getElementById('cart-count');
            return cartCount && parseInt(cartCount.textContent) > 0;
        })()
    """,
))
111
+
112
# Task: add "Wireless Headphones" to the cart and go to checkout.
# Success check: an element with id `checkout-page` exists and its inline
# `style.display` is not 'none'.
# NOTE(review): the cart contents are not inspected here; presumably the mock
# page only reveals `checkout-page` once the cart is non-empty - confirm
# against ecommerce/index.html.
_register(Task(
    id="add_to_cart_and_checkout",
    description='Add the "Wireless Headphones" to your cart and proceed to checkout.',
    site_file="ecommerce/index.html",
    max_steps=15,
    success_check_js="""
        document.getElementById('checkout-page') !== null &&
        document.getElementById('checkout-page').style.display !== 'none'
    """,
))
122
+
123
+ # ── Registration Form Tasks ─────────────────────────────────────────
124
+
125
# Task: fill in and submit the registration form.
# Success check: an element with id `success-message` exists and its inline
# `style.display` is not 'none'.
# NOTE(review): the submitted field values are not inspected by this check;
# presumably the mock page validates them before showing the message -
# confirm against registration_form/index.html.
_register(Task(
    id="fill_registration",
    description='Fill the registration form with: Name "John Doe", Email "john@example.com", select country "United States", and submit.',
    site_file="registration_form/index.html",
    max_steps=15,
    success_check_js="""
        document.getElementById('success-message') !== null &&
        document.getElementById('success-message').style.display !== 'none'
    """,
))
135
+
136
+
137
def get_task(task_id: str) -> Task:
    """Look up a registered task by its id.

    Raises:
        KeyError: if ``task_id`` is not registered; the message lists every
            available task id in sorted order.
    """
    if task_id in TASKS:
        return TASKS[task_id]

    known_ids = ", ".join(sorted(TASKS))
    raise KeyError(f"Unknown task '{task_id}'. Available tasks: {known_ids}")
143
+
144
+
145
def list_tasks() -> list[str]:
    """Return the ids of all registered tasks in sorted order."""
    task_ids = list(TASKS)
    task_ids.sort()
    return task_ids
uv.lock ADDED
The diff for this file is too large to render. See raw diff