`_.
-
-.. note::
-
- This project is under active development.
-
-Contents
---------
-
-.. toctree::
- :maxdepth: 2
-
- usage
- api
- tutorials
diff --git a/BrowserGym/docs/src/tutorials.rst b/BrowserGym/docs/src/tutorials.rst
deleted file mode 100644
index 01f5bcdb67769cd04b4d1e7c01e11fa9d0741bf6..0000000000000000000000000000000000000000
--- a/BrowserGym/docs/src/tutorials.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-Tutorials
-=========
-
-This section provides tutorials to help build new environments and tasks.
-
-.. grid:: 2
- :gutter: 2
-
- .. grid-item-card:: Walkthrough
- :link: examples/walkthrough.html
-
- :bdg-primary:`Getting started`
-
- .. grid-item-card:: Create a custom task
- :link: examples/create_custom_task.html
-
- :bdg-primary:`Custom task`
-
-
-
-.. toctree::
- :maxdepth: 1
- :hidden:
-
- examples/walkthrough.rst
- examples/create_custom_task.rst
diff --git a/BrowserGym/docs/src/usage.rst b/BrowserGym/docs/src/usage.rst
deleted file mode 100644
index 038ca6d93508754bea95f4f7d8c7ddabbfc8e3f6..0000000000000000000000000000000000000000
--- a/BrowserGym/docs/src/usage.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-Usage
-=====
-
-.. _installation:
-
-Installation
-------------
-
-To use BrowserGym, first install it using pip:
-
-.. code-block:: console
-
- pip install browsergym
-
-Then, as a required step, set up Playwright by running:
-
-.. code-block:: console
-
- playwright install chromium
-
-Example code
-------------
-
-Boilerplate code to run an agent on an interactive, open-ended task:
-
-.. code-block:: python
-
- import gymnasium as gym
- import browsergym.core # register the openended task as a gym environment
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": "https://www.google.com/"}, # starting URL
- wait_for_user_message=True, # wait for a user message after each agent message sent to the chat
- )
-
- obs, info = env.reset()
- done = False
- while not done:
- action = ... # implement your agent here
- obs, reward, terminated, truncated, info = env.step(action)
- done = terminated or truncated
diff --git a/BrowserGym/pyproject.toml b/BrowserGym/pyproject.toml
deleted file mode 100644
index 68b016a511ba078a6a0614b47af3d6026d04f83c..0000000000000000000000000000000000000000
--- a/BrowserGym/pyproject.toml
+++ /dev/null
@@ -1,33 +0,0 @@
-[project]
-name = "browsergym-meta"
-description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
-dynamic = ["version"]
-[tool.setuptools]
-packages = [] # meta distribution, packages are included as dependencies
-[tool.black]
-line-length = 100
-include = '\.pyi?$'
-exclude = '''
-/(
- \.eggs
- | \.git
- | \.hg
- | \.mypy_cache
- | \.nox
- | \.tox
- | \.venv
- | _build
- | buck-out
- | build
- | dist
-)/
-'''
-
-[tool.pytest.ini_options]
-filterwarnings = [
- 'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
-]
-markers = [
- "slow: marks tests as slow (deselect with '-m \"not slow\"')",
- "serial: mark test to be run sequentially (deselect with '-m \"not serial\"')"
-]
diff --git a/BrowserGym/tests/__init__.py b/BrowserGym/tests/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/BrowserGym/tests/assistantbench/__init__.py b/BrowserGym/tests/assistantbench/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl b/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl
deleted file mode 100644
index 04c1bf8931e9aca6493316f7f4703ba2f4c95d92..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl
+++ /dev/null
@@ -1,33 +0,0 @@
-{"id": "2aa5dd83fbcd0dce9a3dd4592106e5b5edf738008d932e357d477bba80e59ccf", "answer": "\\( \\frac{2}{7} \\times 100 \\approx 28.57 \\)", "gold_answer": "14.2", "score": 0, "has_ans": 1.0}
-{"id": "2ddae3b7a208e3c25f14d82d7a1faaaa1832fbf950b4dac345e755c4c361f294", "answer": 800000.0, "gold_answer": "1010000", "score": 0.7669061178326222, "has_ans": 1.0}
-{"id": "4e615af6f0348597b4133cc1ec5418bb3f35328e3d95e23a275027cee97b5e09", "answer": [], "gold_answer": "Adrenalinpark K\u00f6ln", "score": 0.0, "has_ans": 0}
-{"id": "c7afe00869f98cf363fd83677ac41757ed5e57f03eacc3d1304feb0a92084bd1", "answer": "Knives Out", "gold_answer": "Glass Onion: A Knives Out Mystery", "score": 0.5714285714285715, "has_ans": 1.0}
-{"id": "57d9dc6935e8a40b02e7f8ec81768fe70e68a0c05f6866927c9fda38db38a486", "answer": "-$108", "gold_answer": "45", "score": 0, "has_ans": 1.0}
-{"id": "748899d9d70c09beb3bd48ac8a3658bdcfd2f9114fe6dc4c4b8d2f9541ef4607", "answer": [{"sender": "dhl", "price (usd)": "50"}, {"sender": "fedex", "price (usd)": "60"}], "gold_answer": "{\"sender\": \"DHL\", \"price (usd)\": \"55-70\"}\n{\"sender\": \"Fedex\", \"price (usd)\": \"62-95\"}\n{\"sender\": \"USPS\", \"price (usd)\": \"73.4-78.15\"}", "score": 0.3333333333333333, "has_ans": 1.0}
-{"id": "9e31099fffa6a3891c94934fd4fc2f3f522d51c1904ff3561f3a10e4bf245821", "answer": "oshrat binyamin", "gold_answer": "Shiran Nawi, Yoni Osherov, Daniel Lereya", "score": 0.0, "has_ans": 1.0}
-{"id": "291b53e665b4dd4365cde995042db4a6f6fecef3fe3a6f4482f23d61bd673918", "answer": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/gcf_002288925.1_asm228892v2/gcf_002288925.1_asm228892v2_genomic.gff.gz", "gold_answer": "https://ftp.ensembl.org/pub/release-101/gff3/delphinapterus_leucas/Delphinapterus_leucas.ASM228892v3.101.gff3.gz", "score": 0.0, "has_ans": 1.0}
-{"id": "8fa42360185068216f2919935148d4e1ad28ddc18da0abd0f4bb0b6b6f84b127", "answer": "vgt", "gold_answer": "VGT", "score": 1.0, "has_ans": 1.0}
-{"id": "3af8028c2a59e28ca88baff0e6d91f2a9f170c5ef91003f1c8406755a2760ad4", "answer": "Oko, Thief of Crowns", "gold_answer": "Oko, Thief of Crowns", "score": 1.0, "has_ans": 1.0}
-{"id": "6b06d186921b8b390c65aebd0d16f09f60a47d2f1288ebe36953f734e84c0a3c", "answer": "", "gold_answer": "1148 sqft", "score": 0.0, "has_ans": 0.0}
-{"id": "9bdca8677af1e25cb7b0c7992dc62670c3e58e4afcd5ae60bcaa2483556bba00", "answer": ["'{\"sender\": \"usps\", \"price (usd)\": 25}'"], "gold_answer": "{\"sender\": \"USPS\", \"price (usd)\": \"41.75\"}", "score": 0, "has_ans": 1.0}
-{"id": "557e78eceec08ca8b0da5f9fdaca6e1c7ec6140a8ce600983ee716327dab005e", "answer": "Wolly Mammoth", "gold_answer": "For Pete's Sake", "score": 0.0, "has_ans": 1.0}
-{"id": "fb9ba3ab6a13d0adc677f993e90d54914a5cdf211305a1bba6bf16ec4ccb9b7c", "answer": "Instagram", "gold_answer": "Linkedin", "score": 0.0, "has_ans": 1.0}
-{"id": "52f7224e9c79431e7926afe317782711a0028750693e7456cde22ef6f4bd8bd5", "answer": "Nosferatu the Vampyre", "gold_answer": "Nosferatu the Vampyre", "score": 1.0, "has_ans": 1.0}
-{"id": "0ec4371851b96837b0a81b3dd3df401415061bb532fbafeb4609f3337c358508", "answer": ["anytime fitness", "point pleasant wellness center"], "gold_answer": "The Root Sports & Fitness Center\nMuscle Headz Gym", "score": 0.16666666666666666, "has_ans": 1.0}
-{"id": "6f224e7730ed027cbac73aebb1aea7f954053082041b02b19f4ff126a0a8a208", "answer": "Gina DiGioia", "gold_answer": "Gina DiGioia", "score": 1.0, "has_ans": 1.0}
-{"id": "99da66d8af02491f98b98c56b26c709e773b5a2ad945fb280375951ba600de09", "answer": 250.0, "gold_answer": "395", "score": 0.5425751529611245, "has_ans": 1.0}
-{"id": "ccec2229ced20a4b0cb4897e3a99120a3017ea030903e01c9bda6b13d40b0b14", "answer": "", "gold_answer": "McDonald's", "score": 0.0, "has_ans": 0.0}
-{"id": "9baaa267c95f9d8b75741ee9169c50563d297cfa592c20deaffd30dbc5984c74", "answer": 16.67, "gold_answer": "31.67", "score": 0.3582408362121543, "has_ans": 1.0}
-{"id": "6e3be83d1949fa52cba03fb1ce4b5b3bf7e37a83fd7d67694b10b2e439d90cf8", "answer": "wall street boxing & fitness", "gold_answer": "Renzo Gracie Jiu-Jitsu Wall Street", "score": 0.4, "has_ans": 1.0}
-{"id": "e2dc3a6b10b762e8aba7fa4d4e70f757f6d04dcbc8b56c48fc53fd9928d31d07", "answer": 40.0, "gold_answer": "30", "score": 0.7123179275482192, "has_ans": 1.0}
-{"id": "f88066d274e265edd6cd9d61cd80a41accb3a14baf2297652fdd05cdf716d455", "answer": "lower yosemite fall trail", "gold_answer": "Yosemite Falls\nBridalveil Fall", "score": 0.16666666666666666, "has_ans": 1.0}
-{"id": "e6bc98089608217e45b6956a46518fe3cce64a799b3ac43c6974c449ae14c408", "answer": 2140000.0, "gold_answer": "3080000", "score": 0.635876232048277, "has_ans": 1.0}
-{"id": "8ad84bd6fe38481ba49e7ad1f6fbd43219a999074e5c6fc940003281f55ec65b", "answer": ["trader joe's", "whole foods market", "aldi"], "gold_answer": "Potash Markets - Clark Street", "score": 0.0, "has_ans": 1.0}
-{"id": "55f4258484c5b398956133128a50462a767da211f8f72aa5ac5bbffb9bcbba1a", "answer": "Becker", "gold_answer": "CSI: Cyber", "score": 0.0, "has_ans": 1.0}
-{"id": "4dbedc5e1a0205e14b7ff3ba89bce3060dab15d0ada3b7e1351a6f2aa8287aec", "answer": 95.0, "gold_answer": "$55", "score": 0.4534562936319301, "has_ans": 1.0}
-{"id": "929b45f34805280d77c61d1e093e3d4e551d77ddb6ecd73552b12b1af286388d", "answer": "http://hgdownload.soe.ucsc.edu/goldenpath/canfam3/bigzips/", "gold_answer": "ftp://ftp.broadinstitute.org/distribution/assemblies/mammals/dog/canFam3.1/", "score": 0.0, "has_ans": 1.0}
-{"id": "cca4776df3c73e7f9430a2e624aafad056b14322a0b7ca6c0c22b7e7f3f0890a", "answer": "monica c. lozano", "gold_answer": "Wanda Austin\nRonald D. Sugar\nSue Wagner", "score": 0.0, "has_ans": 1.0}
-{"id": "efc0f3a47e9ed2ecdbcc037c2093865fe6e39f4d413a5d1ccdc7357160a4606b", "answer": "fidelity emerging asia fund (fseax)", "gold_answer": "Fidelity\u00ae Emerging Markets Index Fund (FPADX)", "score": 0.3636363636363636, "has_ans": 1.0}
-{"id": "b36ef2d8f2643b80e74a44ce3403f674ecb2aed7fd36afeaa289061a59feef92", "answer": "crunch fitness - east village", "gold_answer": "CrossFit East River\nAvea Pilates", "score": 0.14285714285714288, "has_ans": 1.0}
-{"id": "a9074997e698f912b9e751779ea19c1e92fa148404e90e0ae997acea3f9559b0", "answer": ["uncle tom's trail", "mount washburn", "fairy falls"], "gold_answer": "Trout lake trail\nArtist Point\nFountain Paint Pot\nLone Star Geyser\nStorm Point Trail", "score": 0.06666666666666667, "has_ans": 1.0}
-{"id": "797f7a5b65ca28b7e7156e7db1e9f117bd4a021de0cd512bfdbb0be897d89eab", "answer": ["red bamboo", "quantum leap"], "gold_answer": "Shanghai villa", "score": 0.0, "has_ans": 1.0}
\ No newline at end of file
diff --git a/BrowserGym/tests/assistantbench/test_env_general.py b/BrowserGym/tests/assistantbench/test_env_general.py
deleted file mode 100644
index 6a2c2e44f3849ad2b49e36893f6c50a705a34595..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/assistantbench/test_env_general.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import logging
-import os
-import random
-
-import gymnasium as gym
-import playwright.sync_api
-import pytest
-from tenacity import retry, retry_if_exception_type, stop_after_attempt
-
-# register gym environments
-import browsergym.assistantbench
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-from browsergym.assistantbench import TEST_AB_TASK_IDS, VALID_AB_TASK_IDS
-
-rng = random.Random(1)
-valid_task_ids = rng.sample(VALID_AB_TASK_IDS, 10)
-test_task_ids = rng.sample(TEST_AB_TASK_IDS, 10)
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-@pytest.mark.parametrize("task_id", valid_task_ids + test_task_ids)
-@pytest.mark.slow
-def test_valid_env(task_id):
- env = gym.make(
- f"browsergym/{task_id}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- obs, reward, terminated, truncated, info = env.step("noop(0)")
- assert not obs["last_action_error"]
- assert not (terminated or truncated)
-
- obs, reward, terminated, truncated, info = env.step('send_msg_to_user("something")')
- assert not obs["last_action_error"]
- assert terminated
-
- env.close()
diff --git a/BrowserGym/tests/assistantbench/test_evaluation.py b/BrowserGym/tests/assistantbench/test_evaluation.py
deleted file mode 100644
index 4973d7158f780b0397ff669f7051e44fcfd8d0a5..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/assistantbench/test_evaluation.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import json
-import pathlib
-
-import gymnasium as gym
-import pytest
-
-from browsergym.assistantbench.evaluation.evaluator import question_scorer
-from browsergym.experiments.benchmark.metadata.utils import (
- task_list_from_metadata,
- task_metadata,
-)
-
-__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"
-
-metadata = task_metadata("assistantbench")
-file_path = pathlib.Path(__DATA_DIR) / "fallback_gpt4_seeplanact_predictions.jsonl"
-
-data_points = {}
-
-# Open the JSONL file and read each line as a JSON object
-with open(file_path, "r") as f:
- for line in f:
- data_point = json.loads(line)
-
- original_id = data_point["id"]
- answer = data_point["answer"]
- gold_answer = data_point["gold_answer"]
- score = data_point["score"]
- has_ans = data_point["has_ans"]
-
- data_points[original_id] = {
- "task_id": task_list_from_metadata(metadata, {"original_id": original_id})[0],
- "answer": answer,
- "gold_answer": gold_answer,
- "score": score,
- "has_ans": has_ans,
- }
-
-
-@pytest.mark.parametrize("original_id", list(data_points.keys()))
-def test_evaluate(original_id: str):
-
- answer = data_points[original_id]["answer"]
- gold_answer = data_points[original_id]["gold_answer"]
- expected_score = data_points[original_id]["score"]
- expected_has_ans = data_points[original_id]["has_ans"]
-
- score, has_ans = question_scorer(answer, gold_answer)
-
- # Fail the test if the computed score or has_ans differs from the expected values
- assert score == expected_score
- assert has_ans == expected_has_ans
-
-
-@pytest.mark.parametrize(
- "original_id",
- [id for id in data_points.keys() if isinstance(data_points[id]["answer"], (str, float, int))],
-)
-@pytest.mark.slow
-def test_evaluate_within_env(original_id: str):
-
- task_id = data_points[original_id]["task_id"]
- answer = data_points[original_id]["answer"]
- expected_score = data_points[original_id]["score"]
-
- env = gym.make(
- f"browsergym/{task_id}",
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- obs, reward, terminated, truncated, info = env.step(f"send_msg_to_user({repr(str(answer))})")
- assert not obs["last_action_error"]
- assert terminated
- assert reward == expected_score
-
- env.close()
diff --git a/BrowserGym/tests/core/__init__.py b/BrowserGym/tests/core/__init__.py
deleted file mode 100644
index 75f09d6fbde51609da41e1041eb3fb8125d808cb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# bugfix: use same playwright instance in browsergym and pytest
-from ..utils import setup_playwright
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html
deleted file mode 100644
index e2e61c694f20f358274a32f62c0cb74b6a63286b..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
-
- Iframe Example
-
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html
deleted file mode 100644
index d8e51b6ce1a4b8deebfd02868dd44e42e3a12158..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
- Simple Website
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html
deleted file mode 100644
index 6cb49db9ca79b79111698aa23d975a1900296298..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
- Inner Iframe
-
-
-
-
- Iframe Level 2
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html
deleted file mode 100644
index b71a077f2b374005894c2804aa9bf827e139d213..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-
- Shadow DOM Example
-
-
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html b/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html
deleted file mode 100644
index 242678f9696f448afffe5e5523aa36704fe6ec95..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html
+++ /dev/null
@@ -1,52 +0,0 @@
-
-
-
- Unit Test with Complex Nested Shadow DOM
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html b/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html
deleted file mode 100644
index fdcc8ceca07f897be41996144dd2a895d1a02229..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
- Unit Test with Complex Nested Shadow DOM
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html
deleted file mode 100644
index e2e61c694f20f358274a32f62c0cb74b6a63286b..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
-
- Iframe Example
-
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html
deleted file mode 100644
index dbcd6756822e81b68bdee21ec36944613b682826..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
- Simple Website
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html
deleted file mode 100644
index 0d480d6701adc7d034f3e05c03b899b206b9f949..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
- Inner Iframe
-
-
- Iframe Level 2
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html
deleted file mode 100644
index eed22ca03938bded8c1408df0a698515fa5068e9..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html
+++ /dev/null
@@ -1,40 +0,0 @@
-
-
-
- Shadow DOM Example
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/example.html b/BrowserGym/tests/core/data/example.html
deleted file mode 100644
index 13552a70b0edc84663a94433a7da6ed525561e65..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/example.html
+++ /dev/null
@@ -1,52 +0,0 @@
-
-
-
-
- Example Domain
-
-
-
-
-
-
-
-
-
-
Example Domain
-
This domain is for use in illustrative examples in documents. You may use this
- domain in literature without prior coordination or asking for permission.
-
More information...
-
-
-
-
diff --git a/BrowserGym/tests/core/data/hover.html b/BrowserGym/tests/core/data/hover.html
deleted file mode 100644
index 385bf2dc97085ea2a06eefdc22aa1af159bbe077..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/hover.html
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/button_input.html b/BrowserGym/tests/core/data/input_type/button_input.html
deleted file mode 100644
index 9d6e6493c7594a0a9cd86cbd3f04fcfbea415c93..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/button_input.html
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-Input Button
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/checkbox_input.html b/BrowserGym/tests/core/data/input_type/checkbox_input.html
deleted file mode 100644
index ada1f2ff25cc66ed14281a96ca60021da9d173c4..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/checkbox_input.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-Checkboxes
-The input type="checkbox" defines a checkbox:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/color_picker_input.html b/BrowserGym/tests/core/data/input_type/color_picker_input.html
deleted file mode 100644
index e33b957dc62cb351ad6f2af5e4b2b55af5967acf..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/color_picker_input.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-Show a Color Picker
-
-The input type="color" is used for input fields that should contain a color.
-
-
-
-Note: type="color" is not supported in Internet Explorer 11 or Safari 9.1 (or earlier).
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/date_input.html b/BrowserGym/tests/core/data/input_type/date_input.html
deleted file mode 100644
index 0e2d6a3fe1155b35651896483e9a072685f2c34d..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/date_input.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-Date Field
-
-The input type="date" is used for input fields that should contain a date.
-
-
-
-Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/date_min_max_input.html b/BrowserGym/tests/core/data/input_type/date_min_max_input.html
deleted file mode 100644
index f519df9c130708a26e71c474496538c60d9930f4..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/date_min_max_input.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Date Field Restrictions
-
-Use the min and max attributes to add restrictions to dates:
-
-
-
-Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/date_time_local_input.html b/BrowserGym/tests/core/data/input_type/date_time_local_input.html
deleted file mode 100644
index cc34237bebfa0704a8cc6d1553d5b490fed9dd58..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/date_time_local_input.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-Local Date Field
-
-The input type="datetime-local" specifies a date and time input field, with no time zone.
-
-
-
-Note: type="datetime-local" is not supported in Internet Explorer 11 or prior Safari 14.1.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/email_input.html b/BrowserGym/tests/core/data/input_type/email_input.html
deleted file mode 100644
index 0e2f6c3b5db4022e32eef4eb8ac5c0aa79a8ba40..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/email_input.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-Email Field
-
-The input type="email" is used for input fields that should contain an e-mail address:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/file_input.html b/BrowserGym/tests/core/data/input_type/file_input.html
deleted file mode 100644
index 5a026e729276c425c23e546cee19a7900cbae84d..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/file_input.html
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-
-
-File upload
-
-Show a file-select field which allows a file to be chosen for upload:
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/hidden_field_input.html b/BrowserGym/tests/core/data/input_type/hidden_field_input.html
deleted file mode 100644
index af16596e12dfde14ecc4f2d3daac9006f0cfb26a..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/hidden_field_input.html
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
-
-A Hidden Field (look in source code)
-
-
-
-Note: The hidden field is not shown to the user, but the data is sent when the form is submitted.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/image_input.html b/BrowserGym/tests/core/data/input_type/image_input.html
deleted file mode 100644
index 502fd2990500a8a21b2ef030db2017a90e0bd02f..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/image_input.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-Display an Image as the Submit button
-
-
-
-Note: The input type="image" sends the X and Y coordinates of the click that activated the image button.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/number_input.html b/BrowserGym/tests/core/data/input_type/number_input.html
deleted file mode 100644
index 1158e2baaab595f7ba1f8381fd95811b2fdf9be8..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/number_input.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-Number Field
-
-The input type="number" defines a numeric input field.
-
-You can use the min and max attributes to add numeric restrictions in the input field:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/number_step_input.html b/BrowserGym/tests/core/data/input_type/number_step_input.html
deleted file mode 100644
index 8d68505c3c8005bbc6a055bca0210db276120050..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/number_step_input.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-Numeric Steps
-
-Depending on browser support: Fixed steps will apply in the input field.
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/password_input.html b/BrowserGym/tests/core/data/input_type/password_input.html
deleted file mode 100644
index 66eb78622aec4c406d77b49bb61e8f7e99503e41..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/password_input.html
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-Password field
-
-The input type="password" defines a password field:
-
-
-
-The characters in a password field are masked (shown as asterisks or circles).
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/radio_input.html b/BrowserGym/tests/core/data/input_type/radio_input.html
deleted file mode 100644
index 125d68f4df1bd3c28d1954519c01b40ff24daa68..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/radio_input.html
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-Radio Buttons
-
-The input type="radio" defines a radio button:
-
-Choose your favorite Web language:
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/range_input.html b/BrowserGym/tests/core/data/input_type/range_input.html
deleted file mode 100644
index d96b9791994a7546b71c4808ffd1b111546d3323..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/range_input.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-Range Field
-
-Depending on browser support: The input type "range" can be displayed as a slider control.
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/reset_input.html b/BrowserGym/tests/core/data/input_type/reset_input.html
deleted file mode 100644
index d7710c1a8078eceef1f0c3b70503c7aed35ac2da..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/reset_input.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-Reset Button
-
-The input type="reset" defines a reset button that resets all form values to their default values:
-
-
-
-If you change the input values and then click the "Reset" button, the form-data will be reset to the default values.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/search_input.html b/BrowserGym/tests/core/data/input_type/search_input.html
deleted file mode 100644
index db8ab66ed15a758c76d9f9fca7344c91b378dd10..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/search_input.html
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-
-
-Search Field
-The input type="search" is used for search fields (behaves like a regular text field):
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/submit_input.html b/BrowserGym/tests/core/data/input_type/submit_input.html
deleted file mode 100644
index 257ebdda7f922079c4cd2648564e6adcd8be4c58..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/submit_input.html
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-Submit Button
-
-The input type="submit" defines a button for submitting form data to a form-handler:
-
-
-
-If you click "Submit", the form-data will be sent to a page called "https://www.w3schools.com/action_page.php".
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/submit_nn_input.html b/BrowserGym/tests/core/data/input_type/submit_nn_input.html
deleted file mode 100644
index da04e9d3a5413e6d5ca8f08c0c705d3156161569..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/submit_nn_input.html
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/telephone_input.html b/BrowserGym/tests/core/data/input_type/telephone_input.html
deleted file mode 100644
index 12a0c8a59a1da62b4e578dca976d167dbf1d8dfa..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/telephone_input.html
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
-
-Telephone Field
-
-The input type="tel" is used for input fields that should contain a telephone number:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/text_input.html b/BrowserGym/tests/core/data/input_type/text_input.html
deleted file mode 100644
index 811753a26fd5325d481b2246051dc1d2d153a540..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/text_input.html
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-Text field
-The input type="text" defines a one-line text input field:
-
-
-
-Note that the form itself is not visible.
-Also note that the default width of a text field is 20 characters.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/time_input.html b/BrowserGym/tests/core/data/input_type/time_input.html
deleted file mode 100644
index 8ca605580af8ee58f86aecb20acf5d2d8fa9c263..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/time_input.html
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
-
-
-Show a Time Input Control
-
-The input type="time" allows the user to select a time (no time zone):
-
-If the browser supports it, a time picker pops up when entering the input field.
-
-
-
-Note: type="time" is not supported in Internet Explorer 11 or prior Safari 14.1.
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/url_input.html b/BrowserGym/tests/core/data/input_type/url_input.html
deleted file mode 100644
index 1f6bdf641d746e582d0ce3dad5f04e483ab7bef4..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/url_input.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-Display a URL Input Field
-
-The input type="url" is used for input fields that should contain a URL address:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/input_type/week_input.html b/BrowserGym/tests/core/data/input_type/week_input.html
deleted file mode 100644
index 1f6bdf641d746e582d0ce3dad5f04e483ab7bef4..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/input_type/week_input.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-Display a URL Input Field
-
-The input type="url" is used for input fields that should contain a URL address:
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/long_page.html b/BrowserGym/tests/core/data/long_page.html
deleted file mode 100644
index 8fd6ca357e35581ea09f2c905b36ee9df439f92f..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/long_page.html
+++ /dev/null
@@ -1,211 +0,0 @@
-
-
-
-
-
- This is the top
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- This is the bottom
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/lots_of_iframes.html b/BrowserGym/tests/core/data/lots_of_iframes.html
deleted file mode 100644
index ba342a9ced3d48364816b1fcb7888f5518a69001..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/lots_of_iframes.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
- Lots of Iframes
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/obstructed_checkbox_page.html b/BrowserGym/tests/core/data/obstructed_checkbox_page.html
deleted file mode 100644
index a3f9ec1f23c7dad374236eb1a6e19e52ceb56cb5..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/obstructed_checkbox_page.html
+++ /dev/null
@@ -1,93 +0,0 @@
-
-
-
-
-
- Checkbox with Label Interception
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/data/test_page.html b/BrowserGym/tests/core/data/test_page.html
deleted file mode 100644
index cdb46c801b32395364831e3c0a6dc32149bda067..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/test_page.html
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-
- Simple Form
-
-
- Simple Form
-
-
-
-
diff --git a/BrowserGym/tests/core/data/test_page_2.html b/BrowserGym/tests/core/data/test_page_2.html
deleted file mode 100644
index b3b2a5d69c83f74229fb2589f7c0798f960db9bb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/test_page_2.html
+++ /dev/null
@@ -1,63 +0,0 @@
-
-
-
-
- Simple Form
-
-
-
- Simple Form
-
-
-
-
- Text within a non-html tag
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Text that should not be visible
-
-
-
diff --git a/BrowserGym/tests/core/data/textbox.html b/BrowserGym/tests/core/data/textbox.html
deleted file mode 100644
index c93bd6f7835a9f11860ce6cd2406794c3376a26b..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/data/textbox.html
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
- Simple HTML Page
-
-
-
-
-
-
-
-
diff --git a/BrowserGym/tests/core/test_actions_highlevel.py b/BrowserGym/tests/core/test_actions_highlevel.py
deleted file mode 100644
index a3a4f56c6f9f7d579cccee40f2d747c8c42cbdc9..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/test_actions_highlevel.py
+++ /dev/null
@@ -1,1256 +0,0 @@
-import ast
-import os
-import pathlib
-import platform
-import re
-
-import bs4
-import gymnasium as gym
-import pytest
-from pyparsing.exceptions import ParseException
-
-# register openended gym environments
-import browsergym.core
-from browsergym.core.action.highlevel import HighLevelActionSet
-from browsergym.core.action.parsers import NamedArgument, highlevel_action_parser
-from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
-from browsergym.utils.obs import flatten_dom_to_str
-
-_IS_MAC_OS = platform.system() == "Darwin"
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-__TIMEOUT = 500
-
-__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"
-
-TEXTBOX_URL = f"file://{__DATA_DIR}/textbox.html"
-EXAMPLE_URL = f"file://{__DATA_DIR}/example.html"
-HOVER_URL = f"file://{__DATA_DIR}/hover.html"
-INEXISTANT_FILE_URL = f"file://{__DATA_DIR}/no_file_here.html"
-LONG_PAGE_URL = f"file://{__DATA_DIR}/long_page.html"
-TEXT_INPUT_URL = f"file://{__DATA_DIR}/input_type/text_input.html"
-URL_INPUT_URL = f"file://{__DATA_DIR}/input_type/url_input.html"
-CHECKBOX_URL = f"file://{__DATA_DIR}/input_type/checkbox_input.html"
-MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
-OBSTRUCTED_CHECKBOX_URL = f"file://{__DATA_DIR}/obstructed_checkbox_page.html"
-LOTS_OF_IFRAMES_URL = f"file://{__DATA_DIR}/lots_of_iframes.html"
-
-
-def test_action_parser():
- parser = highlevel_action_parser
-
- with pytest.raises(ParseException):
- function_calls = parser.parse_string("", parseAll=True)
- assert not function_calls
-
- function_calls = parser.parse_string("a()", parseAll=True)
- assert len(function_calls) == 1
-
- function_calls = parser.parse_string(" a ( ) \n\n\t", parseAll=True)
- assert len(function_calls) == 1
-
- function_calls = parser.parse_string(" a ( ) b() \n \tc()", parseAll=True)
- assert [function_name for function_name, _ in function_calls] == ["a", "b", "c"]
-
- function_calls = parser.parse_string('a(12, 12.2, "text", (1, 2, 3), ["a", 23])', parseAll=True)
- _, function_args = function_calls[0]
- assert function_args == [12, 12.2, "text", (1, 2, 3), ["a", 23]]
-
- function_calls = parser.parse_string('a(x=12, y = 12.2, other = "text")', parseAll=True)
- _, function_args = function_calls[0]
- assert function_args == [
- NamedArgument(name="x", value=12),
- NamedArgument(name="y", value=12.2),
- NamedArgument(name="other", value="text"),
- ]
-
- function_calls = parser.parse_string('a(12, y = 12.2, other = "text")', parseAll=True)
- _, function_args = function_calls[0]
- assert function_args == [
- 12,
- NamedArgument(name="y", value=12.2),
- NamedArgument(name="other", value="text"),
- ]
-
- with pytest.raises(ParseException):
- function_calls = parser.parse_string('a(x = 12, 12.2, other = "text")', parseAll=True)
-
- with pytest.raises(ParseException):
- function_calls = parser.parse_string('a(12, 12.2, 1 = "text")', parseAll=True)
-
- with pytest.raises(ParseException):
- function_calls = parser.parse_string("a(1-)", parseAll=True)
-
- with pytest.raises(ParseException):
- function_calls = parser.parse_string("a(1/2)", parseAll=True)
-
- function_calls = parser.parse_string('a("""\nsome\ntext\\"\\"""")', parseAll=True)
- _, function_args = function_calls[0]
- assert function_args == ['\nsome\ntext""']
-
- function_calls = parser.parse_string("a('\"some\\ntext\"')", parseAll=True)
- _, function_args = function_calls[0]
- assert function_args == ['"some\ntext"']
-
- function_calls = parser.parse_string('#comment\na("# not comment") #comment \n ', parseAll=True)
- assert len(function_calls) == 1
- function_name, function_args = function_calls[0]
- assert function_name == "a"
- assert function_args == ["# not comment"]
-
- function_calls = parser.parse_string('fun(12, x="val", y={"aaa": 23})', parseAll=True)
- function_name, function_args = function_calls[0]
- assert function_name == "fun"
- assert function_args == [
- 12,
- NamedArgument(name="x", value="val"),
- NamedArgument(name="y", value={"aaa": 23}),
- ]
-
-
-def test_valid_action():
- action_set = HighLevelActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def get_checkbox_elem(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
- return checkbox
-
- obs, info = env.reset()
- checkbox = get_checkbox_elem(obs)
-
- # box not checked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # typo in action (unescaped double quotes)
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))}, "17" screen") # typo here
-"""
- with pytest.raises(ValueError):
- python_action = action_set.to_python_code(action)
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # error and box not checked
- assert "Received an empty action." in obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # click box 1 time
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 1
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- # click box 2 times
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 2
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box still checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- # click box 3 times
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-click({repr(checkbox.get(BID_ATTR))})
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box unchecked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # click box 3 times, same line ops
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))}) click({repr(checkbox.get(BID_ATTR))}) click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- # click box 3 times, multi line ops, whitespace, tab, comma in-between args
- action = f"""\
- click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
- noop() noop () noop( )
- # THIS IS A COMMENT
- noop() # this is a noop() call
-click({repr(checkbox.get(BID_ATTR))}, )
-#click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box unchecked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # click box 3 times, multi line ops, whitespace, tab, comma in-between args, markdown code block
- action = f"""\
-Below is code
- ```python
- click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
- noop() noop () noop( )
- # THIS IS A COMMENT
- noop() # this is a noop() call
-click({repr(checkbox.get(BID_ATTR))}, )
-#click({repr(checkbox.get(BID_ATTR))})
-```
-This is not code, just an explanation
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- # multiple markdown code blocks
- action = f"""\
-Below is code
- ```python
- noop() noop () noop( )
- # THIS IS A COMMENT
- noop() # this is a noop() call
-click({repr(checkbox.get(BID_ATTR))}, )
-#click({repr(checkbox.get(BID_ATTR))})
-```
-This is not code, just an explanation
-Below is more code
- ```python
- click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
- noop() noop () noop( )
- # THIS IS A COMMENT
- noop() # this is a noop() call
-#click({repr(checkbox.get(BID_ATTR))})
-```
-This is not code, just an explanation
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box unchecked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # multiple function calls in the middle of text
- action = f"""\
-Let's do a noop(), then noop () noop( ) then click({repr(checkbox.get(BID_ATTR))}, )
-#click({repr(checkbox.get(BID_ATTR))})
-Now let's do two more
- click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
- noop() noop () noop( )
- # THIS IS A COMMENT
- noop() # this is a noop() call
-#click({repr(checkbox.get(BID_ATTR))})
-```
-This is not code, just an explanation
-This is garbage
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nclick(") == 3
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- env.close()
-
-
-def test_invalid_action():
- action_set = HighLevelActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
-
- # click inexistant bid
- action = f"""\
-click("INVALID_BID")
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert "ValueError" in obs["last_action_error"]
-
- # invalid bid value type
- action = f"""\
-click(None)
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got None"
-
- # invalid bid value type
- action = f"""\
-click(42.7)
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got 42.7"
-
- # invalid bid value type
- action = f"""\
-click([])
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got []"
-
- # invalid bid value type
- action = f"""\
-click([42, "a", True, None])
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got [42, 'a', True, None]"
-
- # invalid bid value type
- action = f"""\
-click({{}})
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got {}"
-
- # invalid bid value type
- action = f"""\
-click({{"k": "aaa"}})
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "ValueError: expected a string, got {'k': 'aaa'}"
-
- # invalid action args (too many)
- action = f"""\
-click("4", "aa", "bb")
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert obs["last_action_error"] == "Error: Locator.click: modifiers: expected array, got string"
-
- # invalid action args (not enough)
- action = f"""\
-click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert (
- obs["last_action_error"]
- == "TypeError: click() missing 1 required positional argument: 'bid'"
- )
-
- # invalid action args (not enough)
- action = f"""\
-click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- # error
- assert (
- obs["last_action_error"]
- == "TypeError: click() missing 1 required positional argument: 'bid'"
- )
-
- # invalid action name
- with pytest.raises(NameError):
- action_set.to_python_code(
- f"""\
-not_a_valid_action()
-"""
- )
-
- # forbidden fill action
- with pytest.raises(NameError):
- HighLevelActionSet(subsets=["coord"]).to_python_code(
- f"""\
-fill("INVALID_BID", "some text")
-"""
- )
-
- # forbidden import
- with pytest.raises(ValueError):
- action_set.to_python_code(
- f"""\
-import numpy as np
-"""
- )
-
- # invalid expression, results in empty action
- with pytest.raises(ValueError):
- action_set.to_python_code(
- f"""\
-[
-"""
- )
-
- # invalid expression, results in empty action
- with pytest.raises(ValueError):
- action_set.to_python_code(
- f"""\
-click
-"""
- )
-
- env.close()
-
-
-def test_click_through_frames():
- action_set = HighLevelActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": MULTI_IFRAME_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- obs, info = env.reset()
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "checkbox_2"})
-
- # box checked
- assert checkbox.has_attr("checked")
-
- # click box
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- obs, reward, term, trunc, info = env.step(action)
-
- # no error
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "checkbox_2"})
-
- # box not checked
- assert not checkbox.has_attr("checked")
-
- env.close()
-
-
-def test_fill_through_iframe():
- action_set = HighLevelActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": MULTI_IFRAME_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- obs, info = env.reset()
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- text_input = soup.find(
- "input", attrs={"type": "text", "placeholder": "Enter text here in iframe"}
- )
-
- # empty input
- assert text_input.get("value") == ""
-
- # fill with some text
- action = f"""\
-fill({repr(text_input.get(BID_ATTR))}, "This is a test value.")
-"""
- python_action = action_set.to_python_code(action)
-
- obs, reward, term, trunc, info = env.step(action)
-
- # no error
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- text_input = soup.find(
- "input", attrs={"type": "text", "placeholder": "Enter text here in iframe"}
- )
-
- # input filled to desired value
- assert text_input.get("value") == "This is a test value."
-
- env.close()
-
-
-def test_click():
- action_set = HighLevelActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def get_checkbox_elem(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
- return checkbox
-
- obs, info = env.reset()
- checkbox = get_checkbox_elem(obs)
-
- # box not checked
- assert not checkbox.has_attr("checked")
-
- # click box
- action = f"""
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- obs, reward, terminated, truncated, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # no error
- assert not obs["last_action_error"]
-
- # box checked
- assert checkbox.has_attr("checked")
-
- # click box
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # no error
- assert not obs["last_action_error"]
-
- # box unchecked
- assert not checkbox.has_attr("checked")
-
- # click box twice
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-click({repr(checkbox.get(BID_ATTR))})
-"""
- python_action = action_set.to_python_code(action)
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # no error
- assert not obs["last_action_error"]
-
- # box still unchecked
- assert not checkbox.has_attr("checked")
-
- env.close()
-
-
-def test_hover():
- action_set = HighLevelActionSet(subsets=["bid", "coord"])
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": HOVER_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def get_button_elem(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- button = soup.find("input", attrs={"type": "button"})
- return button
-
- obs, info = env.reset()
- button = get_button_elem(obs)
-
- assert not obs["last_action_error"]
- assert button.get("value") == "Hover me"
-
- action = f"""
-hover({repr(button.get(BID_ATTR))})
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- button = get_button_elem(obs)
-
- assert not obs["last_action_error"]
- assert button.get("value") == "Hello world!"
-
- action = f"""
-mouse_move(0, 0)
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- button = get_button_elem(obs)
-
- assert not obs["last_action_error"]
- assert button.get("value") == "Hover me"
-
- env.close()
-
-
-def test_fill_type_press():
- action_set = HighLevelActionSet(subsets=["bid", "coord"])
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEXT_INPUT_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def get_fname_lname_elems(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- fname = soup.find("input", attrs={"id": "fname"})
- lname = soup.find("input", attrs={"id": "lname"})
- return fname, lname
-
- obs, info = env.reset()
- fname, lname = get_fname_lname_elems(obs)
-
- # type using bid
- action = f"""
-fill({repr(fname.get(BID_ATTR))}, 'Christian')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Christian"
- assert lname.get("value") == ""
-
- # type using bid
- action = f"""
-fill({repr(lname.get(BID_ATTR))}, 'Clavier')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Christian"
- assert lname.get("value") == "Clavier"
-
- # type using focus and keyboard_type
- action = f"""
-focus({repr(fname.get(BID_ATTR))}) keyboard_type('Gรฉrard')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "ChristianGรฉrard"
- assert lname.get("value") == "Clavier"
-
- # type using click and keyboard_insert_text
- action = f"""
-click({repr(lname.get(BID_ATTR))}) keyboard_insert_text('Jugnot')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "ChristianGรฉrard"
- assert lname.get("value") == "ClavierJugnot"
-
- # type using clear and keyboard_insert_text
- action = f"""
-clear({repr(lname.get(BID_ATTR))}) keyboard_insert_text('Jugnot')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "ChristianGรฉrard"
- assert lname.get("value") == "Jugnot"
-
- # type using click, manual clear and keyboard_insert_text
- action = f"""
-click({repr(fname.get(BID_ATTR))})
-# clear the field
-keyboard_press('End')
-keyboard_down('Shift')
-keyboard_press('Home')
-keyboard_up('Shift')
-keyboard_press('Backspace')
-# insert text
-keyboard_insert_text('Gรฉrard')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Gรฉrard"
- assert lname.get("value") == "Jugnot"
-
- # fill empty text
- action = f"""
-fill({repr(fname.get(BID_ATTR))}, '')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == ""
- assert lname.get("value") == "Jugnot"
-
- # type in currently focused element
- action = f"""
-keyboard_type('Jean')
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Jean"
- assert lname.get("value") == "Jugnot"
-
- # de-focus (click 0, 0), then type text
- action = f"""
-mouse_click(0, 0)
-"""
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Jean"
- assert lname.get("value") == "Jugnot"
-
- action = f"""
-keyboard_type('Reno')
-"""
- obs, reward, terminated, truncated, info = env.step(action)
- fname, lname = get_fname_lname_elems(obs)
-
- assert not obs["last_action_error"]
- assert fname.get("value") == "Jean"
- assert lname.get("value") == "Jugnot"
-
- env.close()
-
-
-@pytest.mark.skip(reason="Not implemented yet")
-def test_dblclick():
- pass
-
-
-# copy/paste text using a sequence of keyboard_press actions
-def test_key_press():
- action_set = HighLevelActionSet(subsets=["bid", "coord"])
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEXT_INPUT_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- obs, info = env.reset()
-
- def get_fname_lname_elems(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- fname = soup.find("input", attrs={"id": "fname"})
- lname = soup.find("input", attrs={"id": "lname"})
- return fname, lname
-
- fname, lname = get_fname_lname_elems(obs)
-
- action = f"""
- fill({repr(fname.get(BID_ATTR))}, "Christian")
- keyboard_press({repr("Meta+a" if _IS_MAC_OS else "Control+a")})
- keyboard_press({repr("Meta+c" if _IS_MAC_OS else "Control+c")})
- click({repr(lname.get(BID_ATTR))})
- keyboard_press({repr("Meta+v" if _IS_MAC_OS else "Control+v")})
- """
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- assert not obs["last_action_error"]
-
- fname, lname = get_fname_lname_elems(obs)
-
- assert lname.get("value") == "Christian"
-
- env.close()
-
-
-def test_goto():
- url1 = URL_INPUT_URL
- url2 = TEXT_INPUT_URL
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": url1},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- )
-
- obs, info = env.reset()
-
- assert obs["url"] == url1
-
- action = f"""
-goto({repr(url2)})
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- assert not obs["last_action_error"]
-
- assert obs["url"] == url2
-
- action = """
-go_back()
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- assert not obs["last_action_error"]
-
- assert obs["url"] == url1
-
- action = """
-go_forward()
-"""
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- assert not obs["last_action_error"]
-
- assert obs["url"] == url2
-
- env.close()
-
-
-def test_scroll():
- action_set = HighLevelActionSet(subsets=["coord"])
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": LONG_PAGE_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def extract_coords_from_elem(elem):
- return ast.literal_eval(elem.get("center"))
-
- def get_top_bottom_elems(obs):
- soup = bs4.BeautifulSoup(
- flatten_dom_to_str(
- obs["dom_object"], obs["extra_element_properties"], with_center_coords=True
- ),
- "lxml",
- )
- top = soup.find("input", attrs={"type": "checkbox", "id": "top"})
- bottom = soup.find("input", attrs={"type": "checkbox", "id": "bottom"})
- return top, bottom
-
- obs, info = env.reset()
- top, bottom = get_top_bottom_elems(obs)
- top_x, top_y = extract_coords_from_elem(top)
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # top not checked
- assert not top.has_attr("checked")
- # bottom not checked
- assert not bottom.has_attr("checked")
-
- # click top
- action = f"mouse_click({repr(top_x)}, {repr(top_y)})"
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- top, bottom = get_top_bottom_elems(obs)
- top_x, top_y = extract_coords_from_elem(top)
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # no error
- assert not obs["last_action_error"]
- # top checked
- assert top.has_attr("checked")
- # bottom not checked
- assert not bottom.has_attr("checked")
-
- top, bottom = get_top_bottom_elems(obs)
- top_x, top_y = extract_coords_from_elem(top)
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # click bottom
- action = f"mouse_click({repr(bottom_x)}, {repr(bottom_y)})"
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- top, bottom = get_top_bottom_elems(obs)
- top_x, top_y = extract_coords_from_elem(top)
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # no error (click coordinates out of viewport is a silent fail in playwright)
- assert not obs["last_action_error"]
- # top checked
- assert top.has_attr("checked")
- # bottom not checked (click didn't go through)
- assert not bottom.has_attr("checked")
-
- # scroll up
- action = f"scroll(0, -500)"
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- top, bottom = get_top_bottom_elems(obs)
- prev_top_x, prev_top_y = top_x, top_y
- top_x, top_y = extract_coords_from_elem(top)
- prev_bottom_x, prev_bottom_y = bottom_x, bottom_y
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # no error
- assert not obs["last_action_error"]
-
- # no movement
- assert prev_top_x == top_x and prev_top_y == top_y
- assert prev_bottom_x == bottom_x and prev_bottom_y == bottom_y
-
- # scroll down
- action = f"scroll(0, 500)"
-
- obs, reward, terminated, truncated, info = env.step(action)
-
- top, bottom = get_top_bottom_elems(obs)
- prev_top_x, prev_top_y = top_x, top_y
- top_x, top_y = extract_coords_from_elem(top)
- prev_bottom_x, prev_bottom_y = bottom_x, bottom_y
- bottom_x, bottom_y = extract_coords_from_elem(bottom)
-
- # no error
- assert not obs["last_action_error"]
-
- # movement
- assert prev_top_x == top_x and prev_top_y > top_y
- assert prev_bottom_x == bottom_x and prev_bottom_y > bottom_y
-
- env.close()
-
-
-def test_tab_actions():
- action_set = HighLevelActionSet(subsets=["tab", "nav"])
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
- assert len(obs["open_pages_urls"]) == 1
- assert len(obs["open_pages_titles"]) == 1
- assert obs["active_page_index"] == 0
- assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
-
- obs, reward, terminated, truncated, info = env.step("new_tab()")
- assert not obs["last_action_error"]
- assert len(obs["open_pages_urls"]) == 2
- assert len(obs["open_pages_titles"]) == 2
- assert obs["active_page_index"] == 1
- assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
-
- obs, reward, terminated, truncated, info = env.step(f"goto({repr(TEXTBOX_URL)})")
- assert not obs["last_action_error"]
- assert len(obs["open_pages_urls"]) == 2
- assert len(obs["open_pages_titles"]) == 2
- assert obs["active_page_index"] == 1
- assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
-
- obs, reward, terminated, truncated, info = env.step("tab_focus(0)")
- assert not obs["last_action_error"]
- assert len(obs["open_pages_urls"]) == 2
- assert len(obs["open_pages_titles"]) == 2
- assert obs["active_page_index"] == 0
- assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
-
- obs, reward, terminated, truncated, info = env.step("tab_close()")
- assert not obs["last_action_error"]
- assert len(obs["open_pages_urls"]) == 1
- assert len(obs["open_pages_titles"]) == 1
- assert obs["active_page_index"] == 0
- assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
-
- env.close()
-
-
-def test_mouse_down_up():
- action_set = HighLevelActionSet(subsets=["bid", "coord"])
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- def get_checkbox_elem(obs):
- soup = bs4.BeautifulSoup(
- flatten_dom_to_str(
- obs["dom_object"], obs["extra_element_properties"], with_center_coords=True
- ),
- "lxml",
- )
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
- return checkbox
-
- obs, info = env.reset()
- checkbox = get_checkbox_elem(obs)
-
- # box not checked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # click box 1 time
- x, y = ast.literal_eval(checkbox.get("center"))
- action = f"""\
-mouse_click({repr(x)}, {repr(y)})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nmouse_") == 1
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box checked
- assert not obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- # click box 1 time
- x, y = ast.literal_eval(checkbox.get("center"))
- action = f"""\
-mouse_move(0, 0)
-mouse_move({repr(x)}, {repr(y)})
-mouse_down({repr(x)}, {repr(y)})
-mouse_up({repr(x)}, {repr(y)})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nmouse_") == 4
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box not checked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
- # click box 2 times
- x, y = ast.literal_eval(checkbox.get("center"))
- action = f"""\
-mouse_move(0, 0)
-mouse_move({repr(x)}, {repr(y)})
-mouse_down({repr(x)}, {repr(y)}, button="left")
-mouse_up({repr(x)}, {repr(y)}, "left")
-mouse_down({repr(x)}, {repr(y)})
-mouse_up({repr(x)}, {repr(y)})
-"""
- python_action = action_set.to_python_code(action)
-
- assert python_action.count("\nmouse_") == 6
-
- obs, reward, term, trunc, info = env.step(action)
- checkbox = get_checkbox_elem(obs)
-
- # box not checked
- assert not obs["last_action_error"]
- assert not checkbox.has_attr("checked")
-
-
-# test that forced action can click an obstructed element
-@pytest.mark.parametrize("retry_with_force", [True, False])
-def test_forced_actions(retry_with_force):
- action_set = HighLevelActionSet(subsets=["bid"], retry_with_force=retry_with_force)
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": OBSTRUCTED_CHECKBOX_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- obs, info = env.reset()
-
- def get_checkbox(obs):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"id": "hobbies-checkbox-1"})
- return checkbox
-
- checkbox = get_checkbox(obs)
-
- action = f"""
- click({repr(checkbox.get(BID_ATTR))})
- """
-
- obs, reward, terminated, truncated, info = env.step(action)
- checkbox = get_checkbox(obs)
- if retry_with_force:
- assert not obs["last_action_error"]
- assert checkbox.get("checked", False) == False
- else:
- assert obs["last_action_error"]
- assert checkbox.has_attr("checked")
-
- env.close()
-
-
-# TODO investigate why it takes ~1sec to mark each frame, although they are very small, and if we can do something about it
-@pytest.mark.slow
-def test_iframe_bid():
- action_set = HighLevelActionSet(subsets=["bid"])
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": LOTS_OF_IFRAMES_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
-
- obs, info = env.reset()
-
- def get_checkbox(obs, i):
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"id": f"checkbox{i}"})
- return checkbox
-
- # try to click on checkboxes
- checkboxes = [
- (0, "a"),
- # (5, "f"),
- # (26, "aA"),
- (29, "aD"),
- ]
- for id, iframe_bid in checkboxes:
-
- # try to click on checkbox
- checkbox = get_checkbox(obs, id)
- bid = checkbox.get(BID_ATTR)
-
- # iframe bid should match
- assert re.match(f"^{iframe_bid}[0-9]+$", bid)
-
- action = f"""
- click({repr(bid)})
- """
-
- obs, reward, terminated, truncated, info = env.step(action)
- assert not obs["last_action_error"]
-
- # checkbox should get checked
- checkbox = get_checkbox(obs, id)
- assert checkbox.has_attr("checked")
-
- env.close()
diff --git a/BrowserGym/tests/core/test_actions_python.py b/BrowserGym/tests/core/test_actions_python.py
deleted file mode 100644
index 69cc6237bb1f128709578f7ea84a969cfb33adf8..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/test_actions_python.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import pytest
-
-from browsergym.core.action.python import PythonActionSet
-
-
-ACTIONS_TO_TEST = [
- (
- """\
-a = 0
-""",
- """\
-a = 0
-""",
- ),
- (
- """\
-```
-a = 0
-```
-""",
- """\
-a = 0
-""",
- ),
- (
- """\
-```python
-a = 0
-```
-""",
- """\
-a = 0
-""",
- ),
- (
- """\
-```python
-a = 0
-```
-This is an explanation
-```python
-b = 3
-```
-More explanations
-""",
- """\
-a = 0
-
-b = 3
-""",
- ),
-]
-
-
-@pytest.mark.parametrize("action,expected_code", ACTIONS_TO_TEST)
-def test_action_cleaning(action, expected_code):
- action_set = PythonActionSet()
- code = action_set.to_python_code(action)
-
- assert code == expected_code
diff --git a/BrowserGym/tests/core/test_gym_envs.py b/BrowserGym/tests/core/test_gym_envs.py
deleted file mode 100644
index 48fca3a32e119ee0a1e58440565c8e10d1fca2d7..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/test_gym_envs.py
+++ /dev/null
@@ -1,313 +0,0 @@
-import os
-import pathlib
-from time import time
-
-import bs4
-import gymnasium as gym
-import pytest
-
-# register openended gym environments
-import browsergym.core
-import browsergym.core.action
-from browsergym.core.action.highlevel import HighLevelActionSet
-from browsergym.core.action.python import PythonActionSet
-from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
-from browsergym.utils.obs import flatten_dom_to_str
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-__TIMEOUT = 500
-
-__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"
-TEST_PAGE = f"file://{__DATA_DIR}/test_page.html"
-BASIC_IFRAME_PAGE = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
-
-
-def test_gym_env():
- action_set = PythonActionSet()
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
-
- assert not obs["last_action_error"]
-
- obs, reward, term, trunc, info = env.step(
- f"""\
-page.get_by_label("Name:").click()
-page.get_by_label("Name:").fill("Janice")
-page.get_by_label("Name:").press("Tab")
-page.get_by_label("Email:").fill("janice@mail.com")
-page.get_by_label("Email:").press("Tab")
-page.get_by_label("Age:", exact=True).fill("21")
-page.get_by_label("Age:", exact=True).press("Tab")
-"""
- )
-
- assert obs["last_action_error"] == ""
- assert reward == 0
- assert term == False
- assert trunc == False
-
- obs, reward, term, trunc, info = env.step(
- f"""\
-page.get_by_label("Message:").fill("Hello")
-page.get_by_label("Message:").press("Tab")
-page.get_by_label("Subscribe to newsletter").check()
-page.get_by_label("Subscribe to newsletter").press("Tab")
-page.get_by_role("button", name="Submit").press("Enter")
-"""
- )
-
- assert obs["last_action_error"] == ""
- assert reward == 0
- assert term == False
- assert trunc == False
-
- obs, reward, term, trunc, info = env.step(
- f"""\
-page.get_by_label("LABEL DOES NOT EXIST:").fill("Hello")
-page.get_by_role("button", name="Submit").press("Enter")
-"""
- )
-
- assert obs["last_action_error"] != ""
- assert reward == 0
- assert term == False
- assert trunc == False
-
- env.close()
-
-
-def test_max_episode_steps():
- # no max_steps
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- obs, reward, term, trunc, info = env.step("")
-
- assert term == False
- assert trunc == False
-
- obs, reward, term, trunc, info = env.step("")
-
- assert term == False
- assert trunc == False
-
- # max_steps = 2
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- max_episode_steps=2,
- )
- obs, info = env.reset()
-
- obs, reward, term, trunc, info = env.step("")
-
- assert term == False
- assert trunc == False
-
- obs, reward, term, trunc, info = env.step("")
-
- assert term == False
- assert trunc == True
-
- env.close()
-
-
-def test_active_page():
- action_set = PythonActionSet()
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
-
- assert len(obs["open_pages_urls"]) == 1
- assert obs["active_page_index"] == 0
-
- obs, reward, term, trunc, info = env.step("page.context.new_page()")
-
- assert len(obs["open_pages_urls"]) == 2
- assert obs["active_page_index"] == 1
-
- obs, reward, term, trunc, info = env.step("page.context.pages[0].mouse.click(5, 5)")
-
- assert len(obs["open_pages_urls"]) == 2
- assert obs["active_page_index"] == 0
-
- obs, reward, term, trunc, info = env.step("page.context.pages[1].mouse.click(5, 5)")
-
- assert len(obs["open_pages_urls"]) == 2
- assert obs["active_page_index"] == 1
-
- obs, reward, term, trunc, info = env.step("page.context.pages[1].close()")
-
- assert len(obs["open_pages_urls"]) == 1
- assert obs["active_page_index"] == 0
-
- obs, reward, term, trunc, info = env.step("page.close()")
-
- assert len(obs["open_pages_urls"]) == 1
- assert obs["active_page_index"] == 0
-
- obs, reward, term, trunc, info = env.step("page.context.new_page()")
-
- assert len(obs["open_pages_urls"]) == 2
- assert obs["active_page_index"] == 1
-
- obs, reward, term, trunc, info = env.step("page.close()")
-
- assert len(obs["open_pages_urls"]) == 1
- assert obs["active_page_index"] == 0
-
- env.close()
-
-
-def test_nested_iframes_default_demo_mode():
- demo_mode = "default"
- action_set = HighLevelActionSet(demo_mode=demo_mode)
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": BASIC_IFRAME_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- inner_checkbox = soup.find("input", attrs={"id": "checkbox_2"})
-
- assert inner_checkbox.has_attr("checked")
- # click box
- action = f"""\
-click({repr(inner_checkbox.get(BID_ATTR))})
-"""
- click_start = time()
- obs, _, _, _, _ = env.step(action)
- click_end = time()
- # clicking should be slow in demo mode
- assert click_end - click_start > 1
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- inner_checkbox = soup.find("input", attrs={"id": "checkbox_2"})
- # box is not checked; meaning it was clicked by the previous action
- assert not inner_checkbox.has_attr("checked")
-
- env.close()
-
-
-@pytest.mark.parametrize("global_demo_mode", [True, False])
-@pytest.mark.parametrize("demo_mode", [None, "off", "default", "only_visible_elements", "all_blue"])
-def test_demo_mode(global_demo_mode: bool, demo_mode: str):
- action_set = HighLevelActionSet(demo_mode=demo_mode)
- browsergym.core.action.set_global_demo_mode(global_demo_mode)
-
- demo_mode_active = (global_demo_mode and demo_mode is None) or (
- demo_mode is not None and demo_mode != "off"
- )
-
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=action_set.to_python_code,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- email_field = soup.find("input", attrs={"id": "email"})
- checkbox = soup.find("input", attrs={"id": "subscribe"})
-
- # check that the email field is empty
- assert email_field.get("value") == ""
-
- # check that the box is not checked
- assert not checkbox.has_attr("checked")
-
- # click box
- action = f"""\
-click({repr(checkbox.get(BID_ATTR))})
-"""
- obs, reward, terminated, truncated, info = env.step(action)
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
- checkbox = soup.find("input", attrs={"type": "checkbox", "id": "subscribe"})
-
- # check that the box is checked
- assert checkbox.has_attr("checked")
-
- # clicking should be slow (only in demo mode)
- action_time = info["action_exec_stop"] - info["action_exec_start"]
- if demo_mode_active:
- assert action_time > 2
- else:
- assert action_time <= 1.5
-
- # fill box
- action = f"""\
-fill({repr(email_field.get(BID_ATTR))}, "test@test")
-"""
- obs, reward, terminated, truncated, info = env.step(action)
- assert not obs["last_action_error"]
-
- soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
-
- # email field has been filled correctly
- email_field = soup.find("input", attrs={"id": "email"})
- assert email_field.get("value") == "test@test"
-
- # typing should be slow (only in demo mode)
- action_time = info["action_exec_stop"] - info["action_exec_start"]
- if demo_mode_active:
- assert action_time > 2
- else:
- assert action_time <= 1.5
-
- env.close()
-
-
-@pytest.mark.parametrize("resizeable_window", (True, False))
-@pytest.mark.parametrize("size", ((1600, 1200), (800, 800)))
-def test_resizeable_window(resizeable_window, size):
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- viewport={"width": size[0], "height": size[1]},
- resizeable_window=resizeable_window,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- assert (obs["screenshot"].shape[1], obs["screenshot"].shape[0]) == size
-
- env.close()
diff --git a/BrowserGym/tests/core/test_observation.py b/BrowserGym/tests/core/test_observation.py
deleted file mode 100644
index 36bb341937b265d8792199ac4722d404b3199049..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/core/test_observation.py
+++ /dev/null
@@ -1,819 +0,0 @@
-import ast
-import os
-from pathlib import Path
-
-import bs4
-import gymnasium as gym
-import numpy as np
-import pytest
-import regex as re
-
-# register gym environments
-import browsergym.core
-from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
-from browsergym.core.observation import (
- _post_extract,
- _pre_extract,
- extract_all_frame_axtrees,
- extract_dom_snapshot,
- extract_merged_axtree,
- extract_screenshot,
-)
-from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-__TIMEOUT = 500
-__VIEWPORT = {"width": 800, "height": 600}
-
-__DATA_DIR = Path(__file__).resolve().parent / "data"
-
-TEST_PAGE = f"file://{__DATA_DIR}/test_page.html"
-TEST_PAGE_2 = f"file://{__DATA_DIR}/test_page_2.html"
-MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
-SHADOW_DOM_URL = f"file://{__DATA_DIR}/basic_shadow_dom_site/basic_shadow_dom.html"
-SIMPLE_SHADOW_DOM_URL = f"file://{__DATA_DIR}/basic_shadow_dom_site/simple_shadow_dom.html"
-BASIC_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/basic_iframe.html"
-BASIC_IFRAME_2_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/basic_iframe_2.html"
-INNER_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/inner-iframe.html"
-OUTER_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/outer-iframe.html"
-CUSTOM_PAGE_URL = f"file://{__DATA_DIR}/custom_page/basic_iframe.html"
-MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
-
-
-@pytest.mark.skip(reason="TODO: how to get the final viewport size right?")
-def test_extract_screenshot():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- _pre_extract(env.unwrapped.page)
- screenshot = extract_screenshot(env.unwrapped.page)
- _post_extract(env.unwrapped.page)
-
- # 3D array (height, width, rgb) of unsigned bytes (between 0 and 255)
- assert isinstance(screenshot, np.ndarray)
- assert len(screenshot.shape) == 3
- assert screenshot.shape[0] == __VIEWPORT["height"]
- assert screenshot.shape[1] == __VIEWPORT["width"]
- assert screenshot.shape[2] == 3 # RGB
- assert screenshot.dtype == np.uint8
-
- env.close()
-
-
-def test_extract_axtree_simple():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- _pre_extract(env.unwrapped.page)
- all_frame_axtrees = extract_all_frame_axtrees(env.unwrapped.page)
- merged_axtree = extract_merged_axtree(env.unwrapped.page)
- _post_extract(env.unwrapped.page)
-
- # single frame
- assert len(all_frame_axtrees) == 1
- assert len(next(iter(all_frame_axtrees.values()))["nodes"]) == len(merged_axtree["nodes"])
-
- env.close()
-
-
-def test_extract_axtree_multi_iframe():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": MULTI_IFRAME_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- _pre_extract(env.unwrapped.page)
- all_frame_axtrees = extract_all_frame_axtrees(env.unwrapped.page)
- merged_axtree = extract_merged_axtree(env.unwrapped.page)
- _post_extract(env.unwrapped.page)
-
- # multiple frames
- assert len(all_frame_axtrees) == 3
-
- # total number of nodes in merged and individual frame axtrees should be equal
- n_nodes = 0
- for frame_id, frame_axtree in all_frame_axtrees.items():
- n_nodes += len(frame_axtree["nodes"])
-
- assert n_nodes == len(merged_axtree["nodes"])
-
- env.close()
-
-
-def test_extract_dom_simple():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- _pre_extract(env.unwrapped.page)
- dom_snapshot = extract_dom_snapshot(env.unwrapped.page)
- _post_extract(env.unwrapped.page)
-
- # single frame
- assert len(dom_snapshot["documents"]) == 1
-
- env.close()
-
-
-def test_extract_dom_multi_iframe():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": MULTI_IFRAME_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- _pre_extract(env.unwrapped.page)
- dom_snapshot = extract_dom_snapshot(env.unwrapped.page)
- _post_extract(env.unwrapped.page)
-
- # multiple frames
- assert len(dom_snapshot["documents"]) == 3
-
- env.close()
-
-
-def test_simple_shadowdom():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": SIMPLE_SHADOW_DOM_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- # retrieve an input element inside the shadowDOM
- elem = env.unwrapped.page.get_by_placeholder("Level 1.1 Text Field 1")
- assert elem.count() == 1
-
- # elem should have a browsergym_id in its BID_ATTR attribute
- elem_id = elem.get_attribute(BID_ATTR)
- assert elem_id is not None
-
- # elem should not have an aria-description (it should have been cleaned)
- aria_description = elem.get_attribute("aria-description")
- assert aria_description is None
-
- # elem should not have an aria-roledescription (it should have been cleaned)
- aria_roledescription = elem.get_attribute("aria-roledescription")
- assert aria_roledescription is None
-
- # check that elem can be retrieved correctly using its browsergym_id
- elem2 = env.unwrapped.page.get_by_test_id(elem_id)
- assert elem2.count() == 1
- assert env.unwrapped.page.evaluate(
- "([node1, node2]) => {return node1.isEqualNode(node2);}",
- [elem.element_handle(), elem2.element_handle()],
- )
-
- env.close()
-
-
-def test_nested_shadowdom():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": SHADOW_DOM_URL},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- # retrieve an input element inside the nested shadowDOM
- elem = env.unwrapped.page.get_by_placeholder("Level 2.4 Text Field 2")
- assert elem.count() == 1
-
- # elem should have a browsergym_id in its BID_ATTR attribute
- elem_id = elem.get_attribute(BID_ATTR)
- assert elem_id is not None
-
- # elem should not have an aria-description (it should have been cleaned)
- aria_description = elem.get_attribute("aria-description")
- assert aria_description is None
-
- # elem should not have an aria-roledescription (it should have been cleaned)
- aria_roledescription = elem.get_attribute("aria-roledescription")
- assert aria_roledescription is None
-
- # check that elem can be retrieved correctly using its browsergym_id
- elem2 = env.unwrapped.page.get_by_test_id(elem_id)
- assert elem2.count() == 1
- assert env.unwrapped.page.evaluate(
- "([node1, node2]) => {return node1.isEqualNode(node2);}",
- [elem.element_handle(), elem2.element_handle()],
- )
-
- env.close()
-
-
-@pytest.mark.parametrize(
- "url",
- [
- TEST_PAGE,
- MULTI_IFRAME_URL,
- SIMPLE_SHADOW_DOM_URL,
- BASIC_IFRAME_URL,
- BASIC_IFRAME_2_URL,
- INNER_IFRAME_URL,
- OUTER_IFRAME_URL,
- ],
-)
-def test_dom_has_bids_no_aria(url):
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": url},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- viewport=__VIEWPORT,
- timeout=__TIMEOUT,
- )
- obs, info = env.reset()
-
- # exceptions
- dom_node_names_without_bid = ["html", "#text", "#document", "#comment"]
- axtree_roles_without_bid = ["RootWebArea", "none", "generic", "StaticText", "InlineTextBox"]
-
- # 1. test the DOM snapshot for BID_ATTR, "aria-description" and "aria-roledescription"
-
- # check all HTML elements in the DOM for unique browsergym id
- dom = obs["dom_object"]
- bids = []
- for doc in dom["documents"]:
- for node_name_id, attributes in zip(doc["nodes"]["nodeName"], doc["nodes"]["attributes"]):
- node_name = dom["strings"][node_name_id]
- # read the node's attributes
- j = 0
- bid = None
- while j < len(attributes):
- attr_name = dom["strings"][attributes[j]]
- attr_value = dom["strings"][attributes[j + 1]]
-
- # print(f"{node_name} {attr_name}: {attr_value}")
-
- # check that the "aria-roledescription" attribute is absent (this is specific to this test page)
- assert attr_name != "aria-roledescription"
-
- # check that the "aria-description" attribute is absent (this is specific to this test page)
- assert attr_name != "aria-description"
-
- # extract the browsergym id from the BID_ATTR attribute
- if attr_name == BID_ATTR:
- bid = attr_value
- j += 2
-
- # check that all elements (with exceptions) have a browsergym id
- if node_name not in dom_node_names_without_bid:
- assert bid is not None
-
- if bid is not None:
- bids.append(bid)
-
- # check that all browsergym ids are unique
- assert len(bids) == len(set(bids))
-
- # 2. test the AXTree for "browsergym_id" and "description" properties
- axtree = obs["axtree_object"]
- bids = []
- for node in axtree["nodes"]:
- bid = node.get("browsergym_id", None)
-
- # check that the "aria-roledescription" attribute is absent (this is specific to this test page)
- for property in node.get("properties", []):
- assert property["name"] != "roledescription"
-
- # check that the "aria-description" attribute is absent (this is specific to this test page)
- assert "description" not in node
-
- # check that all elements (with exceptions) have a browsergym id
- if node["role"]["value"] not in axtree_roles_without_bid:
- assert bid is not None
-
- if bid is not None:
- bids.append(bid)
-
- # check that all browsergym ids are unique
- assert len(bids) == len(set(bids))
-
- env.close()
-
-
-def test_dom_to_text():
- env = gym.make(
- "browsergym/openended",
- task_kwargs={"start_url": TEST_PAGE_2},
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- timeout=__TIMEOUT,
- action_mapping=None,
- )
- obs, info = env.reset()
-
- dom = flatten_dom_to_str(obs["dom_object"])
- assert isinstance(dom, str)
- assert "Subscribe to newsletter" in dom
- assert "Janice" not in dom
-
- obs, reward, term, trunc, info = env.step(
- f"""\
-page.get_by_label("Name:").click()
-page.get_by_label("Name:").fill("Janice")
-page.get_by_label("Name:").press("Tab")
-page.get_by_label("Email:").fill("janice@mail.com")
-page.get_by_label("Email:").press("Tab")
-page.get_by_label("Age:", exact=True).fill("21")
-page.get_by_label("Age:", exact=True).press("Tab")
-"""
- )
-
- dom = flatten_dom_to_str(obs["dom_object"])
- assert "Janice" in dom
- assert "janice@mail.com" in dom
-
- dom = flatten_dom_to_str(
- obs["dom_object"],
- extra_properties=obs["extra_element_properties"],
- with_visible=True,
- with_clickable=True,
- with_center_coords=True,
- with_bounding_box_coords=True,
- with_som=True,
- )
- assert 'box="(' in dom
- assert 'center="(' in dom
- assert 'clickable="" som="" type="submit" value="Submit" visible=""' in dom
- assert 'head bid="1">' in dom
- assert 'clickable="" for="email" visible=""' in dom
- assert "Text within a non-html tag" in dom
- assert "Text that should not be visible" in dom
-
- dom = flatten_dom_to_str(
- obs["dom_object"], extra_properties=obs["extra_element_properties"], filter_som_only=True
- )
- assert 'for="email"' not in dom
- assert 'type="submit" value="Submit"' in dom
- assert "Text within a non-html tag" not in dom
- assert "Text that should not be visible" not in dom
-
- dom = flatten_dom_to_str(
- obs["dom_object"],
- extra_properties=obs["extra_element_properties"],
- filter_visible_only=True,
- )
- assert " None:
- """
- Args:
- seed: random seed.
- start_url: str, the url for the starting page.
- goal: str, the initial goal.
-
- """
- super().__init__(seed)
- self.start_url = start_url
- self.goal = [
- {"type": "text", "text": "This is a mock task with an image goal."},
- {
- "type": "image_url",
- "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=",
- },
- ]
-
- def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
- page.goto(self.start_url, timeout=10000)
- return self.goal, {}
-
- def teardown(self) -> None:
- pass
-
- def validate(
- self, page: playwright.sync_api.Page, chat_messages: list[str]
- ) -> Tuple[float, bool, str, dict]:
- reward, done, msg, info = 0, False, "", {}
-
- for message in chat_messages:
- if message["role"] == "user" and message["message"] == "exit":
- done = True
- break
-
- return reward, done, msg, info
-
-
-def test_mock_image_goal_task():
- env = BrowserEnv(MockImageGoalTask)
- obs, _ = env.reset()
-
- assert "goal_object" in obs
- assert len(obs["goal_object"]) == 2
- assert obs["goal_object"][0]["type"] == "text"
- assert obs["goal_object"][0]["text"] == "This is a mock task with an image goal."
- assert obs["goal_object"][1]["type"] == "image_url"
-
- env.chat.add_message("user", "exit")
- obs, reward, terminated, _, _ = env.step("send_msg_to_user('bye')")
-
- assert reward == 0
- assert terminated is True
-
- env.close()
-
-
-if __name__ == "__main__":
- test_mock_image_goal_task()
diff --git a/BrowserGym/tests/experiments/__init__.py b/BrowserGym/tests/experiments/__init__.py
deleted file mode 100644
index 75f09d6fbde51609da41e1041eb3fb8125d808cb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/experiments/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# bugfix: use same playwright instance in browsergym and pytest
-from ..utils import setup_playwright
diff --git a/BrowserGym/tests/experiments/test_benchmark.py b/BrowserGym/tests/experiments/test_benchmark.py
deleted file mode 100644
index 9222be11c98c628551499aab0d43cf218a0fcc30..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/experiments/test_benchmark.py
+++ /dev/null
@@ -1,351 +0,0 @@
-import dataclasses
-import os
-import random
-import re
-import tempfile
-
-import numpy as np
-import pytest
-
-from browsergym.core.action.base import AbstractActionSet
-from browsergym.experiments.agent import Agent
-from browsergym.experiments.benchmark import Benchmark, HighLevelActionSetArgs
-from browsergym.experiments.benchmark.configs import DEFAULT_BENCHMARKS
-from browsergym.experiments.benchmark.utils import make_env_args_list_from_fixed_seeds
-from browsergym.experiments.loop import AbstractAgentArgs, ExpArgs, get_exp_result
-from browsergym.utils.obs import flatten_axtree_to_str
-
-
-class MiniwobTestAgent(Agent):
-
- def __init__(self, action_set: AbstractActionSet):
- self.action_set = action_set
-
- def obs_preprocessor(self, obs: dict):
- return {"axtree_txt": flatten_axtree_to_str(obs["axtree_object"])}
-
- def get_action(self, obs: dict) -> tuple[str, dict]:
- match = re.search(r"^\s*\[(\d+)\].*button", obs["axtree_txt"], re.MULTILINE | re.IGNORECASE)
-
- if match:
- bid = match.group(1)
- action = f'click("{bid}")'
- else:
- raise Exception("Can't find the button's bid")
-
- return action, dict(think="I'm clicking the button as requested.")
-
-
-@dataclasses.dataclass
-class MiniwobTestAgentArgs(AbstractAgentArgs):
- high_level_action_set: HighLevelActionSetArgs = None
-
- def make_agent(self):
- return MiniwobTestAgent(action_set=self.high_level_action_set.make_action_set())
-
-
-def test_build_benchmarks():
- expected_bench_size = {
- "miniwob": 125 * 5,
- "miniwob_tiny_test": 2 * 2,
- "webarena": 812,
- "webarena_tiny": 6,
- "visualwebarena": 910,
- "visualwebarena_tiny": 4,
- "workarena_l1": 33 * 10,
- "workarena_l2_agent_curriculum_eval": 235,
- "workarena_l3_agent_curriculum_eval": 235,
- "assistantbench": 214,
- "weblinx": 31586,
- }
- for name, benchmark_builder in DEFAULT_BENCHMARKS.items():
- benchmark = benchmark_builder()
- assert name == benchmark.name
- assert benchmark.env_args_list # non-empty
- assert benchmark.task_metadata is not None
- assert len(benchmark.env_args_list) == expected_bench_size[name]
- benchmark_bis = Benchmark.from_json(benchmark.to_json())
- assert benchmark.to_dict() == benchmark_bis.to_dict()
-
-
-def test_benchmark_subset():
- benchmark: Benchmark = DEFAULT_BENCHMARKS["miniwob"]()
-
- benchmark_subset = benchmark.subset_from_regexp(column="task_name", regexp="click")
- assert len(benchmark_subset.env_args_list) == 31 * 5
- assert benchmark_subset.name == "miniwob[task_name=/click/]"
-
- benchmark_subset_1 = benchmark_subset.subset_from_regexp(
- column="miniwob_category", regexp="original"
- )
- benchmark_subset_2 = benchmark_subset.subset_from_glob(
- column="miniwob_category", glob="original"
- )
-
- assert benchmark_subset_1.name == "miniwob[task_name=/click/][miniwob_category=/original/]"
- assert benchmark_subset_2.name == "miniwob[task_name=/click/][miniwob_category=original]"
-
- dict_1 = benchmark_subset_1.to_dict()
- dict_1.pop("name")
- dict_2 = benchmark_subset_2.to_dict()
- dict_2.pop("name")
-
- assert dict_1 == dict_2
-
-
-def test_benchmark_subset_from_task_ratio():
- benchmark: Benchmark = DEFAULT_BENCHMARKS["webarena"]()
-
- # Store initial random state
- initial_state = random.getstate()
-
- benchmark_subset = benchmark.subset_from_task_ratio(ratio=0.5, seed=1)
- assert len(benchmark_subset.env_args_list) == 812 // 2
- assert benchmark_subset.name == "webarena[ratio=0.5, seed=1]"
-
- # Verify global random state hasn't changed
- assert random.getstate() == initial_state
-
- benchmark_subset_1 = benchmark_subset.subset_from_task_ratio(ratio=0.5, seed=1)
- benchmark_subset_2 = benchmark_subset.subset_from_task_ratio(ratio=0.5, seed=2)
-
- # Verify global random state still hasn't changed
- assert random.getstate() == initial_state
-
- # Check the task lists are different
- assert not np.all(
- [
- env_args.task_name == env_args_2.task_name
- for env_args, env_args_2 in zip(
- benchmark_subset_1.env_args_list, benchmark_subset_2.env_args_list
- )
- ]
- )
-
- dict_1 = benchmark_subset_1.to_dict()
- dict_1.pop("name")
- dict_2 = benchmark_subset_2.to_dict()
- dict_2.pop("name")
- assert len(dict_1["env_args_list"]) == len(dict_2["env_args_list"])
- assert dict_1 != dict_2
-
-
-def test_prepare_backend_miniwob():
- MINIWOB_URL = os.environ["MINIWOB_URL"]
- try:
- benchmark: Benchmark = DEFAULT_BENCHMARKS["miniwob"]()
-
- benchmark.prepare_backends()
-
- del os.environ["MINIWOB_URL"]
- with pytest.raises(Exception):
- benchmark.prepare_backends()
-
- os.environ["MINIWOB_URL"] = ""
- with pytest.raises(Exception):
- benchmark.prepare_backends()
- finally:
- os.environ["MINIWOB_URL"] = MINIWOB_URL
-
-
-def test_prepare_backend_assistantbench():
- benchmark: Benchmark = DEFAULT_BENCHMARKS["assistantbench"]()
- benchmark.prepare_backends()
-
-
-@pytest.mark.skip
-def test_prepare_backend_webarena():
- WA_FULL_RESET = os.environ["WA_FULL_RESET"]
- try:
- benchmark: Benchmark = DEFAULT_BENCHMARKS["webarena"]()
-
- benchmark.prepare_backends()
-
- del os.environ["WA_FULL_RESET"]
- with pytest.raises(Exception):
- benchmark.prepare_backends()
-
- os.environ["WA_FULL_RESET"] = "http://localhost:12345/reset"
- with pytest.raises(Exception):
- benchmark.prepare_backends()
- finally:
- os.environ["WA_FULL_RESET"] = WA_FULL_RESET
-
-
-@pytest.mark.skip
-def test_prepare_backend_visualwebarena():
- VWA_FULL_RESET = os.environ["VWA_FULL_RESET"]
- try:
- benchmark: Benchmark = DEFAULT_BENCHMARKS["visualwebarena"]()
-
- benchmark.prepare_backends()
-
- del os.environ["VWA_FULL_RESET"]
- with pytest.raises(Exception):
- benchmark.prepare_backends()
-
- os.environ["VWA_FULL_RESET"] = "http://localhost:12345/reset"
- with pytest.raises(Exception):
- benchmark.prepare_backends()
- finally:
- os.environ["VWA_FULL_RESET"] = VWA_FULL_RESET
-
-
-@pytest.mark.skip
-def test_prepare_backend_weblinx():
- BROWSERGYM_WEBLINX_CACHE_DIR = os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"]
- try:
- benchmark: Benchmark = DEFAULT_BENCHMARKS["weblinx"]()
-
- benchmark.prepare_backends()
-
- del os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"]
- with pytest.raises(Exception):
- benchmark.prepare_backends()
-
- finally:
- os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"] = BROWSERGYM_WEBLINX_CACHE_DIR
-
-
-def test_run_mock_benchmark():
- benchmark = Benchmark(
- name="miniwob_click_test",
- high_level_action_set_args=HighLevelActionSetArgs(
- subsets=["bid"],
- multiaction=False,
- strict=False,
- retry_with_force=True,
- demo_mode="off",
- ),
- is_multi_tab=False,
- supports_parallel_seeds=True,
- backends=["miniwob"],
- env_args_list=make_env_args_list_from_fixed_seeds(
- task_list=["miniwob.click-test"],
- max_steps=5,
- fixed_seeds=[0, 1],
- ),
- )
-
- for env_args in benchmark.env_args_list:
- agent_args = MiniwobTestAgentArgs(
- high_level_action_set=benchmark.high_level_action_set_args
- )
- exp_args = ExpArgs(
- agent_args=agent_args,
- env_args=env_args,
- )
-
- with tempfile.TemporaryDirectory() as tmp_dir:
- exp_args.prepare(tmp_dir)
- exp_args.run()
- exp_result = get_exp_result(exp_args.exp_dir)
- exp_record = exp_result.get_exp_record()
-
- target = {
- "env_args.task_name": "miniwob.click-test",
- "env_args.headless": True,
- "env_args.record_video": False,
- "n_steps": 1,
- "cum_reward": 1.0,
- "terminated": True,
- "truncated": False,
- }
-
- assert len(exp_result.steps_info) == 2
-
- for key, target_val in target.items():
- assert key in exp_record
- assert exp_record[key] == target_val
-
-
-def test_dependency_graphs():
- benchmark = Benchmark(
- name="my_bench",
- high_level_action_set_args=HighLevelActionSetArgs(
- subsets=["bid"],
- multiaction=False,
- strict=False,
- retry_with_force=True,
- demo_mode="off",
- ),
- is_multi_tab=False,
- supports_parallel_seeds=True,
- backends=["miniwob"],
- env_args_list=make_env_args_list_from_fixed_seeds(
- task_list=["miniwob.click-test"],
- max_steps=5,
- fixed_seeds=[0, 1],
- ),
- )
-
- # one task, two seeds
- task_dependencies = benchmark.dependency_graph_over_tasks()
- assert task_dependencies == {"miniwob.click-test": []}
-
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert env_args_dependencies == [{0: [], 1: []}]
-
- # change to no parallel seed support
- benchmark.supports_parallel_seeds = False
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert env_args_dependencies == [{0: []}, {1: []}]
-
- # webarena, 3 tasks x 1 seed
- benchmark = DEFAULT_BENCHMARKS["webarena"]().subset_from_regexp(
- column="task_name", regexp=r"^webarena\.[012]$"
- )
-
- task_dependencies = benchmark.dependency_graph_over_tasks()
- assert task_dependencies == {
- "webarena.0": [],
- "webarena.1": ["webarena.0"],
- "webarena.2": ["webarena.1"],
- }
-
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert env_args_dependencies == [{0: [], 1: [0], 2: [1]}]
-
- # workarena L2, 2 task x (2 seeds, 1 seed)
- benchmark = DEFAULT_BENCHMARKS["workarena_l2_agent_curriculum_eval"]().subset_from_regexp(
- column="task_name",
- regexp=r"^workarena\.servicenow\.workload-balancing-small-l2$|^workarena\.servicenow\.easy-expense-management-small-l2$",
- )
-
- task_dependencies = benchmark.dependency_graph_over_tasks()
- assert task_dependencies == {
- "workarena.servicenow.workload-balancing-small-l2": [],
- "workarena.servicenow.easy-expense-management-small-l2": [],
- }
-
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert env_args_dependencies == [{0: [], 1: [], 2: []}]
-
- # change to no parallel seed support
- benchmark.supports_parallel_seeds = False
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert env_args_dependencies == [{0: [], 2: []}, {1: []}]
-
- # webarena, 6 dependent tasks x 1 seed
- benchmark = DEFAULT_BENCHMARKS["webarena"]().subset_from_regexp(
- column="task_name",
- regexp=r"^webarena\.533$|^webarena\.537$|^webarena\.552$|^webarena\.410$|^webarena\.561$|^webarena\.562$",
- )
-
- task_dependencies = benchmark.dependency_graph_over_tasks()
- assert {k: set(v) for k, v in task_dependencies.items()} == {
- k: set(v)
- for k, v in {
- "webarena.410": [],
- "webarena.533": [],
- "webarena.537": ["webarena.533"],
- "webarena.552": ["webarena.410", "webarena.537"],
- "webarena.561": ["webarena.552"],
- "webarena.562": ["webarena.552", "webarena.561"],
- }.items()
- }
-
- env_args_dependencies = benchmark.dependency_graphs_over_env_args()
- assert [{k: set(v) for k, v in deps.items()} for deps in env_args_dependencies] == [
- {k: set(v) for k, v in {0: [], 1: [], 2: [1], 3: [0, 2], 4: [3], 5: [3, 4]}.items()}
- ]
diff --git a/BrowserGym/tests/experiments/test_bgym.py b/BrowserGym/tests/experiments/test_bgym.py
deleted file mode 100644
index 193822caa4dd95bc7774b96ab390514c96a30f21..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/experiments/test_bgym.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import bgym
-import pytest
-
-
-def test_classes():
- bgym.EnvArgs(task_name="something")
- bgym.HighLevelActionSet()
- with pytest.raises(TypeError):
- bgym.Agent()
diff --git a/BrowserGym/tests/experiments/test_exp_loop.py b/BrowserGym/tests/experiments/test_exp_loop.py
deleted file mode 100644
index a954f9b7f5e1fedcfd413c4490c762ff23d4aa9a..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/experiments/test_exp_loop.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import re
-import tempfile
-import logging
-import dataclasses
-
-from browsergym.core.action.highlevel import HighLevelActionSet
-from browsergym.experiments.agent import Agent
-from browsergym.experiments.loop import AbstractAgentArgs, EnvArgs, ExpArgs, get_exp_result
-from browsergym.utils.obs import flatten_axtree_to_str
-
-
-class MiniwobTestAgent(Agent):
-
- action_set = HighLevelActionSet(subsets="bid")
-
- def obs_preprocessor(self, obs: dict):
- return {"axtree_txt": flatten_axtree_to_str(obs["axtree_object"])}
-
- def get_action(self, obs: dict) -> tuple[str, dict]:
- match = re.search(r"^\s*\[(\d+)\].*button", obs["axtree_txt"], re.MULTILINE | re.IGNORECASE)
-
- if match:
- bid = match.group(1)
- action = f'click("{bid}")'
- else:
- raise Exception("Can't find the button's bid")
-
- return action, dict(think="I'm clicking the button as requested.")
-
-
-@dataclasses.dataclass
-class MiniwobTestAgentArgs(AbstractAgentArgs):
- def make_agent(self):
- return MiniwobTestAgent()
-
-
-def test_run_exp():
- exp_args = ExpArgs(
- agent_args=MiniwobTestAgentArgs(),
- env_args=EnvArgs(task_name="miniwob.click-test", task_seed=42),
- )
-
- with tempfile.TemporaryDirectory() as tmp_dir:
- exp_args.prepare(tmp_dir)
- exp_args.run()
- exp_result = get_exp_result(exp_args.exp_dir)
- exp_record = exp_result.get_exp_record()
-
- target = {
- "env_args.task_name": "miniwob.click-test",
- "env_args.task_seed": 42,
- "env_args.headless": True,
- "env_args.record_video": False,
- "n_steps": 1,
- "cum_reward": 1.0,
- "terminated": True,
- "truncated": False,
- }
-
- assert len(exp_result.steps_info) == 2
-
- for key, target_val in target.items():
- assert key in exp_record
- assert exp_record[key] == target_val
-
- # TODO investigate why it's taking almost 5 seconds to solve
- assert exp_record["stats.cum_step_elapsed"] < 5
- if exp_record["stats.cum_step_elapsed"] > 3:
- t = exp_record["stats.cum_step_elapsed"]
- logging.warning(
- f"miniwob.click-test is taking {t:.2f}s (> 3s) to solve with an oracle."
- )
diff --git a/BrowserGym/tests/miniwob/__init__.py b/BrowserGym/tests/miniwob/__init__.py
deleted file mode 100644
index 75f09d6fbde51609da41e1041eb3fb8125d808cb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/miniwob/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# bugfix: use same playwright instance in browsergym and pytest
-from ..utils import setup_playwright
diff --git a/BrowserGym/tests/miniwob/test_base.py b/BrowserGym/tests/miniwob/test_base.py
deleted file mode 100644
index fe0fdf330f7e08c148206ecbed9ad8692135ea2f..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/miniwob/test_base.py
+++ /dev/null
@@ -1,196 +0,0 @@
-import os
-import pytest
-import time
-import gymnasium as gym
-
-# register gym environments
-import browsergym.miniwob
-
-from browsergym.miniwob.all import (
- ClickButtonTask,
- ClickOptionTask,
- DrawLineTask,
- LoginUserTask,
-)
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-TASKS = [ClickButtonTask, ClickOptionTask, DrawLineTask, LoginUserTask]
-
-
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_validate_teardown(task_cls):
- pw = browsergym.core._get_global_playwright()
-
- browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42)
- task.setup(page=page)
-
- reward, done, msg, info = task.validate(page, [])
-
- assert done is False
-
- task.teardown()
-
- context.close()
- browser.close()
-
-
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_episode_max_time(task_cls):
- pw = browsergym.core._get_global_playwright()
-
- browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42, episode_max_time=0.2)
- task.setup(page=page)
-
- time.sleep(0.5)
-
- reward, done, msg, info = task.validate(page, [])
-
- assert done is True
- assert reward == 0
-
- task.teardown()
-
- context.close()
- browser.close()
-
-
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_remove_human_display(task_cls):
- pw = browsergym.core._get_global_playwright()
-
- browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
-
- # remove display
-
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42, remove_human_display=True)
- task.setup(page=page)
-
- for element_id in ["reward-display", "click-canvas", "sync-task-cover"]:
- element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
- assert not element_in_dom
-
- assert page.evaluate(f"document.getElementById('query').innerHTML") == ""
-
- for element_id in ["wrap", "area"]:
- element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
- assert element_in_dom
-
- task.teardown()
-
- context.close()
-
- # keep display
-
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42, remove_human_display=False)
- task.setup(page=page)
-
- for element_id in ["reward-display", "click-canvas", "sync-task-cover"]:
- element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
- assert element_in_dom
-
- assert page.evaluate(f"document.getElementById('query').innerHTML") != ""
-
- for element_id in ["wrap", "area"]:
- element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
- assert element_in_dom
-
- task.teardown()
-
- context.close()
- browser.close()
-
-
-@pytest.mark.skip(reason="TODO: how to get the final viewport size right?")
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_viewport(task_cls):
- env = gym.make(
- f"browsergym/{task_cls.get_task_id()}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset(seed=42)
-
- screenshot = obs["screenshot"]
-
- # 3D array (height, width, rgb) of unsigned bytes (between 0 and 255)
- # Miniwob viewport should be (320x500)
- assert screenshot.shape[0] == 320
- assert screenshot.shape[1] == 500
- assert screenshot.shape[2] == 3 # RGB
-
- env.close()
-
-
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_forbidden_navigation(task_cls):
- pw = browsergym.core._get_global_playwright()
-
- browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42)
- task.setup(page=page)
-
- reward, done, msg, info = task.validate(page, [])
-
- assert reward == 0.0 and done == False
-
- page.goto("http://www.google.com")
-
- reward, done, msg, info = task.validate(page, [])
-
- assert reward == 0.0 and done == True
-
- task.teardown()
-
- context.close()
- browser.close()
-
-
-@pytest.mark.parametrize("task_cls", TASKS)
-def test_forbidden_navigation_2(task_cls):
- pw = browsergym.core._get_global_playwright()
-
- browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
- context = browser.new_context()
- page = context.new_page()
-
- task = task_cls(seed=42)
- task.setup(page=page)
-
- reward, done, msg, info = task.validate(page, [])
-
- assert reward == 0.0 and done == False
-
- page2 = context.new_page()
- page2.goto("http://www.google.com")
-
- reward, done, msg, info = task.validate(page, [])
-
- assert reward == 0.0 and done == False
-
- reward, done, msg, info = task.validate(page2, [])
-
- assert reward == 0.0 and done == True
-
- task.teardown()
-
- context.close()
- browser.close()
diff --git a/BrowserGym/tests/miniwob/test_click-menu-2.py b/BrowserGym/tests/miniwob/test_click-menu-2.py
deleted file mode 100644
index 8296da1cecff67321892a9b94aca1ba58febbd12..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/miniwob/test_click-menu-2.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import os
-import gymnasium as gym
-import re
-import pytest
-
-# register gym environments
-import browsergym.miniwob
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-@pytest.mark.parametrize("seed", range(5))
-def test_cheat(seed):
- env = gym.make(
- "browsergym/miniwob.click-menu-2",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- action_mapping=None,
- )
- obs, info = env.reset(seed=seed)
-
- assert obs["last_action_error"] == ""
-
- match1 = re.match(
- 'Click the "Menu" button, and then find and click on the item labeled "(.+)".', obs["goal"]
- )
- match2 = re.match(
- 'Click the "Menu" button, and then find and click on the item with the "(.+)" icon.',
- obs["goal"],
- )
-
- assert match1 or match2
-
- if match1:
- item_label = match1.groups()[0]
- item_classname = {
- "Save": "ui-icon-disk",
- "Prev": "ui-icon-seek-start",
- "Stop": "ui-icon-stop",
- "Play": "ui-icon-play",
- "Next": "ui-icon-seek-end",
- "Zoom In": "ui-icon-zoomin",
- "Zoom Out": "ui-icon-zoomout",
- }[item_label]
- else:
- item_classname = match2.groups()[0]
-
- action = f"""\
-page.get_by_text("Menu").click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- assert obs["last_action_error"] == ""
- assert reward == 0
- assert term == False
-
- if item_classname in ("ui-icon-seek-start", "ui-icon-stop", "ui-icon-play", "ui-icon-seek-end"):
-
- action = f"""\
-page.get_by_text("Playback").click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- assert obs["last_action_error"] == ""
- assert reward == 0
- assert term == False
-
- action = f"""\
-page.locator(".{item_classname}").click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- assert obs["last_action_error"] == ""
- assert reward == 1
- assert term == True
-
- env.close()
diff --git a/BrowserGym/tests/miniwob/test_click-scroll-list.py b/BrowserGym/tests/miniwob/test_click-scroll-list.py
deleted file mode 100644
index 8f16cd7c2fd14fcf364abc5025d10067689ec8ee..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/miniwob/test_click-scroll-list.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import os
-import gymnasium as gym
-import re
-import pytest
-
-# register gym environments
-import browsergym.miniwob
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-@pytest.mark.parametrize("seed", range(5))
-def test_cheat(seed):
- env = gym.make(
- "browsergym/miniwob.click-scroll-list",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- action_mapping=None,
- )
- obs, info = env.reset(seed=seed)
-
- assert obs["last_action_error"] == ""
-
- match = re.match("Select (.+) from the scroll list and click Submit.", obs["goal"])
-
- assert match
-
- options = match.groups()[0].split(", ")
- options = '", "'.join(options)
- action = f"""\
-page.locator("#options").select_option(["{options}"])
-page.get_by_role("button", name="Submit").click()
-"""
-
- obs, reward, term, trunc, info = env.step(action)
-
- assert obs["last_action_error"] == ""
- assert reward == 1
- assert term == True
-
- env.close()
diff --git a/BrowserGym/tests/miniwob/test_use-colorwheel-2.py b/BrowserGym/tests/miniwob/test_use-colorwheel-2.py
deleted file mode 100644
index 45d660d431bc97aa152f658f63306a8e14f611b7..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/miniwob/test_use-colorwheel-2.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import os
-import gymnasium as gym
-import re
-import pytest
-
-# register gym environments
-import browsergym.miniwob
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-@pytest.mark.parametrize("seed", range(5))
-def test_cheat(seed):
- env = gym.make(
- "browsergym/miniwob.use-colorwheel-2",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- action_mapping=None,
- )
- obs, info = env.reset(seed=42)
-
- assert obs["last_action_error"] == ""
-
- match = re.match(
- "Select the following color #(.+) with the color picker and hit Submit.", obs["goal"]
- )
-
- assert match
-
- color = match.groups()[0].upper()
-
- obs, reward, term, trunc, info = env.step(
- f"""\
-page.locator("#col").fill("{color}")
-page.get_by_role("button", name="Submit").click()
-"""
- )
-
- assert obs["last_action_error"] == ""
- assert reward == 1
- assert term == True
-
- env.close()
diff --git a/BrowserGym/tests/utils.py b/BrowserGym/tests/utils.py
deleted file mode 100644
index 48595751f2a9aa8070bfebe6ae57142ff3d0d653..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/utils.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import browsergym.core
-import logging
-import playwright.sync_api
-import pytest
-
-
-# setup code, executed ahead of first test
-@pytest.fixture(scope="session", autouse=True)
-def setup_playwright(playwright: playwright.sync_api.Playwright):
- # bugfix: re-use pytest-playwright's playwright instance in browsergym
- # https://github.com/microsoft/playwright-python/issues/2053
- browsergym.core._set_global_playwright(playwright)
- logging.info("Browsergym is using the playwright instance provided by pytest-playwright.")
diff --git a/BrowserGym/tests/visualwebarena/__init__.py b/BrowserGym/tests/visualwebarena/__init__.py
deleted file mode 100644
index 75f09d6fbde51609da41e1041eb3fb8125d808cb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/visualwebarena/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# bugfix: use same playwright instance in browsergym and pytest
-from ..utils import setup_playwright
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_domains.py b/BrowserGym/tests/visualwebarena/test_vwa_domains.py
deleted file mode 100644
index 80d4a4256e8bb523dc447904fd8561b748d75b0e..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/visualwebarena/test_vwa_domains.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pytest
-import playwright.sync_api
-
-from browsergym.visualwebarena.instance import VisualWebArenaInstance
-
-
-def test_is_reachable():
- # default URLs
- instance = VisualWebArenaInstance()
- instance.check_status()
-
- # unreacheable URL
- with pytest.raises(RuntimeError):
- instance = VisualWebArenaInstance()
- instance.urls["reddit"] = "https://invalid.url"
- instance.check_status()
-
-
-@pytest.mark.parametrize("site", ["reddit", "shopping", "wikipedia", "classifieds"])
-def test_credentials(page: playwright.sync_api.Page, site: str):
- # default URLs and credentials
- instance = VisualWebArenaInstance()
- instance.ui_login(site=site, page=page)
-
- # TODO: test this more thoroughly
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py b/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py
deleted file mode 100644
index e586d2a777934063c196903e30c7beeb503cb6fb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import logging
-import os
-import random
-
-import gymnasium as gym
-import playwright.sync_api
-import pytest
-from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
-
-# register gym environments
-import browsergym.visualwebarena
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITH_RESET
-
-rng = random.Random(1)
-task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITH_RESET, 10)
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- wait=wait_fixed(2),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-@pytest.mark.parametrize("task_id", task_ids)
-@pytest.mark.slow
-@pytest.mark.serial
-def test_env_generic(task_id):
- env = gym.make(
- f"browsergym/{task_id}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
- env.close()
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py b/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py
deleted file mode 100644
index b3fad322381d8ceed88dae3ae3449d6da02fb197..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import logging
-import os
-import random
-
-import gymnasium as gym
-import playwright.sync_api
-import pytest
-from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
-
-# register gym environments
-import browsergym.visualwebarena
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITHOUT_RESET
-
-rng = random.Random(1)
-task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITHOUT_RESET, 25)
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- wait=wait_fixed(2),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-@pytest.mark.parametrize("task_id", task_ids)
-@pytest.mark.slow
-def test_env_generic(task_id):
- env = gym.make(
- f"browsergym/{task_id}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
- env.close()
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- wait=wait_fixed(2),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-def test_domain_safeguard():
- env = gym.make(
- f"browsergym/visualwebarena.398",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
- assert not obs["last_action_error"]
-
- obs, reward, terminated, truncated, info = env.step("new_tab()")
- assert not obs["last_action_error"]
- assert not (terminated or truncated)
-
- obs, reward, terminated, truncated, info = env.step("tab_close()")
- assert not obs["last_action_error"]
- assert not (terminated or truncated)
-
- obs, reward, terminated, truncated, info = env.step("tab_focus(0)")
- assert not obs["last_action_error"]
- assert not (terminated or truncated)
-
- obs, reward, terminated, truncated, info = env.step('goto("http://www.google.com")')
- assert not obs["last_action_error"]
- assert terminated
-
- env.close()
diff --git a/BrowserGym/tests/webarena/__init__.py b/BrowserGym/tests/webarena/__init__.py
deleted file mode 100644
index 75f09d6fbde51609da41e1041eb3fb8125d808cb..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/webarena/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# bugfix: use same playwright instance in browsergym and pytest
-from ..utils import setup_playwright
diff --git a/BrowserGym/tests/webarena/test_env_general.py b/BrowserGym/tests/webarena/test_env_general.py
deleted file mode 100644
index d4a81b23a7a34d376ed8048e71b31606d91f589d..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/webarena/test_env_general.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import gymnasium as gym
-import logging
-import os
-import playwright.sync_api
-import pytest
-import random
-
-from tenacity import retry, stop_after_attempt, retry_if_exception_type
-
-# register gym environments
-import browsergym.webarena
-
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-
-from browsergym.webarena import ALL_WEBARENA_TASK_IDS
-
-rng = random.Random(1)
-task_ids = rng.sample(ALL_WEBARENA_TASK_IDS, 25)
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-@pytest.mark.parametrize("task_id", task_ids)
-@pytest.mark.slow
-def test_env_generic(task_id):
- env = gym.make(
- f"browsergym/{task_id}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
-
- env.close()
diff --git a/BrowserGym/tests/webarena/test_infeasible.py b/BrowserGym/tests/webarena/test_infeasible.py
deleted file mode 100644
index 044b5c404558739529e159d9dd5c357156c90ec8..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/webarena/test_infeasible.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import gymnasium as gym
-import logging
-import os
-import playwright.sync_api
-import pytest
-
-from tenacity import retry, stop_after_attempt, retry_if_exception_type
-
-# register gym environments
-import browsergym.webarena
-
-
-__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
-__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
-
-INFEAS_TASK_IDS = [101, 115, 166]
-FEAS_TASK_IDS = [165, 187, 199]
-
-
-@retry(
- stop=stop_after_attempt(5),
- retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
- reraise=True,
- before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
-)
-@pytest.mark.parametrize(
- "task_id,infeasible",
- [(task_id, True) for task_id in INFEAS_TASK_IDS]
- + [(task_id, False) for task_id in FEAS_TASK_IDS],
-)
-@pytest.mark.slow
-def test_infeasible(task_id, infeasible):
- env = gym.make(
- f"browsergym/webarena.{task_id}",
- headless=__HEADLESS,
- slow_mo=__SLOW_MO,
- )
- obs, info = env.reset()
-
- action = 'report_infeasible("Unachievable task.")'
-
- obs, reward, term, trunc, info = env.step(action)
-
- if infeasible:
- assert term == True and reward == 1.0
-
- else:
- assert term == True and reward == 0.0
-
- env.close()
diff --git a/BrowserGym/tests/webarena/test_instance.py b/BrowserGym/tests/webarena/test_instance.py
deleted file mode 100644
index a538a53f97c7372f72a99445b62843ce30d0c9e7..0000000000000000000000000000000000000000
--- a/BrowserGym/tests/webarena/test_instance.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import pytest
-import playwright.sync_api
-
-from browsergym.webarena.instance import WebArenaInstance
-
-
-def test_is_reachable():
- # default URLs
- instance = WebArenaInstance()
- instance.check_status()
-
- # unreacheable URL
- with pytest.raises(RuntimeError):
- instance = WebArenaInstance()
- instance.urls["reddit"] = "https://invalid.url"
- instance.check_status()
-
-
-@pytest.mark.parametrize(
- "site", ["reddit", "shopping", "shopping_admin", "gitlab", "wikipedia", "map"]
-)
-def test_credentials(page: playwright.sync_api.Page, site: str):
- # default URLs and credentials
- instance = WebArenaInstance()
- instance.ui_login(site=site, page=page)
-
- # TODO: test this more thoroughly