tests: pytest + playwright headless smoke harness
First Space-side test harness; sets up the floor for the Phase D
bundle's per-commit unit + UI tests (split leaderboard, pending-row
defaults, OAuth-required submit) without slipping a regression past
a build-only check.
requirements-dev.txt pulls -r requirements.txt then adds pytest +
playwright. One `pip install -r requirements-dev.txt && playwright
install chromium` brings a fresh venv up to speed.
tests/conftest.py exposes a session-scoped `app_url` fixture: boots
app.py in a subprocess on a free port, polls until the HTTP server
answers, yields the URL, tears the process down on teardown. Logs
land in a tmp file so a startup failure surfaces the real Gradio
traceback rather than a bare RuntimeError. Boot-time stuck-pending
sweep in submit.py is disabled via CADGENBENCH_DISABLE_BOOT_SWEEP=1
so the fixture is fast and offline-tolerant.
tests/test_smoke.py launches headless Chromium via playwright's sync
API, navigates to the fixture's URL, and asserts the Leaderboard /
Submit / About tabs all render. If the Space won't load these three
tabs every downstream test is meaningless; this is the gate.
None of these files ship into the production image (the Dockerfile
only COPYs *.py from the repo root plus results.jsonl), so the push
rebuilds an identical image and verifying RUNNING is a no-op safety
check.
- requirements-dev.txt +16 -0
- tests/conftest.py +90 -0
- tests/test_smoke.py +28 -0
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dev-only deps for the Space test harness. Use:
|
| 2 |
+
#
|
| 3 |
+
# pip install -r requirements-dev.txt
|
| 4 |
+
# playwright install chromium
|
| 5 |
+
#
|
| 6 |
+
# Runtime deps (gradio, gradio_leaderboard, pandas, ...) are pulled in
|
| 7 |
+
# via the -r line below so a fresh venv only needs the one install
|
| 8 |
+
# command above. cadgenbench itself is installed separately (either
|
| 9 |
+
# editable from the sibling cadgenbench/ checkout, or pip-installed
|
| 10 |
+
# from github.com/huggingface/cadgenbench like the Dockerfile does);
|
| 11 |
+
# the unit tests in later commits will mock the cadgenbench surfaces
|
| 12 |
+
# they actually need.
|
| 13 |
+
-r requirements.txt
|
| 14 |
+
|
| 15 |
+
pytest>=8.0
|
| 16 |
+
playwright>=1.40
|
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared pytest fixtures for the Space test suite.
|
| 2 |
+
|
| 3 |
+
Two responsibilities:
|
| 4 |
+
|
| 5 |
+
1. Put the Space root on ``sys.path`` so unit tests in later commits
|
| 6 |
+
can ``import leaderboard`` / ``import submit`` directly without a
|
| 7 |
+
package layout shim.
|
| 8 |
+
2. Expose an ``app_url`` session fixture that boots ``app.py`` in a
|
| 9 |
+
subprocess on a free port, polls until the HTTP server answers,
|
| 10 |
+
yields the URL, and terminates the process on teardown. The
|
| 11 |
+
subprocess's stdout + stderr are captured to a tmp log file so a
|
| 12 |
+
readiness-timeout or early exit surfaces the actual Gradio log.
|
| 13 |
+
"""
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import socket
|
| 18 |
+
import subprocess
|
| 19 |
+
import sys
|
| 20 |
+
import time
|
| 21 |
+
from pathlib import Path
|
| 22 |
+
|
| 23 |
+
import pytest
|
| 24 |
+
import requests
|
| 25 |
+
|
| 26 |
+
# Directory two levels above this file (the parent of the tests directory);
# used below as the import root and as the working directory for app.py.
SPACE_ROOT = Path(__file__).resolve().parent.parent
|
| 27 |
+
# Put the Space root first on the import path so tests can import its
# top-level modules directly (see the module docstring).
sys.path.insert(0, str(SPACE_ROOT))
|
| 28 |
+
|
| 29 |
+
# Upper bound, in seconds, on how long _wait_for_ready polls app.py before
# giving up and raising.
APP_BOOT_TIMEOUT_SECONDS = 90
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _pick_free_port() -> int:
|
| 33 |
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
| 34 |
+
s.bind(("127.0.0.1", 0))
|
| 35 |
+
return s.getsockname()[1]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _wait_for_ready(proc: subprocess.Popen, url: str, log_path: Path) -> None:
    """Block until ``url`` answers HTTP 200, or fail with the app's log.

    Args:
        proc: The ``app.py`` subprocess. It is polled on every iteration so
            an early exit is reported immediately instead of after the full
            timeout.
        url: URL fetched with GET until it returns status 200.
        log_path: File receiving the subprocess's output; its contents are
            embedded in the error message on failure.

    Raises:
        RuntimeError: If the process exits before the server is ready, or if
            no 200 response arrives within ``APP_BOOT_TIMEOUT_SECONDS``.
    """

    def _read_log() -> str:
        # The log is only diagnostic context. Undecodable bytes or a read
        # error must not replace the RuntimeError that explains the failure.
        try:
            return log_path.read_text(errors="replace")
        except OSError as exc:
            return f"<could not read {log_path}: {exc}>"

    # Monotonic clock: a wall-clock adjustment during boot must not shorten
    # or extend the timeout.
    deadline = time.monotonic() + APP_BOOT_TIMEOUT_SECONDS
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            raise RuntimeError(
                f"app.py exited with code {proc.returncode} before HTTP "
                f"was ready. Log:\n{_read_log()}"
            )
        try:
            if requests.get(url, timeout=2).status_code == 200:
                return
        except requests.RequestException:
            # Connection refused / timeout while the server is still
            # starting is expected; keep polling.
            pass
        time.sleep(0.5)
    raise RuntimeError(
        f"app.py did not respond on {url} within "
        f"{APP_BOOT_TIMEOUT_SECONDS}s. Log:\n{_read_log()}"
    )
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@pytest.fixture(scope="session")
def app_url(tmp_path_factory):
    """Boot ``app.py`` in a subprocess and yield the URL it is serving on.

    The app is started once per test session on a free localhost port, with
    its stdout and stderr redirected to a log file in a pytest temp
    directory. The fixture yields only after ``_wait_for_ready`` confirms the
    server answers. On teardown the process is terminated, and killed if it
    has not exited within 10 seconds.

    Yields:
        The base URL, ``http://127.0.0.1:<port>``.
    """
    port = _pick_free_port()
    url = f"http://127.0.0.1:{port}"
    log_path = tmp_path_factory.mktemp("space-smoke") / "app.log"
    env = {
        **os.environ,
        "GRADIO_SERVER_NAME": "127.0.0.1",
        "GRADIO_SERVER_PORT": str(port),
        # submit.py's boot-time stuck-pending sweep hits the Hub on
        # import. Off in tests so a Hub blip doesn't slow the fixture
        # or pollute the log; the sweep is exercised separately in
        # submit-specific tests.
        "CADGENBENCH_DISABLE_BOOT_SWEEP": "1",
    }
    # The with-block guarantees the log handle is closed even when Popen
    # itself raises or the shutdown waits below time out.
    with log_path.open("w", buffering=1) as log_file:
        proc = subprocess.Popen(
            [sys.executable, "app.py"],
            cwd=str(SPACE_ROOT),
            env=env,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )
        try:
            _wait_for_ready(proc, url, log_path)
            yield url
        finally:
            # Ask politely first; escalate to kill if the app ignores it.
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=5)
|
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Playwright headless smoke test.
|
| 2 |
+
|
| 3 |
+
Boots ``app.py`` in a subprocess (via the ``app_url`` fixture in
|
| 4 |
+
:mod:`conftest`) and asserts the three Gradio tabs render. Acts as
|
| 5 |
+
the Phase D minimum: if the Space won't load any tab, every other
|
| 6 |
+
test downstream is meaningless.
|
| 7 |
+
|
| 8 |
+
Requires:
|
| 9 |
+
- ``pip install -r requirements-dev.txt``
|
| 10 |
+
- ``playwright install chromium`` (one-off, fetches the bundled browser)
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
from playwright.sync_api import expect, sync_playwright
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def test_three_tabs_render(app_url):
    """Load the app in headless Chromium and require all three tabs to show.

    Each of the Leaderboard, Submit and About tabs must become visible
    within 15 seconds of navigating to ``app_url``.
    """
    expected_tabs = ("Leaderboard", "Submit", "About")
    with sync_playwright() as pw:
        chromium = pw.chromium.launch(headless=True)
        try:
            view = chromium.new_page()
            view.goto(app_url)
            for label in expected_tabs:
                tab = view.get_by_role("tab", name=label)
                expect(tab).to_be_visible(timeout=15_000)
        finally:
            # Always release the browser, even when an expectation fails.
            chromium.close()
|