"""Integration test: build + run Docker image, test via client.

Usage:
    uv run python tests/test_docker.py                  # build + run + test
    uv run python tests/test_docker.py --skip-build     # reuse existing image
    uv run python tests/test_docker.py --image my:tag   # custom image name
    uv run python tests/test_docker.py --port 8080      # custom host port
"""

import argparse
import shutil
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional, Sequence

# Put the directory one level above this file's folder on sys.path so the
# ``client`` / ``models`` imports in run_tests() can resolve (presumably they
# live there -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

IMAGE = "explainer-env:latest"
CONTAINER = "explainer-env-test"
HOST_PORT = 8000  # default port published on the host
CONTAINER_PORT = 8000  # port mapped inside the container


def wait_for_server(url: str, timeout: int = 30) -> bool:
    """Poll ``<url>/health`` until a request succeeds or *timeout* elapses.

    Args:
        url: Base URL of the server; a trailing slash is tolerated.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as one health request succeeds, False if the deadline
        passes first.
    """
    import http.client
    import urllib.request

    health_url = f"{url.rstrip('/')}/health"
    # Monotonic clock: the deadline must not shift if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the response instead of leaking it.
            with urllib.request.urlopen(health_url, timeout=2):
                return True
        except (OSError, http.client.HTTPException):
            # Refused/reset connections, timeouts and HTTP error statuses all
            # mean "not ready yet"; anything else (e.g. a malformed URL) is a
            # caller bug and is allowed to propagate.
            time.sleep(1)
    return False


def docker_build(image: str) -> None:
    """Build *image* from ``server/Dockerfile``; exit with status 1 on failure.

    The build runs with the directory one level above this file's folder as
    its working directory and build context.
    """
    env_dir = Path(__file__).resolve().parents[1]
    print(f" building {image} from {env_dir}...")
    result = subprocess.run(
        ["docker", "build", "-t", image, "-f", "server/Dockerfile", "."],
        cwd=str(env_dir),
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Only the tail of stderr is shown to keep the failure report short.
        print(f"FAIL: docker build\n{result.stderr[-1000:]}", file=sys.stderr)
        sys.exit(1)
    print(" build OK")


def _remove_container(container: str) -> None:
    """Force-remove *container*; the result is ignored (it may not exist)."""
    subprocess.run(["docker", "rm", "-f", container], capture_output=True)


def docker_run(image: str, container: str, host_port: int = HOST_PORT) -> None:
    """Start *image* detached as *container*; exit with status 1 on failure.

    Args:
        image: Image name/tag to run.
        container: Name given to the container; any existing container with
            this name is force-removed first.
        host_port: Host port published to the container's port 8000.
    """
    # clean up stale container
    _remove_container(container)
    result = subprocess.run(
        [
            "docker", "run", "-d",
            "--name", container,
            "-p", f"{host_port}:{CONTAINER_PORT}",
            image,
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"FAIL: docker run\n{result.stderr}", file=sys.stderr)
        sys.exit(1)
    print(f" container {container} started")


def docker_cleanup(container: str) -> None:
    """Force-remove *container* and report it."""
    _remove_container(container)
    print(f" container {container} removed")


def run_tests(base_url: str) -> None:
    """Exercise reset and step against the server at *base_url*.

    Raises:
        AssertionError: If reset returns no topic or the final step's reward
            is not numeric.
    """
    # Project modules, imported lazily so this file can be imported without them.
    from client import ExplainerEnv
    from models import ExplainerAction

    marimo_code = (
        "import marimo as mo\napp = mo.App()\n@app.cell\ndef _():\n return\n"
    )

    client = ExplainerEnv(base_url=base_url)
    with client.sync() as sc:
        result = sc.reset()
        assert result.observation.topic, "reset should return topic"
        print(f" reset: topic={result.observation.topic!r}")

        action = ExplainerAction(
            action_type="generate",
            format="marimo",
            code=marimo_code,
        )
        result = sc.step(action)
        if not result.done:
            # Episode still open after "generate": send one "repair" step.
            result = sc.step(ExplainerAction(
                action_type="repair",
                format="marimo",
                code=marimo_code,
            ))
        assert isinstance(result.reward, (int, float)), (
            "step should return a numeric reward"
        )
        print(f" step: reward={result.reward:.3f}, done={result.done}")
    print("PASS: test_docker (2/2)")


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Build (unless skipped), run, test and always remove the container.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--skip-build", action="store_true")
    parser.add_argument("--image", default=IMAGE)
    parser.add_argument(
        "--port",
        type=int,
        default=HOST_PORT,
        help="host port published to the container (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    # Fail with a clear message instead of a FileNotFoundError traceback.
    if shutil.which("docker") is None:
        print("FAIL: docker executable not found on PATH", file=sys.stderr)
        sys.exit(1)

    if not args.skip_build:
        docker_build(args.image)

    try:
        # Started inside the try block: a failed `docker run` can leave a
        # created-but-not-started container behind, which must be removed too.
        docker_run(args.image, CONTAINER, args.port)
        url = f"http://localhost:{args.port}"
        if not wait_for_server(url):
            logs = subprocess.run(
                ["docker", "logs", CONTAINER], capture_output=True, text=True
            )
            print(
                f"FAIL: container didn't start\n{logs.stdout}\n{logs.stderr}",
                file=sys.stderr,
            )
            sys.exit(1)
        run_tests(url)
    finally:
        docker_cleanup(CONTAINER)


if __name__ == "__main__":
    main()