Spaces:
Running
Running
| """Integration test: build + run Docker image, test via client. | |
| Usage: | |
| uv run python tests/test_docker.py # build + run + test | |
| uv run python tests/test_docker.py --skip-build # reuse existing image | |
| uv run python tests/test_docker.py --image my:tag # custom image name | |
| """ | |
| import argparse | |
| import subprocess | |
| import sys | |
| import time | |
| from pathlib import Path | |
# Put the directory above tests/ at the front of the import path; presumably
# this is what lets run_tests() import the project's `client` and `models`.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

# Default image tag; overridable on the command line with --image.
IMAGE = "explainer-env:latest"
# Fixed name for the test container so it can be found and removed later.
CONTAINER = "explainer-env-test"
def wait_for_server(url: str, timeout: float = 30, interval: float = 1.0) -> bool:
    """Poll ``{url}/health`` until a request succeeds or the deadline passes.

    Args:
        url: Base URL of the server; ``/health`` is appended to it.
        timeout: Total number of seconds to keep trying.
        interval: Pause in seconds between failed probes.

    Returns:
        True as soon as one probe completes without raising, False when
        ``timeout`` seconds elapse first (including ``timeout <= 0``).

    Raises:
        ValueError: If ``url`` is malformed; that is a caller error and is
            surfaced immediately rather than retried.
    """
    import http.client
    import urllib.request

    # monotonic() cannot jump if the wall clock is adjusted during the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Close the response so a successful probe does not leak a socket.
            with urllib.request.urlopen(f"{url}/health", timeout=2):
                return True
        except (OSError, http.client.HTTPException):
            # URLError/HTTPError, refused connections and socket timeouts are
            # OSError subclasses; HTTPException covers malformed replies from
            # a server that is only partly up. Never sleep past the deadline.
            remaining = deadline - time.monotonic()
            time.sleep(max(0.0, min(interval, remaining)))
    return False
def docker_build(image: str):
    """Build ``image`` from ``server/Dockerfile`` and exit the process on failure.

    The build runs with the directory one level above this file's folder as
    both working directory and build context. On a non-zero exit status the
    last 1000 characters of the build's stderr are printed and the script
    terminates with status 1.
    """
    context_dir = Path(__file__).resolve().parents[1]
    print(f" building {image} from {context_dir}...")

    build_cmd = ["docker", "build", "-t", image, "-f", "server/Dockerfile", "."]
    proc = subprocess.run(
        build_cmd,
        cwd=str(context_dir),
        capture_output=True,
        text=True,
    )

    if proc.returncode == 0:
        print(" build OK")
        return

    # Only the tail of stderr is shown to keep the failure report short.
    print(f"FAIL: docker build\n{proc.stderr[-1000:]}", file=sys.stderr)
    sys.exit(1)
def docker_run(image: str, container: str):
    """Start ``image`` detached as ``container`` with port 8000 published.

    Any existing container with the same name is force-removed first. If
    ``docker run`` exits non-zero, its stderr is printed and the script
    terminates with status 1.
    """
    # Drop a leftover container from an earlier run; the result is ignored.
    subprocess.run(["docker", "rm", "-f", container], capture_output=True)

    run_cmd = ["docker", "run", "-d", "--name", container, "-p", "8000:8000", image]
    proc = subprocess.run(run_cmd, capture_output=True, text=True)

    if proc.returncode:
        print(f"FAIL: docker run\n{proc.stderr}", file=sys.stderr)
        sys.exit(1)

    print(f" container {container} started")
def docker_cleanup(container: str):
    """Force-remove ``container`` and report it.

    The exit status of ``docker rm`` is not inspected, so the message is
    printed whether or not a container was actually removed.
    """
    remove_cmd = ["docker", "rm", "-f", container]
    subprocess.run(remove_cmd, capture_output=True)
    print(f" container {container} removed")
def run_tests(base_url: str):
    """Exercise the server at ``base_url`` through the project client.

    Resets the environment and checks that the observation carries a topic,
    then submits a "generate" action. If that step does not finish the
    episode, a "repair" action with the same code is submitted. The reward of
    the last step must be numeric.

    Raises:
        AssertionError: If the topic is empty or the reward is not int/float.
    """
    from client import ExplainerEnv
    from models import ExplainerAction

    # The generate and repair actions both submit this same minimal app.
    notebook_src = "import marimo as mo\napp = mo.App()\n@app.cell\ndef _():\n return\n"

    env = ExplainerEnv(base_url=base_url)
    with env.sync() as session:
        outcome = session.reset()
        assert outcome.observation.topic, "reset should return topic"
        print(f" reset: topic={outcome.observation.topic!r}")

        generate = ExplainerAction(
            action_type="generate",
            format="marimo",
            code=notebook_src,
        )
        outcome = session.step(generate)

        if not outcome.done:
            repair = ExplainerAction(
                action_type="repair",
                format="marimo",
                code=notebook_src,
            )
            outcome = session.step(repair)

        assert isinstance(outcome.reward, (int, float))
        print(f" step: reward={outcome.reward:.3f}, done={outcome.done}")

    print("PASS: test_docker (2/2)")
def main():
    """Parse CLI flags, then build, start, test and remove the container.

    Flags:
        --skip-build: Do not run the image build step.
        --image: Image tag to build and run (defaults to ``IMAGE``).

    The container is removed in a ``finally`` clause, so cleanup happens
    whether the tests pass, fail, or the server never becomes healthy.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--skip-build", action="store_true")
    cli.add_argument("--image", default=IMAGE)
    opts = cli.parse_args()

    if not opts.skip_build:
        docker_build(opts.image)
    docker_run(opts.image, CONTAINER)

    base_url = "http://localhost:8000"
    try:
        if wait_for_server(base_url):
            run_tests(base_url)
        else:
            # Show the container's output to explain why it never came up.
            logs = subprocess.run(
                ["docker", "logs", CONTAINER], capture_output=True, text=True
            )
            print(
                f"FAIL: container didn't start\n{logs.stdout}\n{logs.stderr}",
                file=sys.stderr,
            )
            sys.exit(1)
    finally:
        docker_cleanup(CONTAINER)
# Run the integration test only when executed as a script, not on import.
if __name__ == "__main__":
    main()