# File: explainer-env/tests/test_docker.py
# Hugging Face file-viewer header (not part of the program):
#   last commit "Upload folder using huggingface_hub" (kgdrathan, 43f41de, verified)
"""Integration test: build + run Docker image, test via client.
Usage:
    uv run python tests/test_docker.py                 # build + run + test
    uv run python tests/test_docker.py --skip-build    # reuse existing image
    uv run python tests/test_docker.py --image my:tag  # custom image name
"""
import argparse
import subprocess
import sys
import time
from pathlib import Path
# Put the directory one level above this file's directory at the front of
# sys.path. Presumably this is what lets run_tests() import the local
# `client` and `models` modules -- confirm against the repo layout.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
# Default image tag; main() uses it as the default for --image.
IMAGE = "explainer-env:latest"
# Name given to the container started for the test run; main() passes it to
# docker_run(), `docker logs`, and docker_cleanup().
CONTAINER = "explainer-env-test"
def wait_for_server(url: str, timeout: int = 30) -> bool:
    """Poll ``{url}/health`` until a request succeeds or the time budget ends.

    Args:
        url: Base URL of the server, without a trailing slash.
        timeout: Overall budget in seconds for the server to come up. Each
            individual request is additionally capped at 2 seconds.

    Returns:
        True as soon as one request to the health endpoint completes without
        raising. False when the budget runs out first; with a zero or
        negative ``timeout`` no request is made at all.
    """
    import urllib.request

    # A monotonic clock keeps the deadline stable if the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Use the response as a context manager so its socket is closed
            # instead of being left for the garbage collector.
            with urllib.request.urlopen(f"{url}/health", timeout=2):
                return True
        except Exception:
            # Deliberately broad: any failure (refused connection, reset,
            # HTTP error status, ...) just means "not ready yet".
            time.sleep(1)
    return False
def docker_build(image: str):
    """Build the Docker image from ``server/Dockerfile`` and tag it ``image``.

    The build runs with the directory one level above this file's directory
    as both working directory and build context.

    Args:
        image: Tag passed to ``docker build -t``.

    Raises:
        SystemExit: With status 1 when the ``docker`` command cannot be
            launched or the build returns a non-zero exit code. The reason
            is printed to stderr first.
    """
    env_dir = Path(__file__).resolve().parents[1]
    print(f" building {image} from {env_dir}...")
    try:
        result = subprocess.run(
            ["docker", "build", "-t", image, "-f", "server/Dockerfile", "."],
            cwd=str(env_dir),
            capture_output=True,
            text=True,
        )
    except OSError as err:
        # Typically FileNotFoundError when the docker CLI is not on PATH;
        # report it like any other build failure instead of a traceback.
        print(f"FAIL: docker build\n{err}", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        # Only the tail of stderr is shown to keep the failure readable.
        print(f"FAIL: docker build\n{result.stderr[-1000:]}", file=sys.stderr)
        sys.exit(1)
    print(" build OK")
def docker_run(image: str, container: str):
    """Start ``image`` detached as ``container`` with port 8000 published.

    Any existing container with the same name is force-removed first so the
    name is free.

    Args:
        image: Image reference to run.
        container: Name to give the new container.

    Raises:
        SystemExit: With status 1 when the ``docker`` command cannot be
            launched or ``docker run`` returns a non-zero exit code. The
            reason is printed to stderr first.
    """
    try:
        # clean up stale container; the outcome is intentionally ignored
        subprocess.run(["docker", "rm", "-f", container], capture_output=True)
        result = subprocess.run(
            ["docker", "run", "-d", "--name", container, "-p", "8000:8000", image],
            capture_output=True,
            text=True,
        )
    except OSError as err:
        # Typically FileNotFoundError when the docker CLI is not on PATH;
        # report it like any other run failure instead of a traceback.
        print(f"FAIL: docker run\n{err}", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        print(f"FAIL: docker run\n{result.stderr}", file=sys.stderr)
        sys.exit(1)
    print(f" container {container} started")
def docker_cleanup(container: str):
    """Force-remove ``container`` on a best-effort basis.

    This never raises for docker-level problems, so it is safe to call from
    a ``finally`` block without masking the error that triggered cleanup.
    Success is reported on stdout; a failed removal is reported on stderr.

    Args:
        container: Name of the container to remove.
    """
    try:
        result = subprocess.run(
            ["docker", "rm", "-f", container],
            capture_output=True,
            text=True,
            errors="replace",
        )
    except OSError as err:
        # The docker CLI could not be launched at all; nothing to remove.
        print(f" container {container} not removed: {err}", file=sys.stderr)
        return
    if result.returncode != 0:
        # Do not claim success when docker reported a failure.
        print(
            f" container {container} not removed: {result.stderr.strip()}",
            file=sys.stderr,
        )
        return
    print(f" container {container} removed")
def run_tests(base_url: str):
    """Exercise a running server through the project client.

    Opens a synchronous client session against ``base_url`` and then:

    1. resets the environment and asserts the observation has a truthy topic;
    2. sends a ``generate`` action with a small marimo snippet, followed by a
       ``repair`` action with the same snippet if the episode is not done;
    3. asserts the final reward is an ``int`` or ``float``.

    Progress lines and a final PASS line are printed to stdout.

    Args:
        base_url: Base URL of the server under test.

    Raises:
        AssertionError: When one of the checks above fails.
    """
    from client import ExplainerEnv
    from models import ExplainerAction

    # Both the generate and the repair step submit this same snippet.
    notebook_src = "import marimo as mo\napp = mo.App()\n@app.cell\ndef _():\n return\n"

    env = ExplainerEnv(base_url=base_url)
    with env.sync() as session:
        outcome = session.reset()
        assert outcome.observation.topic, "reset should return topic"
        print(f" reset: topic={outcome.observation.topic!r}")

        generate = ExplainerAction(
            action_type="generate", format="marimo", code=notebook_src
        )
        outcome = session.step(generate)
        if not outcome.done:
            repair = ExplainerAction(
                action_type="repair", format="marimo", code=notebook_src
            )
            outcome = session.step(repair)

        assert isinstance(outcome.reward, (int, float))
        print(f" step: reward={outcome.reward:.3f}, done={outcome.done}")
    print("PASS: test_docker (2/2)")
def main(argv=None):
    """Build (unless skipped), run, and test the Docker image, then clean up.

    Command-line options:
        --skip-build  Reuse an existing image instead of building one.
        --image       Image tag to build and run (defaults to ``IMAGE``).

    Args:
        argv: Optional list of arguments to parse instead of
            ``sys.argv[1:]``; the default ``None`` keeps the normal
            command-line behaviour.

    Raises:
        SystemExit: With status 1 when the server's health endpoint does not
            answer in time (container logs are printed to stderr first), or
            when one of the docker helpers exits.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--skip-build", action="store_true")
    parser.add_argument("--image", default=IMAGE)
    args = parser.parse_args(argv)

    if not args.skip_build:
        docker_build(args.image)

    try:
        # Start the container inside the try block: a failed `docker run`
        # (for example a port-binding error) can still leave a created
        # container behind, and the finally clause must remove it.
        docker_run(args.image, CONTAINER)
        url = "http://localhost:8000"
        if not wait_for_server(url):
            logs = subprocess.run(
                ["docker", "logs", CONTAINER], capture_output=True, text=True
            )
            print(
                f"FAIL: container didn't start\n{logs.stdout}\n{logs.stderr}",
                file=sys.stderr,
            )
            sys.exit(1)
        run_tests(url)
    finally:
        docker_cleanup(CONTAINER)


if __name__ == "__main__":
    main()