"""Integration test: build + run Docker image, test via client.

Usage:
    uv run python tests/test_docker.py                  # build + run + test
    uv run python tests/test_docker.py --skip-build     # reuse existing image
    uv run python tests/test_docker.py --image my:tag   # custom image name
"""
import argparse
import subprocess
import sys
import time
from pathlib import Path
# Defaults used by main(): the image tag (overridable with --image) and the
# name given to the throwaway test container.
IMAGE = "explainer-env:latest"
CONTAINER = "explainer-env-test"

# Put the directory one level above this file's folder at the front of
# sys.path; presumably this is what lets run_tests() import the top-level
# `client` and `models` modules -- confirm against the repo layout.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
def wait_for_server(url: str, timeout: int = 30) -> bool:
    """Poll ``{url}/health`` until it responds or ``timeout`` seconds elapse.

    Args:
        url: Base URL of the server; ``/health`` is appended to it.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as one health request succeeds, False if the deadline
        passes without a successful request.

    Raises:
        ValueError: If ``url`` is not a usable URL. This is a caller error,
            so it is reported immediately instead of being retried until
            the timeout expires.
    """
    import http.client
    import urllib.request

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the response so that repeated
            # probes do not leak sockets.
            with urllib.request.urlopen(f"{url}/health", timeout=2):
                return True
        except (OSError, http.client.HTTPException):
            # Refused/reset connections, socket timeouts and HTTP error
            # statuses (URLError and HTTPError derive from OSError) as well
            # as malformed replies all mean "not ready yet": wait and retry.
            time.sleep(1)
    return False
def docker_build(image: str):
    """Build the Docker image ``image`` using ``server/Dockerfile``.

    The build runs with the directory one level above this file's folder as
    its working directory and build context. On a non-zero exit status the
    last 1000 characters of the build's stderr are printed to stderr and the
    process exits with status 1.
    """
    project_root = Path(__file__).resolve().parents[1]
    print(f" building {image} from {project_root}...")

    build_cmd = ["docker", "build", "-t", image, "-f", "server/Dockerfile", "."]
    proc = subprocess.run(
        build_cmd,
        cwd=str(project_root),
        capture_output=True,
        text=True,
    )

    if proc.returncode == 0:
        print(" build OK")
        return

    # Only the tail of stderr is shown to keep the failure report short.
    print(f"FAIL: docker build\n{proc.stderr[-1000:]}", file=sys.stderr)
    sys.exit(1)
def docker_run(image: str, container: str):
    """Start ``image`` detached as ``container``, publishing port 8000.

    Any existing container with the same name is force-removed first. If
    ``docker run`` exits non-zero, its stderr is printed to stderr and the
    process exits with status 1.
    """
    # Drop a leftover container of the same name; the result is ignored.
    subprocess.run(["docker", "rm", "-f", container], capture_output=True)

    run_cmd = ["docker", "run", "-d", "--name", container, "-p", "8000:8000", image]
    proc = subprocess.run(run_cmd, capture_output=True, text=True)
    if proc.returncode:
        print(f"FAIL: docker run\n{proc.stderr}", file=sys.stderr)
        sys.exit(1)

    print(f" container {container} started")
def docker_cleanup(container: str):
    """Force-remove ``container`` and print a confirmation line.

    The exit status of ``docker rm`` is not inspected, so the message is
    printed whether or not a container was actually removed.
    """
    remove_cmd = ["docker", "rm", "-f", container]
    subprocess.run(remove_cmd, capture_output=True)
    print(f" container {container} removed")
def run_tests(base_url: str):
    """Exercise the running server at ``base_url`` through the project client.

    Resets the environment and checks that the observation carries a topic,
    then submits a "generate" action, followed by a "repair" action if the
    first step did not finish the episode, and checks that the final reward
    is numeric.

    Raises:
        AssertionError: If the reset observation has no topic or the reward
            is not an int or float.
    """
    from client import ExplainerEnv
    from models import ExplainerAction

    # The same minimal notebook source is sent with both actions.
    notebook_src = "import marimo as mo\napp = mo.App()\n@app.cell\ndef _():\n return\n"

    def make_action(kind):
        return ExplainerAction(action_type=kind, format="marimo", code=notebook_src)

    env = ExplainerEnv(base_url=base_url)
    with env.sync() as session:
        outcome = session.reset()
        assert outcome.observation.topic, "reset should return topic"
        print(f" reset: topic={outcome.observation.topic!r}")

        outcome = session.step(make_action("generate"))
        if not outcome.done:
            # Episode still open after generating: follow up with a repair.
            outcome = session.step(make_action("repair"))

        assert isinstance(outcome.reward, (int, float))
        print(f" step: reward={outcome.reward:.3f}, done={outcome.done}")

    print("PASS: test_docker (2/2)")
def main():
    """Build the image (unless skipped), run it, test it, and clean up.

    Command-line options:
        --skip-build: Reuse an existing image instead of building one.
        --image: Image name to build and run (defaults to ``IMAGE``).

    The container is always removed on the way out. The process exits with
    status 1 when the server never becomes healthy, after the container
    logs have been printed to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--skip-build", action="store_true")
    parser.add_argument("--image", default=IMAGE)
    args = parser.parse_args()

    if not args.skip_build:
        docker_build(args.image)

    url = "http://localhost:8000"
    try:
        # Start the container inside the try block: a `docker run` that fails
        # after the container was created (for example because the host port
        # is already in use) would otherwise exit without any cleanup.
        docker_run(args.image, CONTAINER)

        if not wait_for_server(url):
            # Surface the container's own output to explain the failure.
            logs = subprocess.run(
                ["docker", "logs", CONTAINER], capture_output=True, text=True
            )
            print(
                f"FAIL: container didn't start\n{logs.stdout}\n{logs.stderr}",
                file=sys.stderr,
            )
            sys.exit(1)

        run_tests(url)
    finally:
        # Runs on success, on test failure and on sys.exit() alike.
        docker_cleanup(CONTAINER)


if __name__ == "__main__":
    main()
|