File size: 3,690 Bytes
eb1ebe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43f41de
eb1ebe6
 
 
 
43f41de
 
 
 
 
 
eb1ebe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Integration test: build + run Docker image, test via client.

Usage:
    uv run python tests/test_docker.py                    # build + run + test
    uv run python tests/test_docker.py --skip-build       # reuse existing image
    uv run python tests/test_docker.py --image my:tag     # custom image name
"""

import argparse
import subprocess
import sys
import time
from pathlib import Path

# Put the directory one level above this file's folder at the front of
# sys.path. run_tests() imports `client` and `models`, presumably from that
# directory -- TODO confirm.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

# Default image tag (overridable with --image) and the fixed name given to the
# container started by this script.
IMAGE = "explainer-env:latest"
CONTAINER = "explainer-env-test"


def wait_for_server(url: str, timeout: float = 30) -> bool:
    """Poll ``{url}/health`` until a request succeeds or the deadline passes.

    Args:
        url: Base URL of the server; ``/health`` is appended to it as-is.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as ``urlopen`` on the health URL returns without
        raising, False if ``timeout`` seconds elapse first.
    """
    import urllib.request

    # Monotonic clock: the deadline must not shift if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Close the response so a successful probe does not leak its socket.
            with urllib.request.urlopen(f"{url}/health", timeout=2):
                return True
        except Exception:
            # Deliberately broad: any failure just means "not ready yet".
            # Never sleep past the deadline.
            remaining = deadline - time.monotonic()
            if remaining > 0:
                time.sleep(min(1.0, remaining))
    return False


def docker_build(image: str):
    """Build the Docker image, terminating the process if the build fails.

    The build runs with the directory one level above this file's folder as
    both the working directory and the build context, using
    ``server/Dockerfile`` relative to it.

    Args:
        image: Tag passed to ``docker build -t``.
    """
    project_root = Path(__file__).resolve().parents[1]
    print(f"  building {image} from {project_root}...")

    build_cmd = ["docker", "build", "-t", image, "-f", "server/Dockerfile", "."]
    proc = subprocess.run(
        build_cmd, cwd=str(project_root), capture_output=True, text=True
    )

    if proc.returncode == 0:
        print("  build OK")
        return

    # Only the tail of stderr is shown to keep the failure report short.
    print(f"FAIL: docker build\n{proc.stderr[-1000:]}", file=sys.stderr)
    sys.exit(1)


def docker_run(image: str, container: str):
    """Start ``image`` detached under the name ``container``.

    Host port 8000 is published to container port 8000. The process exits
    with status 1 if ``docker run`` fails.

    Args:
        image: Image tag to run.
        container: Name to give the new container.
    """
    # Remove any stale container with this name first; the result is not checked.
    subprocess.run(["docker", "rm", "-f", container], capture_output=True)

    run_cmd = ["docker", "run", "-d", "--name", container, "-p", "8000:8000", image]
    proc = subprocess.run(run_cmd, capture_output=True, text=True)

    if proc.returncode == 0:
        print(f"  container {container} started")
        return

    print(f"FAIL: docker run\n{proc.stderr}", file=sys.stderr)
    sys.exit(1)


def docker_cleanup(container: str):
    """Force-remove ``container`` and print a confirmation line.

    The outcome of ``docker rm -f`` is not inspected, so the confirmation is
    printed whether or not the container existed.
    """
    remove_cmd = ["docker", "rm", "-f", container]
    subprocess.run(remove_cmd, capture_output=True)
    print(f"  container {container} removed")


def run_tests(base_url: str):
    """Exercise a running server through the client: one reset, then a step.

    Checks that ``reset`` yields a non-empty topic, submits a ``generate``
    action, follows up with a ``repair`` action if the episode is not done,
    and checks that the final reward is numeric.

    Args:
        base_url: Base URL of the server under test.
    """
    from client import ExplainerEnv
    from models import ExplainerAction

    # The same minimal marimo notebook is submitted for both action types.
    notebook_src = "import marimo as mo\napp = mo.App()\n@app.cell\ndef _():\n    return\n"

    env = ExplainerEnv(base_url=base_url)
    with env.sync() as session:
        outcome = session.reset()
        assert outcome.observation.topic, "reset should return topic"
        print(f"  reset: topic={outcome.observation.topic!r}")

        generate = ExplainerAction(
            action_type="generate", format="marimo", code=notebook_src
        )
        outcome = session.step(generate)
        if not outcome.done:
            repair = ExplainerAction(
                action_type="repair", format="marimo", code=notebook_src
            )
            outcome = session.step(repair)
        assert isinstance(outcome.reward, (int, float))
        print(f"  step:  reward={outcome.reward:.3f}, done={outcome.done}")

    print("PASS: test_docker (2/2)")


def main():
    """Command-line entry point: build (optional), run, test, clean up.

    Flags:
        --skip-build: do not build the image before running it.
        --image: image tag to build and run (defaults to ``IMAGE``).

    The container is removed in all cases once it has been started, including
    when the server never becomes healthy or a test assertion fails.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--skip-build", action="store_true")
    cli.add_argument("--image", default=IMAGE)
    opts = cli.parse_args()

    if not opts.skip_build:
        docker_build(opts.image)

    docker_run(opts.image, CONTAINER)
    server_url = "http://localhost:8000"
    try:
        if wait_for_server(server_url):
            run_tests(server_url)
        else:
            # Dump the container's output so the startup failure can be diagnosed.
            container_logs = subprocess.run(
                ["docker", "logs", CONTAINER], capture_output=True, text=True
            )
            print(
                f"FAIL: container didn't start\n{container_logs.stdout}\n{container_logs.stderr}",
                file=sys.stderr,
            )
            sys.exit(1)
    finally:
        docker_cleanup(CONTAINER)


# Run the integration test only when this file is executed as a script.
if __name__ == "__main__":
    main()