File size: 8,301 Bytes
4de7d31
85b7ac8
 
 
eb895b1
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498f684
85b7ac8
 
 
 
 
a7caaff
 
 
 
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7caaff
 
 
 
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7caaff
 
 
 
 
 
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4de7d31
85b7ac8
eb895b1
85b7ac8
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
"""Comprehensive smoke test for the Cloud-Native DevOps Debug FastAPI server.

Usage:
  .\\.venv\\Scripts\\python.exe smoke_test.py
  .\\.venv\\Scripts\\python.exe smoke_test.py --mode live --base-url http://127.0.0.1:7860

Modes:
  - inprocess (default): uses FastAPI TestClient, no running server needed.
  - live: uses requests against a running server.
"""


import argparse
import json
import sys
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple


@dataclass
class TestResult:
    """Outcome of a single named smoke-test check."""

    # Human-readable label identifying the check.
    name: str
    # True when the check passed.
    ok: bool
    # Optional diagnostic text describing a failure; empty by default.
    details: str = ""


class EndpointClient:
    """Minimal HTTP client interface used by the smoke test.

    Concrete subclasses implement ``get`` and ``post``. Both return a
    ``(status_code, decoded_body)`` pair, where ``decoded_body`` is the parsed
    JSON payload, or ``{}`` when the response carries no JSON.
    """

    @staticmethod
    def _decode_json(response: Any) -> Any:
        """Return the parsed JSON body of *response*, or ``{}`` if it has none.

        Non-JSON bodies (an HTML page, an empty body, a plain-text error) must
        not abort the whole smoke run, so decoding failures are mapped to an
        empty dict. JSON decoding errors are ``ValueError`` subclasses; any
        other exception is left to propagate.
        """
        try:
            return response.json()
        except ValueError:
            return {}

    def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
        """Issue a GET for *path*; must be overridden by subclasses."""
        raise NotImplementedError

    def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
        """Issue a POST to *path* with JSON *body*; must be overridden by subclasses."""
        raise NotImplementedError


class InProcessClient(EndpointClient):
    """Client that drives the FastAPI app in-process through ``TestClient``."""

    def __init__(self):
        # Imported here rather than at module level, so loading this module
        # does not itself require fastapi or the server package.
        from fastapi.testclient import TestClient
        from server.app import app

        self._client = TestClient(app)

    def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
        """GET *path* and return ``(status_code, decoded_body)``."""
        response = self._client.get(path)
        return response.status_code, self._decode_json(response)

    def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
        """POST *body* (``{}`` when omitted) to *path*; return ``(status_code, decoded_body)``."""
        response = self._client.post(path, json=body or {})
        return response.status_code, self._decode_json(response)


class LiveClient(EndpointClient):
    """Client that talks to an already running server using ``requests``."""

    def __init__(self, base_url: str, timeout: float = 20):
        """Store the target server and the per-request timeout.

        Args:
            base_url: Root URL of the server; trailing slashes are stripped.
            timeout: Seconds passed as ``timeout`` to every request.
        """
        # Imported here rather than at module level, so loading this module
        # does not itself require requests.
        import requests

        self._requests = requests
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

    def get(self, path: str) -> Tuple[int, Dict[str, Any]]:
        """GET ``base_url + path`` and return ``(status_code, decoded_body)``."""
        response = self._requests.get(f"{self._base_url}{path}", timeout=self._timeout)
        return response.status_code, self._decode_json(response)

    def post(self, path: str, body: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, Any]]:
        """POST *body* (``{}`` when omitted) to ``base_url + path``."""
        response = self._requests.post(
            f"{self._base_url}{path}", json=body or {}, timeout=self._timeout
        )
        return response.status_code, self._decode_json(response)


def assert_true(name: str, cond: bool, details: str = "") -> TestResult:
    """Build the ``TestResult`` for one check.

    Nothing is raised on failure; the outcome is simply recorded. ``details``
    is kept only for failing checks and blanked for passing ones.
    """
    if cond:
        return TestResult(name=name, ok=True, details="")
    return TestResult(name=name, ok=False, details=details)


def run_smoke(client: EndpointClient) -> int:
    """Run every smoke check against *client* and print a report.

    The requests are issued in a fixed order; the ``/step`` and ``/state``
    calls presumably act on the episode started by the preceding ``/reset``,
    so the order must not be changed.

    Returns:
        0 when every check passed, 1 otherwise (suitable as an exit code).
    """
    outcomes = []

    def check(label: str, passed: Any, context: str = "") -> None:
        # Record one check; the context text is only retained on failure.
        outcomes.append(assert_true(label, passed, context))

    # Root now serves the HTML landing page, so only the status code is checked.
    code, _ = client.get("/")
    check("GET / landing page", code == 200)

    code, health = client.get("/health")
    check("GET /health", code == 200 and health.get("status") == "healthy", str(health))

    code, info = client.get("/info")
    check("GET /info", code == 200 and isinstance(info.get("tasks"), list), str(info))

    code, tasks_payload = client.get("/tasks")
    if isinstance(tasks_payload, dict):
        tasks = tasks_payload.get("tasks", [])
    else:
        tasks = []
    check("GET /tasks", code == 200 and len(tasks) >= 6, str(tasks_payload))

    # Reset without a task id: the server picks the task.
    code, reset_data = client.post("/reset", {"seed": 123})
    obs = reset_data.get("observation", {})
    check(
        "POST /reset random",
        code == 200 and isinstance(obs.get("task_id"), str) and isinstance(obs.get("files"), list),
        str(reset_data),
    )

    # The same index given as int and as string must select the same task.
    code_int, body_int = client.post("/reset", {"task_id": 1, "seed": 1})
    code_str, body_str = client.post("/reset", {"task_id": "1", "seed": 1})
    task_from_int = body_int.get("observation", {}).get("task_id")
    task_from_str = body_str.get("observation", {}).get("task_id")
    check(
        "POST /reset accepts int/string index",
        code_int == 200 and code_str == 200 and task_from_int == task_from_str,
        f"int={code_int}:{task_from_int}, str={code_str}:{task_from_str}",
    )

    # Two resets with the same seed must yield the same task and error message.
    code_a, body_a = client.post("/reset", {"seed": 999})
    code_b, body_b = client.post("/reset", {"seed": 999})
    obs_a = body_a.get("observation", {})
    obs_b = body_b.get("observation", {})
    check(
        "Deterministic reset with seed",
        code_a == 200
        and code_b == 200
        and obs_a.get("task_id") == obs_b.get("task_id")
        and obs_a.get("error", {}).get("error_message") == obs_b.get("error", {}).get("error_message"),
        f"A={obs_a.get('task_id')} B={obs_b.get('task_id')}",
    )

    scenario_request = {"task_id": "dockerfile_syntax", "scenario_id": "typo_filename", "seed": 7}
    code, _ = client.post("/reset", scenario_request)
    check("POST /reset specific scenario", code == 200)

    hint_action = {"action_type": "request_hint", "reasoning": "Need help"}
    code, step_hint = client.post("/step", {"action": hint_action})
    check(
        "POST /step request_hint",
        code == 200 and "observation" in step_hint and "reward" in step_hint,
        str(step_hint),
    )

    fix_action = {
        "action_type": "replace_line",
        "edits": [{"file_path": "Dockerfile", "line_number": 3, "new_content": "COPY requirements.txt ."}],
        "reasoning": "Fix typo",
    }
    code, step_fix = client.post("/step", {"action": fix_action})
    fix_info = step_fix.get("info", {})
    check(
        "POST /step replace_line",
        code == 200 and fix_info.get("issues_fixed", 0) >= 1,
        str(step_fix),
    )

    code, state = client.get("/state")
    check("GET /state", code == 200 and "observation" in state, str(state))

    code, submit = client.post("/step", {"action": {"action_type": "submit", "reasoning": "Done"}})
    check("POST /step submit", code == 200 and submit.get("done") is True, str(submit))

    # Hand-written two-step trajectory fed to the grader endpoint.
    first_step = {
        "step": 1,
        "action": {"action_type": "replace_line", "edits": [{"file_path": "Dockerfile", "line_number": 3}]},
        "reward": 0.3,
        "done": False,
        "info": {"issues_fixed": 1, "issues_total": 1},
    }
    second_step = {
        "step": 2,
        "action": {"action_type": "submit"},
        "reward": 0.7,
        "done": True,
        "info": {"issues_fixed": 1, "issues_total": 1},
    }
    trajectory = [first_step, second_step]
    code, grader = client.post("/grader", {"task_id": "dockerfile_syntax", "trajectory": trajectory})
    score = grader.get("result", {}).get("score")
    check(
        "POST /grader",
        code == 200 and isinstance(score, (int, float)) and 0.0 <= float(score) <= 1.0,
        str(grader),
    )

    code, baseline = client.post("/baseline", {"task_id": "dockerfile_syntax", "num_episodes": 1})
    check(
        "POST /baseline",
        code == 200 and isinstance(baseline.get("results"), list),
        str(baseline),
    )

    # Report: one line per check, plus truncated details for failures.
    total = len(outcomes)
    passed = len([outcome for outcome in outcomes if outcome.ok])

    print("\n=== Smoke Test Results ===")
    for outcome in outcomes:
        marker = "FAIL"
        if outcome.ok:
            marker = "PASS"
        print(f"[{marker}] {outcome.name}")
        if outcome.ok or not outcome.details:
            continue
        detail = outcome.details
        # Keep the report readable: cap very long payload dumps at 300 chars.
        if len(detail) > 300:
            detail = detail[:300] + "..."
        print(f"       {detail}")

    print(f"\nSummary: {passed}/{total} passed")
    if passed == total:
        return 0
    return 1


def main() -> int:
    """Parse command-line options, build the chosen client and run the smoke test.

    Returns:
        The exit code produced by ``run_smoke``.
    """
    cli = argparse.ArgumentParser(description="Smoke test Cloud-Native DevOps Debug FastAPI server")
    cli.add_argument("--mode", choices=["inprocess", "live"], default="inprocess")
    cli.add_argument("--base-url", default="http://127.0.0.1:7860")
    options = cli.parse_args()

    # --base-url is only consulted in live mode.
    client = InProcessClient() if options.mode == "inprocess" else LiveClient(options.base_url)
    return run_smoke(client)


if __name__ == "__main__":
    # Propagate the smoke-test result as the process exit status.
    sys.exit(main())