File size: 14,063 Bytes
dc4e6da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
"""
Reliability tests — repeated correctness, error handling, recovery
==================================================================

3.1  Repeated identical requests → same status code every time
3.2  Invalid-input handling → correct HTTP status codes for each bad-input class
3.3  Recovery after a bad request → next valid request still works
3.4  Health endpoint availability under sustained request fire
3.5  Sustained load test β€” repeated lightweight calls with spacing
3.6  Error-response contract consistency
"""
import json
import pathlib
import time
import statistics
import pytest
import requests
from tests.conftest import (
    BASE_URL, TIMEOUT, SEED_IMAGE_URL,
    NONEXISTENT_REQUEST_ID, NONEXISTENT_USER_ID,
    MINIMAL_GENERATE_PAYLOAD,
)

# Output directory for test artifacts: "artifacts" next to the directory that
# contains this file (i.e. two levels up from this module).
ARTIFACTS = pathlib.Path(__file__).parent.parent / "artifacts"
ARTIFACTS.mkdir(exist_ok=True)  # runs at import time; no error if it already exists

N_REPEAT = 4   # repetitions for idempotency / consistency checks
SPACING  = 2   # seconds between calls in sustained load test

# Metrics accumulated by the tests in this module, keyed by check name.
# Written to ARTIFACTS / "reliability_metrics.json" by the session-scoped
# fixture _persist_reliability_metrics once the session ends.
_reliability_results: dict = {}


# ---------------------------------------------------------------------------
# Session-end fixture — persist metrics to JSON for compile_results.py
# ---------------------------------------------------------------------------

@pytest.fixture(scope="session", autouse=True)
def _persist_reliability_metrics():
    """Dump the collected reliability metrics to JSON after the session.

    The code after ``yield`` runs at session teardown. Saving is best-effort:
    any failure is reported with a printed warning instead of being raised,
    so a metrics-write problem never fails the test run itself.
    """
    yield
    try:
        serialized = json.dumps(_reliability_results, indent=2)
        destination = ARTIFACTS / "reliability_metrics.json"
        destination.write_text(serialized)
    except Exception as exc:
        print(f"Warning: could not save reliability metrics: {exc}")


# ---------------------------------------------------------------------------
# Retry helper — HuggingFace Spaces can throttle and reset connections
# ---------------------------------------------------------------------------

_RETRY_ATTEMPTS = 3      # total tries per request, including the first one
_RETRY_BACKOFF_S = 0.5   # base delay in seconds; multiplied by the attempt number


def _send_with_retry(send, url: str, **kw) -> requests.Response:
    """Call ``send(url, **kw)``, retrying on connection errors and timeouts.

    Args:
        send: Bound session method to invoke (e.g. ``session.get``).
        url: Target URL, passed through unchanged.
        **kw: Extra keyword arguments forwarded to ``send`` on every attempt.

    Returns:
        The response of the first attempt that does not raise.

    Raises:
        requests.ConnectionError, requests.Timeout: If the final attempt
            fails as well. Any other exception propagates immediately.
    """
    # All attempts but the last swallow the error and back off linearly
    # (0.5 s after the first failure, 1.0 s after the second).
    for attempt in range(1, _RETRY_ATTEMPTS):
        try:
            return send(url, **kw)
        except (requests.ConnectionError, requests.Timeout):
            time.sleep(_RETRY_BACKOFF_S * attempt)
    # Final attempt: let any exception reach the caller.
    return send(url, **kw)


def _get(session: requests.Session, url: str, **kw) -> requests.Response:
    """GET with up to 3 attempts on connection errors (0.5 s, then 1.0 s backoff)."""
    return _send_with_retry(session.get, url, **kw)


def _post(session: requests.Session, url: str, **kw) -> requests.Response:
    """POST with up to 3 attempts on connection errors (0.5 s, then 1.0 s backoff)."""
    return _send_with_retry(session.post, url, **kw)


# ---------------------------------------------------------------------------
# 3.1  Repeated identical requests are consistent
# ---------------------------------------------------------------------------

class TestRepeatedRequestConsistency:
    """Firing one request several times must yield one status code throughout."""

    def test_health_always_returns_200(self, http):
        """GET /health N_REPEAT times, record the codes, require 200 each time."""
        codes = []
        for _ in range(N_REPEAT):
            reply = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
            codes.append(reply.status_code)

        all_identical = len(set(codes)) == 1
        _reliability_results["repeated_health"] = {
            "iterations": N_REPEAT,
            "statuses": codes,
            "consistent": all_identical,
        }
        assert all(code == 200 for code in codes), (
            f"Health did not return 200 every time: {codes}"
        )

    def test_root_always_returns_200(self, http):
        """GET the root path N_REPEAT times and require 200 each time."""
        codes = []
        for _ in range(N_REPEAT):
            reply = _get(http, f"{BASE_URL}/", timeout=TIMEOUT)
            codes.append(reply.status_code)
        assert all(code == 200 for code in codes), (
            f"Root did not return 200 every time: {codes}"
        )

    def test_unknown_job_always_returns_same_code(self, http):
        """Query the status of a nonexistent job repeatedly; codes must agree."""
        status_url = f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status"
        codes = []
        for _ in range(N_REPEAT):
            codes.append(_get(http, status_url, timeout=TIMEOUT).status_code)

        distinct = set(codes)
        _reliability_results["repeated_job_status"] = {
            "iterations": N_REPEAT,
            "statuses": codes,
            "consistent": len(distinct) == 1,
        }
        # Only consistency is required here; the concrete code (404, 500, ...)
        # is not checked by this test.
        assert len(distinct) == 1, (
            f"Inconsistent status codes for same unknown job: {codes}"
        )

    def test_user_jobs_always_returns_200(self, http):
        """List jobs for a nonexistent user repeatedly and require 200 each time."""
        jobs_url = f"{BASE_URL}/jobs/user/{NONEXISTENT_USER_ID}"
        codes = []
        for _ in range(N_REPEAT):
            codes.append(_get(http, jobs_url, timeout=TIMEOUT).status_code)
        assert all(code == 200 for code in codes), (
            f"User-jobs did not return 200 every time: {codes}"
        )

    def test_422_always_returned_for_missing_request_id(self, http):
        """POST an empty JSON body to /generate/pdf repeatedly; expect 422 each time."""
        codes = []
        for _ in range(N_REPEAT):
            reply = _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT)
            codes.append(reply.status_code)
        assert all(code == 422 for code in codes), (
            f"Schema validation should always give 422, got: {codes}"
        )


# ---------------------------------------------------------------------------
# 3.2  Invalid-input handling — correct codes for each bad-input class
# ---------------------------------------------------------------------------

class TestInvalidInputHandling:
    """Every category of malformed input must map to a predictable HTTP status."""

    # Each entry: (description, url, method, JSON payload or None, allowed codes).
    # The payload is only sent for POST cases.
    CASES = [
        ("missing_request_id_pdf",
         f"{BASE_URL}/generate/pdf", "POST",
         {"seed_images": [SEED_IMAGE_URL]},
         [422]),
        ("missing_request_id_async",
         f"{BASE_URL}/generate/async", "POST",
         {"seed_images": [SEED_IMAGE_URL]},
         [422]),
        ("empty_seed_images_pdf",
         f"{BASE_URL}/generate/pdf", "POST",
         dict(MINIMAL_GENERATE_PAYLOAD, seed_images=[]),
         [422]),
        ("num_solutions_zero_pdf",
         f"{BASE_URL}/generate/pdf", "POST",
         dict(
             MINIMAL_GENERATE_PAYLOAD,
             prompt_params=dict(
                 MINIMAL_GENERATE_PAYLOAD["prompt_params"], num_solutions=0
             ),
         ),
         [422]),
        ("non_int_user_id",
         f"{BASE_URL}/jobs/user/abc", "GET",
         None,
         [422]),
        # Two codes are tolerated here (original note: Supabase 404 or internal 500).
        ("nonexistent_job_status",
         f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status", "GET",
         None,
         [404, 500]),
        ("nonexistent_request_id_pdf",
         f"{BASE_URL}/generate/pdf", "POST",
         MINIMAL_GENERATE_PAYLOAD,
         [404]),
    ]

    @pytest.mark.parametrize("description,url,method,payload,expected", CASES)
    def test_case(self, http, description, url, method, payload, expected):
        """Send one bad request, record the outcome, and check the status code.

        The observed code, the allowed codes and the verdict are stored under
        ``_reliability_results["invalid_input_cases"][description]`` before
        the assertion, so failures are captured in the metrics as well.
        """
        if method != "POST":
            response = _get(http, url, timeout=TIMEOUT)
        else:
            response = _post(http, url, json=payload, timeout=TIMEOUT)

        status = response.status_code
        acceptable = status in expected

        per_case = _reliability_results.setdefault("invalid_input_cases", {})
        per_case[description] = {
            "status_code": status,
            "allowed": expected,
            "ok": acceptable,
        }
        assert acceptable, (
            f"[{description}] Expected {expected}, got {status}: {response.text[:200]}"
        )


# ---------------------------------------------------------------------------
# 3.3  Recovery after a bad request
# ---------------------------------------------------------------------------

class TestRecoveryAfterBadRequest:
    """A valid request after a bad request should not be contaminated."""

    def test_health_recovers_after_bad_generate_pdf(self, http):
        """POST an empty body to /generate/pdf, then require 200 from /health."""
        # Fire a bad request first; its response is intentionally ignored.
        _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT)
        # Immediately hit /health
        r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
        assert r.status_code == 200

    def test_user_jobs_recovers_after_bad_job_status(self, http):
        """Look up a nonexistent job, then require 200 from the user-jobs listing."""
        # Bad request; its response is intentionally ignored.
        _get(http, f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status", timeout=TIMEOUT)
        # Valid request immediately after
        url = f"{BASE_URL}/jobs/user/{NONEXISTENT_USER_ID}"
        r = _get(http, url, timeout=TIMEOUT)
        assert r.status_code == 200

    def test_sequential_mixed_valid_invalid(self, http):
        """Interleave valid and invalid requests — valid ones must always succeed.

        The outcome is written to ``_reliability_results["recovery"]`` whether
        the test passes or fails, so a failed run leaves ``{"passed": False}``
        in the metrics instead of no entry at all.
        """
        passed = False
        try:
            for i in range(4):
                # Alternate: bad, good, bad, good …
                if i % 2 == 0:
                    _post(http, f"{BASE_URL}/generate/async", json={}, timeout=TIMEOUT)
                else:
                    r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
                    assert r.status_code == 200, (
                        f"Health failed after a bad request at iteration {i}"
                    )
            passed = True
        finally:
            # Record in ``finally`` so assertion failures and request errors
            # are reflected in the metrics too.
            _reliability_results["recovery"] = {"passed": passed}


# ---------------------------------------------------------------------------
# 3.4  Health availability while other endpoints are called
# ---------------------------------------------------------------------------

class TestHealthAvailabilityUnderLoad:
    """/health must keep answering 200 when interleaved with job-status lookups.

    The calls are issued sequentially on one session, not concurrently.
    """

    def test_health_available_during_job_status_calls(self, http):
        """Run three lookup -> pause -> health rounds and require 200 on each ping.

        The number of pings and the number of 200 answers are stored under
        ``_reliability_results["health_under_load"]`` before asserting.
        """

        def ping_after_lookup():
            # Job-status lookup for a nonexistent id (described by the original
            # author as a non-trivial Supabase lookup — not verifiable from here).
            _get(http, f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status", timeout=TIMEOUT)
            # Short pause so the Space doesn't throttle.
            time.sleep(0.5)
            # Health check right after the pause.
            return _get(http, f"{BASE_URL}/health", timeout=TIMEOUT).status_code

        observed = [ping_after_lookup() for _ in range(3)]
        successes = sum(1 for code in observed if code == 200)

        _reliability_results["health_under_load"] = {
            "health_pings": len(observed),
            "health_200s": successes,
        }
        assert all(code == 200 for code in observed), (
            f"Health endpoint returned non-200 while load was happening: {observed}"
        )


# ---------------------------------------------------------------------------
# 3.5  Sustained load test
# ---------------------------------------------------------------------------

class TestSustainedLoad:
    """
    Fire N sequential /health calls, SPACING seconds apart.

    Success rate and latency statistics (min / mean / max / stdev) plus total
    wall time are recorded under ``_reliability_results["sustained_load"]``.
    Only the success count is asserted; latency figures are collected for
    reporting and are not checked against any threshold.
    """

    def test_sustained_health_calls(self, http):
        """Time six spaced /health calls, record the stats, require all to be 200."""
        n       = 6
        samples = []   # per-request latency in seconds
        ok      = 0    # number of 200 responses
        wall_start = time.perf_counter()

        for i in range(n):
            t0 = time.perf_counter()
            r  = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
            elapsed = time.perf_counter() - t0
            samples.append(elapsed)
            if r.status_code == 200:
                ok += 1
            # No trailing sleep after the last request.
            if i < n - 1:
                time.sleep(SPACING)

        wall = time.perf_counter() - wall_start
        result = {
            "iterations": n, "ok": ok, "fail": n - ok,
            "success_rate": round(ok / n, 4),
            "min_s":  round(min(samples), 3),
            "mean_s": round(statistics.mean(samples), 3),
            "max_s":  round(max(samples), 3),
            # statistics.stdev needs at least two samples.
            "stdev_s": round(statistics.stdev(samples), 3) if n > 1 else 0,
            "wall_s": round(wall, 3),
        }
        _reliability_results["sustained_load"] = result
        print(f"\n  Sustained load — {result}")

        # ok == n already implies success_rate == 1.0, so one assertion suffices.
        assert ok == n, f"Expected all {n} requests to succeed, got {ok}"


# ---------------------------------------------------------------------------
# 3.6  Error-response contract consistency
# ---------------------------------------------------------------------------

class TestErrorResponseContract:
    """Error responses must always be JSON with a 'detail' field.

    NOTE: A 3-second cooldown is applied at the start of each test so the
    HuggingFace Space can recover after the sustained-load tests above.
    """

    @pytest.fixture(autouse=True)
    def _cooldown(self):
        """Sleep 3 s before every test in this class (autouse)."""
        time.sleep(3)

    def test_422_has_detail_list(self, http):
        """A schema-validation failure returns a list of loc/msg/type error objects."""
        r = _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT)
        assert r.status_code == 422
        body = r.json()
        assert "detail" in body
        # FastAPI validation errors use a list of error objects
        assert isinstance(body["detail"], list)
        for err in body["detail"]:
            assert "loc"  in err, f"Validation error missing 'loc': {err}"
            assert "msg"  in err, f"Validation error missing 'msg': {err}"
            assert "type" in err, f"Validation error missing 'type': {err}"

    def test_404_has_detail_string(self, http):
        """A 404 from /generate/pdf carries a string 'detail'.

        Skipped (rather than silently passed) when the server answers with a
        different status, because the 404 contract was then not exercised.
        """
        r = _post(
            http, f"{BASE_URL}/generate/pdf",
            json=MINIMAL_GENERATE_PAYLOAD,
            timeout=TIMEOUT,
        )
        if r.status_code != 404:
            pytest.skip(f"404 contract not exercised: server returned {r.status_code}")
        body = r.json()
        assert "detail" in body
        assert isinstance(body["detail"], str)

    def test_503_has_detail_if_redis_unavailable(self, http):
        """A 503 from /generate/async carries a 'detail' field.

        Skipped (rather than silently passed) when the server does not answer
        503, because the 503 contract was then not exercised.
        """
        r = _post(
            http, f"{BASE_URL}/generate/async",
            json=MINIMAL_GENERATE_PAYLOAD,
            timeout=TIMEOUT,
        )
        if r.status_code != 503:
            pytest.skip(f"503 contract not exercised: server returned {r.status_code}")
        body = r.json()
        assert "detail" in body

    def test_repeated_422_response_is_stable(self, http):
        """The structure of 422 responses must be identical across calls.

        Each body must hold a list under 'detail', and the (loc, type) pairs
        of its errors must match those of the first response.
        """
        responses = [
            _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT).json()
            for _ in range(3)
        ]
        shapes = []
        for body in responses:
            assert "detail" in body
            assert isinstance(body["detail"], list)
            # Compare structure only (location + error type), not message text.
            shapes.append(
                [(err.get("loc"), err.get("type")) for err in body["detail"]]
            )
        assert all(shape == shapes[0] for shape in shapes), (
            f"422 error structure differs between identical requests: {shapes}"
        )