"""
Reliability tests — repeated correctness, error handling, recovery
==================================================================

3.1 Repeated identical requests → same status code every time
3.2 Invalid-input handling → correct HTTP status codes for each bad-input class
3.3 Recovery after a bad request → next valid request still works
3.4 Health endpoint availability under sustained request fire
3.5 Sustained load test — repeated lightweight calls with spacing
3.6 Error-response contract consistency
"""
import json
import pathlib
import time
import statistics

import pytest
import requests

from tests.conftest import (
    BASE_URL,
    TIMEOUT,
    SEED_IMAGE_URL,
    NONEXISTENT_REQUEST_ID,
    NONEXISTENT_USER_ID,
    MINIMAL_GENERATE_PAYLOAD,
)

# Directory where compile_results.py expects the per-suite metrics JSON.
ARTIFACTS = pathlib.Path(__file__).parent.parent / "artifacts"
ARTIFACTS.mkdir(exist_ok=True)

N_REPEAT = 4  # repetitions for idempotency / consistency checks
SPACING = 2   # seconds between calls in sustained load test

# Accumulated across the whole session; flushed to JSON by the autouse
# fixture below so compile_results.py can pick it up.
_reliability_results: dict = {}


# ---------------------------------------------------------------------------
# Session-end fixture — persist metrics to JSON for compile_results.py
# ---------------------------------------------------------------------------
@pytest.fixture(scope="session", autouse=True)
def _persist_reliability_metrics():
    """Yield during the test session; write metrics JSON afterwards."""
    yield
    try:
        (ARTIFACTS / "reliability_metrics.json").write_text(
            json.dumps(_reliability_results, indent=2)
        )
    except Exception as e:
        # Best-effort persistence: a failed write must not fail the suite.
        print(f"Warning: could not save reliability metrics: {e}")


# ---------------------------------------------------------------------------
# Retry helpers — HuggingFace Spaces can throttle and reset connections
# ---------------------------------------------------------------------------
def _request_with_retry(do_request, url: str, **kw) -> requests.Response:
    """Call ``do_request(url, **kw)`` with up to 3 attempts.

    Retries only on connection/timeout errors, sleeping 0.5s then 1.0s
    between attempts (linear backoff); the last failure is re-raised.
    HTTP error statuses are NOT retried — tests assert on them.
    """
    for attempt in range(3):
        try:
            return do_request(url, **kw)
        except (requests.ConnectionError, requests.Timeout):
            if attempt == 2:
                raise
            time.sleep(0.5 * (attempt + 1))
    raise AssertionError("unreachable: retry loop always returns or raises")


def _get(session: requests.Session, url: str, **kw) -> requests.Response:
    """GET with up to 3 retries on connection errors (500ms backoff each)."""
    return _request_with_retry(session.get, url, **kw)


def _post(session: requests.Session, url: str, **kw) -> requests.Response:
    """POST with up to 3 retries on connection errors (500ms backoff each)."""
    return _request_with_retry(session.post, url, **kw)


# ---------------------------------------------------------------------------
# 3.1 Repeated identical requests are consistent
# ---------------------------------------------------------------------------
class TestRepeatedRequestConsistency:
    """The same request should always return the same status code."""

    def test_health_always_returns_200(self, http):
        statuses = [
            _get(http, f"{BASE_URL}/health", timeout=TIMEOUT).status_code
            for _ in range(N_REPEAT)
        ]
        _reliability_results["repeated_health"] = {
            "iterations": N_REPEAT,
            "statuses": statuses,
            "consistent": len(set(statuses)) == 1,
        }
        assert all(s == 200 for s in statuses), (
            f"Health did not return 200 every time: {statuses}"
        )

    def test_root_always_returns_200(self, http):
        statuses = [
            _get(http, f"{BASE_URL}/", timeout=TIMEOUT).status_code
            for _ in range(N_REPEAT)
        ]
        assert all(s == 200 for s in statuses), (
            f"Root did not return 200 every time: {statuses}"
        )

    def test_unknown_job_always_returns_same_code(self, http):
        url = f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status"
        statuses = [_get(http, url, timeout=TIMEOUT).status_code for _ in range(N_REPEAT)]
        _reliability_results["repeated_job_status"] = {
            "iterations": N_REPEAT,
            "statuses": statuses,
            "consistent": len(set(statuses)) == 1,
        }
        # Must be consistent (all same code), even if it's 404 or 500
        assert len(set(statuses)) == 1, (
            f"Inconsistent status codes for same unknown job: {statuses}"
        )

    def test_user_jobs_always_returns_200(self, http):
        url = f"{BASE_URL}/jobs/user/{NONEXISTENT_USER_ID}"
        statuses = [_get(http, url, timeout=TIMEOUT).status_code for _ in range(N_REPEAT)]
        assert all(s == 200 for s in statuses), (
            f"User-jobs did not return 200 every time: {statuses}"
        )

    def test_422_always_returned_for_missing_request_id(self, http):
        statuses = [
            _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT).status_code
            for _ in range(N_REPEAT)
        ]
        assert all(s == 422 for s in statuses), (
            f"Schema validation should always give 422, got: {statuses}"
        )


# ---------------------------------------------------------------------------
# 3.2 Invalid-input handling — correct codes for each bad-input class
# ---------------------------------------------------------------------------
class TestInvalidInputHandling:
    """Each class of bad input should yield a predictable HTTP code."""

    CASES = [
        # (description, endpoint, method, payload/params, expected_codes)
        (
            "missing_request_id_pdf",
            f"{BASE_URL}/generate/pdf",
            "POST",
            {"seed_images": [SEED_IMAGE_URL]},
            [422],
        ),
        (
            "missing_request_id_async",
            f"{BASE_URL}/generate/async",
            "POST",
            {"seed_images": [SEED_IMAGE_URL]},
            [422],
        ),
        (
            "empty_seed_images_pdf",
            f"{BASE_URL}/generate/pdf",
            "POST",
            {**MINIMAL_GENERATE_PAYLOAD, "seed_images": []},
            [422],
        ),
        (
            "num_solutions_zero_pdf",
            f"{BASE_URL}/generate/pdf",
            "POST",
            {**MINIMAL_GENERATE_PAYLOAD, "prompt_params": {**MINIMAL_GENERATE_PAYLOAD["prompt_params"], "num_solutions": 0}},
            [422],
        ),
        (
            "non_int_user_id",
            f"{BASE_URL}/jobs/user/abc",
            "GET",
            None,
            [422],
        ),
        (
            "nonexistent_job_status",
            f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status",
            "GET",
            None,
            [404, 500],  # Supabase 404 or internal 500
        ),
        (
            "nonexistent_request_id_pdf",
            f"{BASE_URL}/generate/pdf",
            "POST",
            MINIMAL_GENERATE_PAYLOAD,
            [404],
        ),
    ]

    @pytest.mark.parametrize("description,url,method,payload,expected", CASES)
    def test_case(self, http, description, url, method, payload, expected):
        if method == "POST":
            r = _post(http, url, json=payload, timeout=TIMEOUT)
        else:
            r = _get(http, url, timeout=TIMEOUT)
        _reliability_results.setdefault("invalid_input_cases", {})[description] = {
            "status_code": r.status_code,
            "allowed": expected,
            "ok": r.status_code in expected,
        }
        assert r.status_code in expected, (
            f"[{description}] Expected {expected}, got {r.status_code}: {r.text[:200]}"
        )


# ---------------------------------------------------------------------------
# 3.3 Recovery after a bad request
# ---------------------------------------------------------------------------
class TestRecoveryAfterBadRequest:
    """A valid request after a bad request should not be contaminated."""

    def test_health_recovers_after_bad_generate_pdf(self, http):
        # Fire a bad request first
        _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT)
        # Immediately hit /health
        r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
        assert r.status_code == 200

    def test_user_jobs_recovers_after_bad_job_status(self, http):
        # Bad request
        _get(http, f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status", timeout=TIMEOUT)
        # Valid request immediately after
        url = f"{BASE_URL}/jobs/user/{NONEXISTENT_USER_ID}"
        r = _get(http, url, timeout=TIMEOUT)
        assert r.status_code == 200

    def test_sequential_mixed_valid_invalid(self, http):
        """Interleave valid and invalid requests — valid ones must always succeed."""
        for i in range(4):
            # Alternate: bad, good, bad, good …
            if i % 2 == 0:
                _post(http, f"{BASE_URL}/generate/async", json={}, timeout=TIMEOUT)
            else:
                r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
                assert r.status_code == 200, (
                    f"Health failed after a bad request at iteration {i}"
                )
        _reliability_results["recovery"] = {"passed": True}


# ---------------------------------------------------------------------------
# 3.4 Health availability while other endpoints are called
# ---------------------------------------------------------------------------
class TestHealthAvailabilityUnderLoad:
    """Health endpoint must remain available while other calls are in-flight."""

    def test_health_available_during_job_status_calls(self, http):
        health_codes = []
        for _ in range(3):
            # Trigger a Supabase lookup (non-trivial)
            _get(http, f"{BASE_URL}/jobs/{NONEXISTENT_REQUEST_ID}/status", timeout=TIMEOUT)
            time.sleep(0.5)  # brief pause so Space doesn't throttle
            # Check health immediately after
            r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
            health_codes.append(r.status_code)
        _reliability_results["health_under_load"] = {
            "health_pings": len(health_codes),
            "health_200s": health_codes.count(200),
        }
        assert all(c == 200 for c in health_codes), (
            f"Health endpoint returned non-200 while load was happening: {health_codes}"
        )


# ---------------------------------------------------------------------------
# 3.5 Sustained load test
# ---------------------------------------------------------------------------
class TestSustainedLoad:
    """
    Fire N sequential /health calls with a small spacing.
    Measures stability: no degradation in response time over time.
    """

    def test_sustained_health_calls(self, http):
        n = 6
        samples = []
        ok = 0
        wall_start = time.perf_counter()
        for i in range(n):
            t0 = time.perf_counter()
            r = _get(http, f"{BASE_URL}/health", timeout=TIMEOUT)
            elapsed = time.perf_counter() - t0
            samples.append(elapsed)
            if r.status_code == 200:
                ok += 1
            if i < n - 1:
                time.sleep(SPACING)
        wall = time.perf_counter() - wall_start

        result = {
            "iterations": n,
            "ok": ok,
            "fail": n - ok,
            "success_rate": round(ok / n, 4),
            "min_s": round(min(samples), 3),
            "mean_s": round(statistics.mean(samples), 3),
            "max_s": round(max(samples), 3),
            "stdev_s": round(statistics.stdev(samples), 3) if n > 1 else 0,
            "wall_s": round(wall, 3),
        }
        _reliability_results["sustained_load"] = result
        print(f"\n Sustained load — {result}")
        assert ok == n, f"Expected all {n} requests to succeed, got {ok}"
        assert result["success_rate"] == 1.0


# ---------------------------------------------------------------------------
# 3.6 Error-response contract consistency
# ---------------------------------------------------------------------------
class TestErrorResponseContract:
    """Error responses must always be JSON with a 'detail' field.

    NOTE: A 3-second cooldown is applied at the start of each test so the
    HuggingFace Space can recover after the sustained-load tests above.
    """

    @pytest.fixture(autouse=True)
    def _cooldown(self):
        """Give the Space 3s to recover after sustained-load tests."""
        time.sleep(3)

    def test_422_has_detail_list(self, http):
        r = _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT)
        assert r.status_code == 422
        body = r.json()
        assert "detail" in body
        # FastAPI validation errors use a list of error objects
        assert isinstance(body["detail"], list)
        for err in body["detail"]:
            assert "loc" in err, f"Validation error missing 'loc': {err}"
            assert "msg" in err, f"Validation error missing 'msg': {err}"
            assert "type" in err, f"Validation error missing 'type': {err}"

    def test_404_has_detail_string(self, http):
        r = _post(
            http,
            f"{BASE_URL}/generate/pdf",
            json=MINIMAL_GENERATE_PAYLOAD,
            timeout=TIMEOUT,
        )
        if r.status_code == 404:
            body = r.json()
            assert "detail" in body
            assert isinstance(body["detail"], str)

    def test_503_has_detail_if_redis_unavailable(self, http):
        r = _post(
            http,
            f"{BASE_URL}/generate/async",
            json=MINIMAL_GENERATE_PAYLOAD,
            timeout=TIMEOUT,
        )
        if r.status_code == 503:
            body = r.json()
            assert "detail" in body

    def test_repeated_422_response_is_stable(self, http):
        """The structure of 422 responses must be identical across calls."""
        responses = [
            _post(http, f"{BASE_URL}/generate/pdf", json={}, timeout=TIMEOUT).json()
            for _ in range(3)
        ]
        # All must have "detail" and it must be a list each time
        for body in responses:
            assert "detail" in body
            assert isinstance(body["detail"], list)