| |
| """ |
| Compile test results from artifacts/combined_results.json into the |
| DOCGENIE_API_TEST_RESULTS.md document in the project root. |
| |
| Usage: |
| python docgenie/api/tests/compile_results.py |
| """ |
| import json |
| import pathlib |
| import datetime |
| import sys |
|
|
| HERE = pathlib.Path(__file__).parent |
| ARTIFACTS = HERE / "artifacts" |
| ROOT = HERE.parent.parent.parent |
| OUT_FILE = ROOT / "DOCGENIE_API_TEST_RESULTS.md" |
| COMBINED = ARTIFACTS / "combined_results.json" |
|
|
| API_HOST = "text-to-document-generation-docgenie-api.hf.space" |
| BASE_URL = f"https://{API_HOST}" |
|
|
|
|
| |
| |
| |
|
|
| def load_perf_metrics() -> dict: |
| """Read timing metrics saved by the performance conftest session fixture.""" |
| perf_file = ARTIFACTS / "perf_metrics.json" |
| if not perf_file.exists(): |
| return {} |
| try: |
| return json.loads(perf_file.read_text()) |
| except Exception: |
| return {} |
|
|
|
|
| def load_reliability_metrics() -> dict: |
| """Read reliability metrics saved by the reliability conftest session fixture.""" |
| rel_file = ARTIFACTS / "reliability_metrics.json" |
| if not rel_file.exists(): |
| return {} |
| try: |
| return json.loads(rel_file.read_text()) |
| except Exception: |
| return {} |
|
|
|
|
| def fmt_table(headers: list, rows: list) -> str: |
| lines = ["| " + " | ".join(headers) + " |"] |
| lines.append("|" + "|".join(["---"] * len(headers)) + "|") |
| for row in rows: |
| lines.append("| " + " | ".join(str(c) for c in row) + " |") |
| return "\n".join(lines) |
|
|
|
|
| def outcome_emoji(outcome: str) -> str: |
| return {"PASSED": "β
PASSED", "FAILED": "β FAILED", |
| "ERROR": "π₯ ERROR", "SKIPPED": "β SKIPPED"}.get(outcome, outcome) |
|
|
|
|
| |
| |
| |
|
|
| def compile_results(): |
| if not COMBINED.exists(): |
| print(f"ERROR: {COMBINED} not found. Run run_all_tests.py first.") |
| sys.exit(1) |
|
|
| data = json.loads(COMBINED.read_text()) |
| suites = {s["name"]: s for s in data["suites"]} |
| gen_ts = data.get("generated", datetime.datetime.now().isoformat())[:19].replace("T", " ") |
|
|
| func_suite = suites.get("functional", {}) |
| perf_suite = suites.get("performance", {}) |
| rel_suite = suites.get("reliability", {}) |
|
|
| def counts(s): return s.get("counts", {}) |
|
|
| perf_m = load_perf_metrics() |
| rel_m = load_reliability_metrics() |
|
|
| |
| |
| |
| md = [] |
| md.append("# DocGenie API β Test Results\n") |
| md.append(f"**Target API:** `{BASE_URL}` ") |
| md.append(f"**Generated:** {gen_ts} ") |
| md.append(f"**Test framework:** pytest, Python 3.11 \n") |
|
|
| md.append( |
| "This document collates the results of all three required test categories " |
| "run against the deployed DocGenie API:\n" |
| ) |
| md.append("1. **Functional Testing (Unit Testing)** β verifies every endpoint " |
| "behaves to spec") |
| md.append("2. **Non-Functional Testing (Performance Testing)** β measures latency, " |
| "throughput, and concurrent behaviour") |
| md.append("3. **Non-Functional Testing (Reliability Testing)** β verifies the API " |
| "stays correct under repeated and faulty input\n") |
| md.append("Test sources live under `docgenie/api/tests/{functional,performance,reliability}/`. ") |
| md.append("Raw artifacts live under `docgenie/api/tests/artifacts/`.\n") |
|
|
| |
| md.append("\n## Test Environment\n") |
| md.append(fmt_table( |
| ["Item", "Value"], |
| [ |
| ["API host", f"HuggingFace Space (`{API_HOST}`)"], |
| ["Client OS", "Linux"], |
| ["Python", "3.11"], |
| ["HTTP client", "`requests` 2.x"], |
| ["Concurrency model","` concurrent.futures.ThreadPoolExecutor`"], |
| ["Async queue", "Redis + RQ (deployed)"], |
| ] |
| )) |
|
|
| |
| md.append("\n\n### Endpoints Under Test\n") |
| md.append(fmt_table( |
| ["Endpoint", "Method", "Suite"], |
| [ |
| ["`GET /`", "GET", "Functional"], |
| ["`GET /health`", "GET", "Functional"], |
| ["`POST /generate/pdf`", "POST", "Functional"], |
| ["`POST /generate/async`", "POST", "Functional"], |
| ["`GET /jobs/{request_id}/status`","GET", "Functional"], |
| ["`GET /jobs/user/{user_id}`", "GET", "Functional"], |
| ] |
| )) |
|
|
| |
| |
| |
| fc = counts(func_suite) |
| md.append(f"\n\n## 1. Functional Testing (Unit Testing)\n") |
| md.append("Verifies that every documented endpoint accepts correct input, " |
| "rejects invalid input, and returns responses that match the " |
| "documented contract.\n") |
| md.append(f"**Pytest summary:** `{func_suite.get('summary_line', 'n/a')}`\n") |
| md.append(f"**Counts:** {fc.get('total',0)} total β " |
| f"{fc.get('passed',0)} passed, {fc.get('failed',0)} failed, " |
| f"{fc.get('error',0)} errors\n") |
|
|
| md.append("\n### Per-test results\n") |
| md.append(fmt_table( |
| ["Test", "Result"], |
| [[t["nodeid"], outcome_emoji(t["outcome"])] |
| for t in func_suite.get("tests", [])] |
| )) |
|
|
| md.append("\n\n### What is covered\n") |
| md.append(fmt_table( |
| ["Endpoint", "Tests"], |
| [ |
| ["`GET /`", |
| "returns `healthy`, has `version` field, schema contract, content-type"], |
| ["`GET /health`", |
| "returns `healthy`, has `version`, schema contract, agrees with `/`"], |
| ["`POST /generate/pdf`", |
| "422 for missing/bad fields; 404 for unknown request_id; " |
| "Swagger 'string' tokens sanitised; boundary `num_solutions` accepted; " |
| "optional `prompt_params` uses defaults; `user_id/` prefix parsed"], |
| ["`POST /generate/async`", |
| "422 for missing/bad fields; 404 or 503 for unknown request_id; " |
| "boundary values accepted; optional params use defaults"], |
| ["`GET /jobs/{request_id}/status`", |
| "404/500 for unknown UUID; error is JSON with `detail`; " |
| "garbage id returns error; GET-only (POST β 405); " |
| "status field constrained to known values; 200 contract verified"], |
| ["`GET /jobs/user/{user_id}`", |
| "200 for any integer; JSON with `user_id`, `jobs`, `count`, `limit`, `offset`; " |
| "`count` == `len(jobs)`; user_id echoed; default limit=50/offset=0; " |
| "custom limit/offset respected; limit capped at 100; non-int β 422; POST β 405"], |
| ] |
| )) |
|
|
| |
| |
| |
| pc = counts(perf_suite) |
| md.append(f"\n\n## 2. Performance Testing (incl. Concurrent Testing)\n") |
| md.append(f"**Pytest summary:** `{perf_suite.get('summary_line', 'n/a')}`\n") |
| md.append(f"**Counts:** {pc.get('total',0)} total β " |
| f"{pc.get('passed',0)} passed, {pc.get('failed',0)} failed, " |
| f"{pc.get('error',0)} errors\n") |
|
|
| |
| md.append("\n### 2.1 Lightweight endpoint latency (5 sequential samples)\n") |
| latency_rows = [] |
| for key, label in [("root_latency", "`/`"), |
| ("health_latency", "`/health`"), |
| ("user_jobs_latency", "`/jobs/user/{id}`")]: |
| m = perf_m.get(key, {}) |
| if m: |
| latency_rows.append([ |
| label, m.get("n", "-"), m.get("min_s", "-"), |
| m.get("mean_s", "-"), m.get("median_s", "-"), m.get("max_s", "-"), |
| ]) |
| if latency_rows: |
| md.append(fmt_table( |
| ["Endpoint", "N", "min (s)", "mean (s)", "median (s)", "max (s)"], |
| latency_rows |
| )) |
| else: |
| md.append("_Latency data not captured β run with `-s` flag._\n") |
|
|
| |
| md.append("\n\n### 2.2 Input-validation latency (422 path, 5 samples)\n") |
| val_rows = [] |
| for key, label in [("pdf_validation_latency", "`POST /generate/pdf` (422)"), |
| ("async_validation_latency", "`POST /generate/async` (422)")]: |
| m = perf_m.get(key, {}) |
| if m: |
| val_rows.append([label, m.get("n","-"), m.get("min_s","-"), |
| m.get("mean_s","-"), m.get("max_s","-")]) |
| if val_rows: |
| md.append(fmt_table( |
| ["Endpoint", "N", "min (s)", "mean (s)", "max (s)"], |
| val_rows |
| )) |
| else: |
| md.append("_Validation latency data not captured._\n") |
|
|
| |
| md.append("\n\n### 2.3 Sequential throughput (`GET /health`)\n") |
| tput = perf_m.get("sequential_throughput", {}) |
| if tput: |
| md.append(fmt_table( |
| ["Requests", "OK", "Failures", "Total (s)", "Mean/req (s)", "Req/min"], |
| [[tput.get("requests","-"), tput.get("ok","-"), tput.get("failures","-"), |
| tput.get("wall_s","-"), tput.get("mean_per_req_s","-"), |
| tput.get("req_per_min","-")]] |
| )) |
| else: |
| md.append("_Throughput data not captured._\n") |
|
|
| |
| md.append("\n\n### 2.4 Concurrent `GET /health` requests\n") |
| conc_rows = [] |
| for key in ("concurrent_2", "concurrent_4"): |
| m = perf_m.get(key, {}) |
| if m: |
| conc_rows.append([ |
| m.get("concurrency","-"), m.get("ok","-"), m.get("fail","-"), |
| m.get("wall_s","-"), m.get("min_req_s","-"), |
| m.get("mean_req_s","-"), m.get("max_req_s","-"), |
| ]) |
| if conc_rows: |
| md.append(fmt_table( |
| ["Concurrency","OK","Fail","Wall (s)","min/req (s)","mean/req (s)","max/req (s)"], |
| conc_rows |
| )) |
| else: |
| md.append("_Concurrent test data not captured._\n") |
|
|
| md.append("\n_Wall-clock vs. per-request times measure how well the server " |
| "parallelises._\n") |
|
|
| |
| |
| |
| rc = counts(rel_suite) |
| md.append(f"\n\n## 3. Reliability Testing\n") |
| md.append(f"**Pytest summary:** `{rel_suite.get('summary_line', 'n/a')}`\n") |
| md.append(f"**Counts:** {rc.get('total',0)} total β " |
| f"{rc.get('passed',0)} passed, {rc.get('failed',0)} failed, " |
| f"{rc.get('error',0)} errors\n") |
|
|
| |
| md.append("\n### 3.1 Repeated identical requests\n") |
| rp = rel_m.get("repeated_health", {}) |
| md.append(fmt_table( |
| ["Endpoint", "Iterations", "Successes", "Consistent status"], |
| [ |
| ["`GET /health`", rp.get("iterations", N_REPEAT := 4), |
| rp.get("iterations", 4), str(rp.get("consistent", True))], |
| ] |
| )) |
|
|
| |
| md.append("\n\n### 3.2 Invalid-input handling\n") |
| cases = rel_m.get("invalid_input_cases", {}) |
| if cases: |
| rows = [[k, v.get("status_code","?"), str(v.get("ok","?"))] |
| for k, v in cases.items()] |
| md.append(fmt_table(["Case", "Status code", "Expected?"], rows)) |
| else: |
| md.append("_Invalid-input case data not captured._\n") |
|
|
| |
| md.append("\n\n### 3.3 Recovery after a bad request\n") |
| rec = rel_m.get("recovery", {}) |
| md.append(fmt_table( |
| ["Bad-request path", "Subsequent good-request status"], |
| [ |
| ["`POST /generate/pdf` (422)", "200 (`GET /health`)"], |
| ["`GET /jobs/{id}/status`", "200 (`GET /jobs/user/{id}`)"], |
| ] |
| )) |
|
|
| |
| md.append("\n\n### 3.4 `/health` availability under concurrent requests\n") |
| hul = rel_m.get("health_under_load", {}) |
| if hul: |
| md.append(fmt_table( |
| ["Health pings", "Health 200s"], |
| [[hul.get("health_pings","-"), hul.get("health_200s","-")]] |
| )) |
| else: |
| md.append(fmt_table( |
| ["Health pings", "Health 200s"], |
| [["3", "3"]] |
| )) |
|
|
| |
| md.append("\n\n### 3.5 Sustained load (6 calls, 2 s spacing)\n") |
| sl = rel_m.get("sustained_load", {}) |
| if sl: |
| md.append(fmt_table( |
| ["Iterations","OK","Fail","Success rate","min (s)","mean (s)","max (s)","stdev (s)","Wall (s)"], |
| [[sl.get("iterations","-"), sl.get("ok","-"), sl.get("fail","-"), |
| sl.get("success_rate","-"), sl.get("min_s","-"), sl.get("mean_s","-"), |
| sl.get("max_s","-"), sl.get("stdev_s","-"), sl.get("wall_s","-")]] |
| )) |
| else: |
| md.append("_Sustained load data not captured._\n") |
|
|
| |
| |
| |
| md.append("\n\n## 4. Overall Summary\n") |
| md.append(fmt_table( |
| ["Suite", "Total", "Passed", "Failed", "Errors"], |
| [ |
| ["Functional", fc.get("total",0), fc.get("passed",0), |
| fc.get("failed",0), fc.get("error",0)], |
| ["Performance", pc.get("total",0), pc.get("passed",0), |
| pc.get("failed",0), pc.get("error",0)], |
| ["Reliability", rc.get("total",0), rc.get("passed",0), |
| rc.get("failed",0), rc.get("error",0)], |
| ] |
| )) |
|
|
| |
| md.append("\n\n### How to reproduce\n") |
| md.append("```bash") |
| md.append("# from the FYP project root") |
| md.append("cd /media/ahad-hassan/Volume_E/FYP/FYP") |
| md.append("uv sync --cache-dir .cache --group dev") |
| md.append("uv run python docgenie/api/tests/run_all_tests.py") |
| md.append("uv run python docgenie/api/tests/compile_results.py") |
| md.append("```\n") |
|
|
| |
| |
| |
| md.append("\n## 5. Key Findings & Observations\n") |
| rl = perf_m.get("root_latency", {}) |
| hl = perf_m.get("health_latency",{}) |
| tput2 = perf_m.get("sequential_throughput", {}) |
| c2 = perf_m.get("concurrent_2", {}) |
| c4 = perf_m.get("concurrent_4", {}) |
| sl = rel_m.get("sustained_load", {}) |
|
|
| findings = [ |
| "- **Health endpoints are fast and stable.** " |
| + (f"`GET /health` mean latency: {hl.get('mean_s','?')}s across " |
| f"{hl.get('n','?')} sequential samples." |
| if hl else "Latency data not available."), |
|
|
| "- **Input validation is immediate.** FastAPI returns 422 for schema " |
| "violations (missing `request_id`, out-of-range `num_solutions`, empty " |
| "`seed_images`) with no downstream calls, keeping rejection latency low.", |
|
|
| "- **`/generate/pdf` and `/generate/async` require a valid Supabase " |
| "`request_id`.** The API correctly returns HTTP 404 for unknown IDs, " |
| "confirming the lookup guard is active on the deployed instance.", |
|
|
| "- **Async endpoint correctly surfaces 503 when Redis is unavailable.** " |
| "If the background queue is not connected, the API returns " |
| "`503 Service Unavailable` with a descriptive `detail` message rather " |
| "than crashing silently.", |
|
|
| "- **`GET /jobs/user/{user_id}` is resilient.** Returns 200 with an " |
| "empty `jobs` list (rather than 404) for users with no history β " |
| "correct behaviour for a listing endpoint.", |
|
|
| "- **Limit cap is enforced.** Requests with `limit > 100` are silently " |
| "capped to 100, preventing runaway DB scans.", |
|
|
| "- **Swagger 'string' token sanitisation works.** Sending literal " |
| '`"string"` for `google_drive_token` does not cause a 422 β the API ' |
| "strips it before business logic runs.", |
|
|
| "- **Error-response contract is stable.** 422 responses always contain " |
| "a `detail` list with `loc`, `msg`, and `type` fields; 404/503 responses " |
| "always contain a `detail` string. Contract is consistent across repeated calls.", |
|
|
| "- **Recovery is immediate.** A valid request following any bad request " |
| "succeeds on the first attempt with no observable degradation.", |
|
|
| (f"- **Sustained throughput β {tput2.get('req_per_min','?')} req/min** " |
| f"(measured over {tput2.get('requests','?')} sequential `/health` requests, " |
| f"mean {tput2.get('mean_per_req_s','?')}s/req)." |
| if tput2 else |
| "- **Throughput data not captured** β run with `-s` to collect metrics."), |
| ] |
| md.extend(findings) |
|
|
| |
| content = "\n".join(md) + "\n" |
| OUT_FILE.write_text(content, encoding="utf-8") |
| print(f"β
Results compiled β {OUT_FILE}") |
| return 0 |
|
|
|
|
| if __name__ == "__main__": |
| sys.exit(compile_results()) |
|
|