tei-annotator / scripts /smoke_test_webservice.py
cmboulanger's picture
feat: Add webservice for demonstration
c3f33b6
#!/usr/bin/env python
"""
Smoke test for the TEI Annotator webservice.
Assumes a locally running instance. Start it first:
cd webservice && uvicorn main:app --reload
The base URL is derived from HOST and PORT in webservice/.env.
If that file does not exist, the script exits with instructions.
Usage:
python scripts/smoke_test_webservice.py [--base-url URL]
uv run scripts/smoke_test_webservice.py
"""
from __future__ import annotations
import argparse
import json
import re
import sys
import urllib.error
import urllib.request
from pathlib import Path
# ---------------------------------------------------------------------------
# Resolve base URL from webservice/.env
# ---------------------------------------------------------------------------
# Resolve to an absolute path before walking up: when the interpreter hands a
# script a relative __file__, `.parent.parent` of a bare filename collapses to
# "." and would point at the wrong directory.
_REPO = Path(__file__).resolve().parent.parent
# Settings file this script reads HOST and PORT from.
_ENV_FILE = _REPO / "webservice" / ".env"
# Template named in the "file not found" instructions.
_ENV_TEMPLATE = _REPO / "webservice" / ".env.template"
def _clean_env_value(value: str) -> str:
    """Return the usable part of the raw text that follows ``=`` on a .env line."""
    value = value.strip()
    if value[:1] in ('"', "'"):
        # Quoted value: keep only what sits between the opening quote and its
        # partner, which also discards any trailing "# comment".
        closing = value.find(value[0], 1)
        if closing != -1:
            return value[1:closing]
    else:
        # Unquoted value: whitespace followed by '#' starts an inline comment.
        # A '#' with no whitespace before it (e.g. a URL fragment) is kept.
        value = re.sub(r"\s+#.*", "", value)
    # Stray or unbalanced quotes are trimmed from both ends.
    return value.strip('"').strip("'")


def _load_webservice_env() -> dict[str, str]:
    """Parse webservice/.env and return key→value pairs (no shell expansion).

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export`` keyword on the key is dropped, surrounding quotes are
    removed from values, and inline ``# comments`` after unquoted or quoted
    values are discarded.

    Returns:
        Mapping of setting names to their string values.

    Raises:
        SystemExit: with status 1 (after printing instructions to stderr) when
            the .env file does not exist.
    """
    if not _ENV_FILE.exists():
        print(
            f"ERROR: {_ENV_FILE} not found.\n"
            f" Create it from the template first:\n"
            f" cp {_ENV_TEMPLATE} {_ENV_FILE}\n"
            f" Then fill in at least one API key.",
            file=sys.stderr,
        )
        sys.exit(1)

    env: dict[str, str] = {}
    for raw_line in _ENV_FILE.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key_parts = key.split()
        # Accept shell-style "export KEY=value" lines.
        if len(key_parts) == 2 and key_parts[0] == "export":
            key = key_parts[1]
        env[key.strip()] = _clean_env_value(value)
    return env
def _default_base_url() -> str:
    """Build the default ``http://host:port`` URL from webservice/.env.

    HOST falls back to ``localhost`` and PORT to ``8000`` when missing or
    empty. Wildcard listen addresses are replaced by ``localhost`` and bare
    IPv6 literals are wrapped in brackets so the result is a valid URL.

    Returns:
        The base URL without a trailing slash.
    """
    env = _load_webservice_env()
    host = env.get("HOST") or "localhost"
    # A wildcard address (listen on all interfaces) is not something to
    # connect to; use localhost instead. "::" is the IPv6 counterpart of
    # 0.0.0.0.
    if host in ("0.0.0.0", "::", "[::]"):
        host = "localhost"
    elif ":" in host and not host.startswith("["):
        # An IPv6 literal must be bracketed, otherwise its colons are
        # indistinguishable from the port separator.
        host = f"[{host}]"
    port = env.get("PORT") or "8000"
    return f"http://{host}:{port}"
_TEST_TEXT = (
"Marie Curie was born in Warsaw, Poland, and later conducted her research "
"in Paris, France. Together with her husband Pierre Curie, she discovered "
"polonium and radium."
)
_MINIMAL_SCHEMA = {
"elements": [
{"tag": "persName", "description": "a person's name"},
{"tag": "placeName", "description": "a geographical place name"},
],
"rules": [],
}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _get(url: str) -> tuple[int, str]:
try:
with urllib.request.urlopen(url, timeout=30) as resp:
return resp.status, resp.read().decode(errors="replace")
except urllib.error.HTTPError as exc:
return exc.code, exc.read().decode(errors="replace")
except urllib.error.URLError as exc:
print(
f"\nERROR: Cannot reach the webservice at {url}\n"
f" {exc.reason}\n"
f" Is the server running? Start it with:\n"
f" cd webservice && uvicorn main:app --reload",
file=sys.stderr,
)
sys.exit(2)
def _post_json(url: str, payload: dict) -> tuple[int, dict | str]:
body = json.dumps(payload).encode()
req = urllib.request.Request(
url, data=body, headers={"Content-Type": "application/json"}, method="POST"
)
try:
with urllib.request.urlopen(req, timeout=60) as resp:
return resp.status, json.loads(resp.read())
except urllib.error.HTTPError as exc:
return exc.code, exc.read().decode(errors="replace")
except urllib.error.URLError as exc:
print(
f"\nERROR: Cannot reach the webservice at {url}\n"
f" {exc.reason}\n"
f" Is the server running? Start it with:\n"
f" cd webservice && uvicorn main:app --reload",
file=sys.stderr,
)
sys.exit(2)
def _check(name: str, ok: bool, detail: str = "") -> bool:
status = "PASS" if ok else "FAIL"
msg = f" [{status}] {name}"
if detail:
msg += f"\n {detail}"
print(msg)
return ok
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_html_ui(base_url: str) -> bool:
    """Check that the root page answers with HTTP 200 and contains a form tag."""
    status, body = _get(f"{base_url}/")
    page_ok = status == 200 and "<form" in body
    return _check("GET / returns HTTP 200 with HTML form", page_ok, f"status={status}")
def test_api_annotate(base_url: str) -> bool:
    """Annotate the sample text with the minimal schema and validate the reply.

    Checks, in order: the endpoint answers HTTP 200; the ``xml`` field is
    non-empty; removing the tags from ``xml`` gives back the input text
    unchanged; ``fuzzy_spans`` is a list. After a non-200 answer the remaining
    checks are skipped.

    Returns:
        True only when every check passed.
    """
    status, data = _post_json(
        f"{base_url}/api/annotate",
        {"text": _TEST_TEXT, "schema": _MINIMAL_SCHEMA},
    )
    if not _check(
        "POST /api/annotate returns HTTP 200",
        status == 200,
        f"status={status} body={str(data)[:200]}",
    ):
        return False

    # The body may be plain text rather than a JSON object; treat that as an
    # empty reply so the checks below fail instead of raising.
    reply = data if isinstance(data, dict) else {}
    xml = reply.get("xml")
    if not isinstance(xml, str):
        # A null or non-string "xml" field must produce FAIL lines rather than
        # a TypeError from re.sub / str.strip.
        xml = ""
    plain = re.sub(r"<[^>]+>", "", xml)

    # Run all three checks unconditionally so each one prints its own verdict.
    ok_nonempty = _check("Response xml is non-empty", bool(xml.strip()))
    ok_plain = _check(
        "Plain text preserved after stripping tags",
        plain == _TEST_TEXT,
        f"expected: {_TEST_TEXT!r}\n got: {plain!r}",
    )
    ok_fuzzy = _check(
        "Response contains fuzzy_spans list",
        isinstance(reply.get("fuzzy_spans"), list),
    )
    return ok_nonempty and ok_plain and ok_fuzzy
def test_api_no_schema(base_url: str) -> bool:
    """Omitting schema should fall back to the built-in BLBL schema.

    Passes when the endpoint answers HTTP 200 with a JSON object whose
    ``xml`` field is non-empty.
    """
    citation = "Curie, Marie. 1898. Sur une nouvelle substance radioactive. Paris."
    status, data = _post_json(f"{base_url}/api/annotate", {"text": citation})

    has_xml = False
    if status == 200 and isinstance(data, dict):
        has_xml = bool(data.get("xml"))

    return _check(
        "POST /api/annotate without schema uses BLBL default (HTTP 200)",
        has_xml,
        f"status={status}",
    )
def test_api_unknown_provider(base_url: str) -> bool:
    """Check that naming a provider that does not exist is answered with HTTP 400."""
    endpoint = f"{base_url}/api/annotate"
    request_body = {"text": "hello", "provider": "nonexistent"}
    status, _ = _post_json(endpoint, request_body)
    rejected = status == 400
    return _check(
        "POST /api/annotate with unknown provider returns 400",
        rejected,
        f"status={status}",
    )
def test_openapi_docs(base_url: str) -> bool:
    """Check that the ``/docs`` page answers with HTTP 200."""
    status, _ = _get(f"{base_url}/docs")
    # Report the status like the other checks do, so a failure shows which
    # code the server actually returned.
    return _check(
        "GET /docs (OpenAPI UI) returns HTTP 200",
        status == 200,
        f"status={status}",
    )
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> int:
default_url = _default_base_url()
parser = argparse.ArgumentParser(description="Smoke-test the TEI Annotator webservice.")
parser.add_argument(
"--base-url",
default=default_url,
help=f"Base URL of the running server (default: derived from webservice/.env, currently {default_url})",
)
args = parser.parse_args()
base = args.base_url.rstrip("/")
print(f"Smoke-testing {base}\n")
results = [
test_html_ui(base),
test_openapi_docs(base),
test_api_unknown_provider(base),
test_api_no_schema(base),
test_api_annotate(base),
]
passed = sum(results)
total = len(results)
print(f"\n{'═' * 50}")
print(f" {passed}/{total} checks passed")
print(f"{'═' * 50}")
return 0 if all(results) else 1
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())