Spaces:

cmboulanger
/

tei-annotator

Runtime error

App Files Files Community

tei-annotator / scripts /smoke_test_webservice.py

cmboulanger

feat: Add webservice for demonstration

c3f33b6 about 1 month ago

raw

history blame contribute delete

7.58 kB

	#!/usr/bin/env python
	"""
	Smoke test for the TEI Annotator webservice.

	Assumes a locally running instance. Start it first:
	cd webservice && uvicorn main:app --reload

	The base URL is derived from HOST and PORT in webservice/.env.
	If that file does not exist, the script exits with instructions.

	Usage:
	python scripts/smoke_test_webservice.py [--base-url URL]
	uv run scripts/smoke_test_webservice.py
	"""

	from __future__ import annotations

	import argparse
	import json
	import re
	import sys
	import urllib.error
	import urllib.request
	from pathlib import Path

	# ---------------------------------------------------------------------------
	# Resolve base URL from webservice/.env
	# ---------------------------------------------------------------------------

	# Repository root: this script lives one directory below it (scripts/).
	_REPO = Path(__file__).parent.parent
	# Environment file read to derive the default base URL.
	_ENV_FILE = _REPO.joinpath("webservice", ".env")
	# Template the user is told to copy when the environment file is missing.
	_ENV_TEMPLATE = _REPO.joinpath("webservice", ".env.template")


	def _load_webservice_env() -> dict[str, str]:
	    """Read webservice/.env into a dict of key -> value strings.

	    Blank lines, lines starting with ``#`` and lines without ``=`` are
	    skipped. Keys and values are whitespace-stripped, and surrounding
	    double/single quote characters are removed from values. No shell
	    expansion is performed. When a key occurs more than once, the last
	    occurrence wins.

	    Exits the process with status 1 (after printing instructions to stderr)
	    if the .env file does not exist.
	    """
	    if not _ENV_FILE.exists():
	        instructions = (
	            f"ERROR: {_ENV_FILE} not found.\n"
	            f" Create it from the template first:\n"
	            f" cp {_ENV_TEMPLATE} {_ENV_FILE}\n"
	            f" Then fill in at least one API key."
	        )
	        print(instructions, file=sys.stderr)
	        sys.exit(1)

	    content = _ENV_FILE.read_text(encoding="utf-8")
	    stripped_lines = (raw.strip() for raw in content.splitlines())
	    # Keep only real assignments: non-empty, not a comment, containing "=".
	    assignments = (
	        entry.partition("=")
	        for entry in stripped_lines
	        if entry and not entry.startswith("#") and "=" in entry
	    )
	    return {
	        name.strip(): raw_value.strip().strip('"').strip("'")
	        for name, _, raw_value in assignments
	    }


	def _default_base_url() -> str:
	    """Build the default ``http://host:port`` URL from webservice/.env.

	    HOST defaults to ``localhost`` and PORT to ``8000`` when the key is
	    missing or empty. Wildcard listen addresses (``0.0.0.0`` and the IPv6
	    equivalent ``::``) are not connectable targets, so they are replaced by
	    ``localhost``. Any other IPv6 literal is wrapped in square brackets, as
	    required for the host component of a URL.

	    Exits the process (via ``_load_webservice_env``) if the .env file is
	    missing.
	    """
	    env = _load_webservice_env()
	    host = env.get("HOST") or "localhost"
	    port = env.get("PORT") or "8000"
	    if host in ("0.0.0.0", "::", "[::]"):
	        # Listening on all interfaces: connect through the loopback name.
	        host = "localhost"
	    elif ":" in host and not host.startswith("["):
	        # Bare IPv6 literal; without brackets its colons would be
	        # confused with the port separator.
	        host = f"[{host}]"
	    return f"http://{host}:{port}"

	# Sample passage sent to the annotation endpoint; it mentions both people
	# and places so the minimal schema below has something to tag.
	_TEST_TEXT = " ".join(
	    (
	        "Marie Curie was born in Warsaw, Poland, and later conducted her "
	        "research in Paris, France.",
	        "Together with her husband Pierre Curie, she discovered polonium "
	        "and radium.",
	    )
	)

	# Smallest schema used by the smoke test: two elements and no rules.
	_MINIMAL_SCHEMA = {
	    "elements": [
	        {"tag": tag, "description": description}
	        for tag, description in (
	            ("persName", "a person's name"),
	            ("placeName", "a geographical place name"),
	        )
	    ],
	    "rules": [],
	}


	# ---------------------------------------------------------------------------
	# Helpers
	# ---------------------------------------------------------------------------


	def _get(url: str) -> tuple[int, str]:
	    """Fetch *url* with GET and return ``(status, body_text)``.

	    HTTP error responses are not raised: their status code and body are
	    returned like any other response. The body is decoded with undecodable
	    bytes replaced. If the server cannot be reached at all, a hint is
	    printed to stderr and the process exits with status 2.
	    """
	    try:
	        with urllib.request.urlopen(url, timeout=30) as response:
	            code = response.status
	            text = response.read().decode(errors="replace")
	    except urllib.error.HTTPError as http_err:
	        # The server answered, just not with a success status.
	        return http_err.code, http_err.read().decode(errors="replace")
	    except urllib.error.URLError as url_err:
	        hint = (
	            f"\nERROR: Cannot reach the webservice at {url}\n"
	            f" {url_err.reason}\n"
	            f" Is the server running? Start it with:\n"
	            f" cd webservice && uvicorn main:app --reload"
	        )
	        print(hint, file=sys.stderr)
	        sys.exit(2)
	    return code, text


	def _post_json(url: str, payload: dict) -> tuple[int, dict | str]:
	    """POST *payload* as JSON to *url* and return ``(status, body)``.

	    On a successful response the body is parsed as JSON; if it is not valid
	    JSON, the decoded text is returned instead so callers can report a
	    failed check rather than crash. For HTTP error responses the status
	    code and the decoded body text are returned. If the server cannot be
	    reached at all, a hint is printed to stderr and the process exits with
	    status 2.
	    """
	    body = json.dumps(payload).encode()
	    req = urllib.request.Request(
	        url, data=body, headers={"Content-Type": "application/json"}, method="POST"
	    )
	    try:
	        with urllib.request.urlopen(req, timeout=60) as resp:
	            status = resp.status
	            raw = resp.read()
	    except urllib.error.HTTPError as exc:
	        return exc.code, exc.read().decode(errors="replace")
	    except urllib.error.URLError as exc:
	        print(
	            f"\nERROR: Cannot reach the webservice at {url}\n"
	            f" {exc.reason}\n"
	            f" Is the server running? Start it with:\n"
	            f" cd webservice && uvicorn main:app --reload",
	            file=sys.stderr,
	        )
	        sys.exit(2)
	    try:
	        return status, json.loads(raw)
	    except ValueError:
	        # Covers JSONDecodeError and UnicodeDecodeError: the server replied
	        # with something that is not JSON (e.g. an HTML page).
	        return status, raw.decode(errors="replace")


	def _check(name: str, ok: bool, detail: str = "") -> bool:
	    """Print a PASS/FAIL line for the check *name* and return *ok* unchanged.

	    When *detail* is non-empty it is printed on a following line.
	    """
	    verdict = "PASS" if ok else "FAIL"
	    report = [f" [{verdict}] {name}"]
	    if detail:
	        report.append(f" {detail}")
	    print("\n".join(report))
	    return ok


	# ---------------------------------------------------------------------------
	# Tests
	# ---------------------------------------------------------------------------


	def test_html_ui(base_url: str) -> bool:
	    """Check that the root page answers with HTTP 200 and contains a form."""
	    status, body = _get(f"{base_url}/")
	    serves_form = status == 200 and "<form" in body
	    return _check(
	        "GET / returns HTTP 200 with HTML form",
	        serves_form,
	        f"status={status}",
	    )


	def test_api_annotate(base_url: str) -> bool:
	    """Annotate the sample text with the minimal schema and verify the reply.

	    Reports four checks: HTTP 200, a non-empty ``xml`` field, the original
	    text being recoverable by stripping tags from ``xml``, and the presence
	    of a ``fuzzy_spans`` list. If the status check fails, the remaining
	    checks are skipped and False is returned.
	    """
	    status, data = _post_json(
	        f"{base_url}/api/annotate",
	        {"text": _TEST_TEXT, "schema": _MINIMAL_SCHEMA},
	    )
	    if not _check(
	        "POST /api/annotate returns HTTP 200",
	        status == 200,
	        f"status={status} body={str(data)[:200]}",
	    ):
	        return False

	    # A 200 reply is not guaranteed to be a JSON object; treat anything
	    # else as an empty one so the checks below fail instead of raising.
	    reply = data if isinstance(data, dict) else {}
	    xml = reply.get("xml", "")
	    if not isinstance(xml, str):
	        # e.g. a JSON null: report it as an empty xml rather than crash
	        # inside re.sub / str.strip.
	        xml = ""
	    plain = re.sub(r"<[^>]+>", "", xml)

	    # Run every check (no short-circuit) so all results get printed.
	    ok_nonempty = _check("Response xml is non-empty", bool(xml.strip()))
	    ok_plain = _check(
	        "Plain text preserved after stripping tags",
	        plain == _TEST_TEXT,
	        f"expected: {_TEST_TEXT!r}\n got: {plain!r}",
	    )
	    ok_fuzzy = _check(
	        "Response contains fuzzy_spans list",
	        isinstance(reply.get("fuzzy_spans"), list),
	    )
	    return ok_nonempty and ok_plain and ok_fuzzy


	def test_api_no_schema(base_url: str) -> bool:
	    """Check that a request without a schema still yields annotated XML.

	    The service is expected to fall back to its built-in BLBL schema.
	    """
	    reference = "Curie, Marie. 1898. Sur une nouvelle substance radioactive. Paris."
	    status, data = _post_json(f"{base_url}/api/annotate", {"text": reference})
	    has_xml = isinstance(data, dict) and bool(data.get("xml"))
	    succeeded = status == 200 and has_xml
	    return _check(
	        "POST /api/annotate without schema uses BLBL default (HTTP 200)",
	        succeeded,
	        f"status={status}",
	    )


	def test_api_unknown_provider(base_url: str) -> bool:
	    """Check that naming a nonexistent provider is rejected with HTTP 400."""
	    request_body = {"text": "hello", "provider": "nonexistent"}
	    status, _ = _post_json(f"{base_url}/api/annotate", request_body)
	    rejected = status == 400
	    return _check(
	        "POST /api/annotate with unknown provider returns 400",
	        rejected,
	        f"status={status}",
	    )


	def test_openapi_docs(base_url: str) -> bool:
	    """Check that the interactive API documentation page is served."""
	    response = _get(f"{base_url}/docs")
	    status = response[0]
	    return _check("GET /docs (OpenAPI UI) returns HTTP 200", status == 200)


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------


	def main() -> int:
	    """Run all smoke tests against the server and return a process exit code.

	    The base URL comes from ``--base-url`` when given. Only when it is
	    omitted is webservice/.env consulted, so an explicit URL (and
	    ``--help``) work without a local .env file.

	    Returns 0 if every check passed, 1 otherwise.
	    """
	    parser = argparse.ArgumentParser(description="Smoke-test the TEI Annotator webservice.")
	    parser.add_argument(
	        "--base-url",
	        default=None,
	        help="Base URL of the running server (default: derived from HOST and PORT in webservice/.env)",
	    )
	    args = parser.parse_args()
	    # Resolve the default lazily: reading .env exits the process when the
	    # file is missing, which must not happen if the user supplied a URL.
	    base_url = args.base_url if args.base_url is not None else _default_base_url()
	    base = base_url.rstrip("/")

	    print(f"Smoke-testing {base}\n")

	    results = [
	        test_html_ui(base),
	        test_openapi_docs(base),
	        test_api_unknown_provider(base),
	        test_api_no_schema(base),
	        test_api_annotate(base),
	    ]

	    passed = sum(results)
	    total = len(results)
	    print(f"\n{'═' * 50}")
	    print(f" {passed}/{total} checks passed")
	    print(f"{'═' * 50}")
	    return 0 if all(results) else 1


	# Script entry point: the return value of main() becomes the exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())