Spaces:

lablab-ai-amd-developer-hackathon
/

signbridge

Sleeping

App Files Files Community

signbridge / tests /test_integration.py

LucasLooTan

feat: SignBridge initial scaffold + tests

18d028b 13 days ago

raw

history blame contribute delete

7.09 kB

	"""Integration tests — exercise multi-step user flows end-to-end."""

	from __future__ import annotations

	import base64
	import io

	import numpy as np
	import pytest
	from fastapi.testclient import TestClient
	from PIL import Image

	from signbridge.backend import app
	from signbridge.space import _capture_sign, _clear, _new_session, _speak


	@pytest.fixture()
	def client() -> TestClient:
	    """Return a ``TestClient`` wrapping the imported backend ``app``."""
	    return TestClient(app)


	def _frame(rgb: tuple[int, int, int] = (180, 200, 160), size: int = 96) -> np.ndarray:
	    """Return a square image array filled with the single colour ``rgb``.

	    The result is a ``uint8`` array of shape ``(size, size, 3)`` in which
	    every pixel equals ``rgb``.
	    """
	    return np.full((size, size, 3), rgb, dtype=np.uint8)


	def _frame_b64(rgb: tuple[int, int, int] = (180, 200, 160), size: int = 96) -> str:
	    """Return a solid-colour frame as a base64-encoded JPEG (ASCII ``str``).

	    The frame comes from ``_frame(rgb, size)`` and is saved as JPEG at
	    quality 80; the returned string is the bare base64 payload with no
	    ``data:`` URL prefix.
	    """
	    arr = _frame(rgb, size)
	    img = Image.fromarray(arr)
	    buf = io.BytesIO()
	    img.save(buf, format="JPEG", quality=80)
	    return base64.b64encode(buf.getvalue()).decode("ascii")


	class TestUserFlowFingerspell:
	    """User fingerspells L-U-C-A-S then presses Speak."""

	    def test_via_space_helpers(self, monkeypatch: pytest.MonkeyPatch) -> None:
	        """Capture five signs through the Space helpers, then speak them.

	        ``vlm._resolve_client`` is patched so each capture receives the next
	        letter from a canned sequence instead of calling a real client.
	        """
	        # Stub VLM to return one letter at a time.
	        from signbridge.recognizer import vlm

	        # Single source of truth for the stubbed replies, the number of
	        # captures, and the expected history.
	        letters = ["L", "U", "C", "A", "S"]
	        responses = iter(letters)

	        class _Resp:
	            """Stand-in response exposing ``choices[0].message.content``."""

	            def __init__(self, c: str) -> None:
	                self.choices = [type("C", (), {"message": type("M", (), {"content": c})()})()]

	        class _FakeClient:
	            """Stand-in client exposing ``chat.completions.create(**kwargs)``."""

	            class chat:  # noqa: N801
	                class completions:  # noqa: N801
	                    @staticmethod
	                    def create(**_: object) -> _Resp:
	                        return _Resp(next(responses))

	        monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test"))

	        state = _new_session()
	        for _letter in letters:
	            # Only the updated session state is needed; the first two
	            # return values are ignored.
	            _latest, _history, state = _capture_sign(_frame(), state)
	        assert state.sign_history == letters

	        sentence, audio_path, state = _speak(state)
	        # Composer fallback (no API keys for composer in this test) → naive joiner
	        assert "Lucas" in sentence
	        assert audio_path  # silent-stub WAV exists

	    def test_via_backend_endpoints(self, client: TestClient) -> None:
	        """Walk /recognize, /compose and /speak over HTTP in sequence."""
	        # Direct multi-step flow over HTTP, exercising every endpoint.
	        for _letter in "LUCAS":
	            r = client.post("/recognize", json={"frame": _frame_b64()})
	            assert r.status_code == 200
	            # No API keys → token is "" but endpoint succeeds.
	            # NOTE(review): this relies on the test environment having no
	            # recognizer credentials configured — confirm in CI.
	            assert r.json()["token"] == ""

	        # Compose a manually-curated sequence
	        r = client.post("/compose", json={"signs": ["L", "U", "C", "A", "S"]})
	        assert r.status_code == 200
	        assert "Lucas" in r.json()["sentence"]

	        # Speak
	        r = client.post("/speak", json={"text": "My name is Lucas."})
	        assert r.status_code == 200
	        assert len(r.content) > 0


	class TestClearResetsCleanly:
	    """Capture → speak → clear leaves the session state empty."""

	    def test_full_round_trip(self, monkeypatch: pytest.MonkeyPatch) -> None:
	        """Capture two signs, speak, then check ``_clear`` resets the state."""
	        from signbridge.recognizer import vlm

	        class _FakeClient:
	            """Stand-in client whose ``create`` always answers ``"hello"``."""

	            class chat:  # noqa: N801
	                class completions:  # noqa: N801
	                    @staticmethod
	                    def create(**_: object):
	                        # Anonymous object exposing choices[0].message.content.
	                        return type(
	                            "R",
	                            (),
	                            {
	                                "choices": [
	                                    type(
	                                        "C",
	                                        (),
	                                        {"message": type("M", (), {"content": "hello"})()},
	                                    )()
	                                ]
	                            },
	                        )()

	        monkeypatch.setattr(vlm, "_resolve_client", lambda: (_FakeClient(), "test"))

	        state = _new_session()
	        _, _, state = _capture_sign(_frame(), state)
	        _, _, state = _capture_sign(_frame(), state)
	        assert state.sign_history == ["hello", "hello"]

	        sentence, audio, state = _speak(state)
	        assert sentence
	        assert audio

	        # Only the history text and the state are checked; the other three
	        # return values are underscore-prefixed to mark them as unused.
	        _latest, history, _sentence_box, _audio_out, state = _clear(state)
	        assert state.sign_history == []
	        assert state.last_sentence == ""
	        assert state.last_audio_path is None
	        assert "no signs" in history.lower()


	class TestEdgeCases:
	    """Boundary and malformed-input checks against the HTTP endpoints."""

	    def test_huge_sign_sequence(self, client: TestClient) -> None:
	        """/compose accepts a 200-token sequence and returns a sentence."""
	        # 200 fingerspelled letters — make sure compose endpoint doesn't crash.
	        signs = list("ABCDEFGHIJ" * 20)
	        r = client.post("/compose", json={"signs": signs})
	        assert r.status_code == 200
	        assert r.json()["sentence"]  # non-empty

	    def test_unicode_in_compose(self, client: TestClient) -> None:
	        """/compose returns 200 for non-ASCII tokens (only the status is checked)."""
	        # Synthetic unicode token should pass through naive joiner unscathed.
	        r = client.post("/compose", json={"signs": ["héllo", "wörld"]})
	        assert r.status_code == 200

	    def test_speak_very_long_text(self, client: TestClient) -> None:
	        """/speak returns 200 for a 1000-character input."""
	        r = client.post("/speak", json={"text": "a " * 500})
	        assert r.status_code == 200

	    def test_recognize_jpeg_with_data_url_jpg(self, client: TestClient) -> None:
	        """/recognize accepts a JPEG wrapped in a ``data:image/jpg`` URL."""
	        b64 = _frame_b64()
	        r = client.post(
	            "/recognize", json={"frame": f"data:image/jpg;base64,{b64}"}
	        )
	        # Slightly malformed data URL (jpg vs jpeg) — should still work via tolerant decoder.
	        assert r.status_code == 200

	    def test_recognize_png_frame(self, client: TestClient) -> None:
	        """/recognize accepts a bare base64 PNG as well as JPEG."""
	        arr = _frame()
	        img = Image.fromarray(arr)
	        buf = io.BytesIO()
	        img.save(buf, format="PNG")
	        b64 = base64.b64encode(buf.getvalue()).decode("ascii")
	        r = client.post("/recognize", json={"frame": b64})
	        assert r.status_code == 200

	    def test_compose_with_only_punctuation_glosses(self, client: TestClient) -> None:
	        """Single lowercase letters are composed as glosses, not fingerspelling.

	        Despite the method name, the tokens sent are the lowercase letters
	        ``"a"`` and ``"b"``, not punctuation.
	        """
	        # Tokens that are 1 char, lowercase letters — should not be misread as fingerspelling.
	        r = client.post("/compose", json={"signs": ["a", "b"]})
	        assert r.status_code == 200
	        # Naive joiner only treats UPPERCASE single letters as fingerspelling.
	        # Lowercase 'a' / 'b' are full glosses → should appear with a space, no concat.
	        assert r.json()["sentence"] == "A b."

	    def test_health_after_recognize_failure(self, client: TestClient) -> None:
	        """/healthz still answers 200 after /recognize is sent an undecodable frame.

	        The status of the bad /recognize call itself is not asserted here.
	        """
	        # Even after a 400, /healthz should still respond.
	        client.post("/recognize", json={"frame": "%%%bad%%%"})
	        r = client.get("/healthz")
	        assert r.status_code == 200


	class TestBackendInfoEndpoint:
	    """Checks that /info reports the configuration taken from the environment."""

	    def test_info_reflects_env(self, client: TestClient, monkeypatch: pytest.MonkeyPatch) -> None:
	        """Set provider/model env vars, then expect /info to echo them back."""
	        # The variables are set after the client fixture is built, so this
	        # passes only if /info reads the environment at request time.
	        monkeypatch.setenv("SIGNBRIDGE_PROVIDER", "openai")
	        monkeypatch.setenv(
	            "SIGNBRIDGE_COMPOSER_MODEL", "meta-llama/Llama-3.1-8B-Instruct"
	        )
	        r = client.get("/info")
	        assert r.status_code == 200
	        body = r.json()
	        assert body["provider"] == "openai"
	        assert body["composer_model"].endswith("Llama-3.1-8B-Instruct")