Spaces:

build-small-hackathon
/

multi-agent-lab

Running on Zero

multi-agent-lab / tests / test_inference_backends.py

agharsallah

Implement local in-process inference backend for transformers models

c6cdf25 21 days ago

4.26 kB

	"""Tests for the unified inference backend registry (Modal + Hugging Face).

	The registry is the façade the router, the config loader, and the Lab UI all read:
	backend-qualified keys, a uniform entries/binding view across backends, bare keys
	defaulting to Modal (backward compatibility), and per-backend credential gates.
	"""

	from __future__ import annotations

	import pytest

	from src.models import hf_catalogue, inference, modal_catalogue


	def test_modal_and_hf_are_registered():
	    """Modal and HF both appear among the registered backends; Modal is the default."""
	    registered = set()
	    for backend in inference.backends():
	        registered.add(backend.key)
	    # Superset, not equality: other backends may be registered as well.
	    assert registered.issuperset({"modal", "hf"})
	    assert inference.DEFAULT_BACKEND == "modal"


	def test_split_key_defaults_bare_to_modal():
	    """``split_key`` returns ``(backend, key)``, defaulting bare keys to Modal."""
	    expectations = (
	        ("gemma-4-12b", ("modal", "gemma-4-12b")),
	        ("modal:gemma-4-12b", ("modal", "gemma-4-12b")),
	        ("hf:org/model", ("hf", "org/model")),
	        # An unknown prefix is treated as part of a bare Modal key, not a backend.
	        ("weird:thing", ("modal", "weird:thing")),
	    )
	    for raw_key, expected_pair in expectations:
	        assert inference.split_key(raw_key) == expected_pair


	def test_qualify_keeps_modal_bare_and_prefixes_others():
	    """``qualify`` leaves a Modal key unprefixed and prepends ``hf:`` to an HF key."""
	    expectations = (
	        (("modal", "gemma-4-12b"), "gemma-4-12b"),
	        (("hf", "org/model"), "hf:org/model"),
	    )
	    for (backend, model_key), qualified in expectations:
	        assert inference.qualify(backend, model_key) == qualified


	def test_entries_are_tagged_and_qualified():
	    """Per-backend listings use disjoint key namespaces; the full listing tags each entry."""

	    def keys_of(rows):
	        # Collect the "key" field of every catalogue/registry entry.
	        return {row["key"] for row in rows}

	    modal_only = keys_of(inference.entries("modal"))
	    hf_only = keys_of(inference.entries("hf"))

	    # Modal entries keep bare keys, identical to the Modal catalogue's own keys.
	    assert modal_only == keys_of(modal_catalogue.entries())
	    # HF entries are qualified, so the two key sets cannot overlap.
	    for key in hf_only:
	        assert key.startswith("hf:")
	    assert modal_only.isdisjoint(hf_only)

	    # Called without a backend, entries() lists models across backends, each
	    # carrying a "backend" tag.
	    combined = inference.entries()
	    seen_backends = {row["backend"] for row in combined}
	    assert {"modal", "hf"} <= seen_backends
	    # NOTE(review): the expected total hard-codes the modal/hf/local backends;
	    # a newly registered backend with entries would need adding here.
	    combined_count = len(combined)
	    per_backend_total = sum(len(inference.entries(name)) for name in ("modal", "hf", "local"))
	    assert combined_count == per_backend_total


	def test_entry_by_key_round_trips_both_backends():
	    """A key taken from either catalogue resolves to an entry tagged with that backend."""
	    first_modal = modal_catalogue.entries()[0]["key"]
	    first_hf = inference.qualify("hf", hf_catalogue.entries()[0]["key"])
	    for key, owner in ((first_modal, "modal"), (first_hf, "hf")):
	        assert inference.entry_by_key(key)["backend"] == owner
	    # A key that matches nothing yields None instead of raising.
	    assert inference.entry_by_key("nope:nothing") is None


	def test_binding_dispatches_to_the_right_backend():
	    """``binding_for`` builds the binding from the backend named by the key."""
	    # HF: the qualified key should produce the HF base URL and use HF_TOKEN as the API key.
	    hf_model = hf_catalogue.default_key_for_profile("tiny")
	    hf_binding = inference.binding_for(inference.qualify("hf", hf_model), env={"HF_TOKEN": "tok"})
	    assert hf_binding["base_url"] == hf_catalogue.DEFAULT_BASE_URL
	    assert hf_binding["api_key"] == "tok"

	    # Modal: a bare key should produce a base URL on a modal.run host.
	    modal_env = {"MODAL_WORKSPACE": "ws", "MODAL_LLM_KEY": "EMPTY"}
	    modal_model = modal_catalogue.default_key_for_profile("balanced")
	    modal_binding = inference.binding_for(modal_model, env=modal_env)
	    assert "modal.run" in modal_binding["base_url"]


	def test_default_key_for_profile_is_backend_scoped():
	    """Profile defaults are returned in the key namespace of the requested backend."""
	    # HF currently tags only the tiny tier (its single live chat model); Modal tags
	    # every tier. The point here is that keys are namespaced per backend.
	    hf_choice = inference.default_key_for_profile("tiny", "hf")
	    assert hf_choice is not None
	    assert hf_choice.startswith("hf:")

	    modal_choice = inference.default_key_for_profile("strong", "modal")
	    assert modal_choice is not None
	    assert not modal_choice.startswith("hf:")


	def test_backend_available_and_configured_backends():
	    """Availability depends on the env passed in; configured backends are listed in order."""
	    availability_cases = (
	        ("modal", {"MODAL_WORKSPACE": "ws"}, True),
	        ("hf", {"HF_TOKEN": "x"}, True),
	        ("modal", {}, False),
	        ("hf", {}, False),
	        # An unregistered backend name is reported unavailable even with a token present.
	        ("nope", {"HF_TOKEN": "x"}, False),
	    )
	    for backend, env, expected in availability_cases:
	        assert inference.backend_available(backend, env=env) is expected

	    fully_configured = {"MODAL_WORKSPACE": "ws", "HF_TOKEN": "x"}
	    # Display order: Modal first.
	    assert inference.configured_backends(env=fully_configured) == ["modal", "hf"]
	    assert inference.configured_backends(env={}) == []


	def test_binding_unknown_backend_raises():
	    """``binding_for`` raises ``KeyError`` for an ``hf:``-qualified key expected not to exist."""
	    # entry_by_key tolerates unknown keys, but binding_for surfaces a config error.
	    missing_key = "hf:does/not-exist"
	    credentials = {"HF_TOKEN": "x"}
	    with pytest.raises(KeyError):
	        inference.binding_for(missing_key, env=credentials)