multi-agent-lab / tests /test_inference_backends.py
agharsallah
Implement local in-process inference backend for transformers models
c6cdf25
Raw
History Blame Contribute Delete
4.26 kB
"""Tests for the unified inference backend registry (Modal + Hugging Face).
The registry is the façade the router, the config loader, and the Lab UI all read:
backend-qualified keys, a uniform entries/binding view across backends, bare keys
defaulting to Modal (backward compatibility), and per-backend credential gates.
"""
from __future__ import annotations
import pytest
from src.models import hf_catalogue, inference, modal_catalogue
def test_modal_and_hf_are_registered():
    """The registry lists both the Modal and HF backends, with Modal as the default."""
    registered = {backend.key for backend in inference.backends()}
    # Superset check: further backends may be registered without breaking this test.
    assert registered.issuperset({"modal", "hf"})
    assert inference.DEFAULT_BACKEND == "modal"
def test_split_key_defaults_bare_to_modal():
    """``split_key`` yields a ``(backend, model)`` pair, falling back to Modal."""
    expectations = {
        "gemma-4-12b": ("modal", "gemma-4-12b"),
        "modal:gemma-4-12b": ("modal", "gemma-4-12b"),
        "hf:org/model": ("hf", "org/model"),
        # An unknown prefix is treated as part of a bare Modal key, not a backend.
        "weird:thing": ("modal", "weird:thing"),
    }
    for raw_key, expected_pair in expectations.items():
        assert inference.split_key(raw_key) == expected_pair
def test_qualify_keeps_modal_bare_and_prefixes_others():
    """A Modal key is returned unchanged; an HF key gains the ``hf:`` prefix."""
    bare = inference.qualify("modal", "gemma-4-12b")
    assert bare == "gemma-4-12b"
    prefixed = inference.qualify("hf", "org/model")
    assert prefixed == "hf:org/model"
def test_entries_are_tagged_and_qualified():
    """Per-backend listings use the right key form; the combined listing covers them all.

    Checks that Modal entries expose the same bare keys as the Modal catalogue,
    that every HF entry key carries the ``hf:`` prefix, and that the unqualified
    ``entries()`` call is exactly the concatenation (by count) of each registered
    backend's own listing.
    """
    modal_keys = {e["key"] for e in inference.entries("modal")}
    hf_keys = {e["key"] for e in inference.entries("hf")}
    # Modal entries keep bare keys; HF entries are qualified; the two are disjoint.
    assert modal_keys == {e["key"] for e in modal_catalogue.entries()}
    assert all(k.startswith("hf:") for k in hf_keys)
    assert modal_keys.isdisjoint(hf_keys)
    # The unqualified call returns every backend's models, each tagged with its backend.
    everything = inference.entries()
    assert {"modal", "hf"} <= {e["backend"] for e in everything}
    # Sum over whatever is registered instead of a hard-coded backend list, so
    # registering a new backend does not require editing this assertion.
    per_backend_total = sum(len(inference.entries(b.key)) for b in inference.backends())
    assert len(everything) == per_backend_total
def test_entry_by_key_round_trips_both_backends():
    """A key taken from either catalogue resolves to an entry tagged with that backend."""
    first_modal = modal_catalogue.entries()[0]["key"]
    first_hf = inference.qualify("hf", hf_catalogue.entries()[0]["key"])
    for key, backend in ((first_modal, "modal"), (first_hf, "hf")):
        found = inference.entry_by_key(key)
        assert found["backend"] == backend
    # A key that matches nothing is expected to yield None rather than raise.
    missing = inference.entry_by_key("nope:nothing")
    assert missing is None
def test_binding_dispatches_to_the_right_backend():
    """``binding_for`` produces a binding shaped by the backend the key names."""
    hf_profile_key = hf_catalogue.default_key_for_profile("tiny")
    hf_binding = inference.binding_for(
        inference.qualify("hf", hf_profile_key),
        env={"HF_TOKEN": "tok"},
    )
    # Expected: the HF catalogue's base URL, and the HF_TOKEN value as the API key.
    assert hf_binding["base_url"] == hf_catalogue.DEFAULT_BASE_URL
    assert hf_binding["api_key"] == "tok"

    modal_env = {"MODAL_WORKSPACE": "ws", "MODAL_LLM_KEY": "EMPTY"}
    modal_binding = inference.binding_for(
        modal_catalogue.default_key_for_profile("balanced"),
        env=modal_env,
    )
    assert "modal.run" in modal_binding["base_url"]
def test_default_key_for_profile_is_backend_scoped():
    """Profile defaults come back in the key namespace of the requested backend."""
    # HF currently tags only the tiny tier (its single live chat model); Modal tags
    # every tier. The point here is that keys are namespaced per backend.
    hf_pick = inference.default_key_for_profile("tiny", "hf")
    assert hf_pick is not None
    assert hf_pick.startswith("hf:")

    modal_pick = inference.default_key_for_profile("strong", "modal")
    assert modal_pick is not None
    assert not modal_pick.startswith("hf:")
def test_backend_available_and_configured_backends():
    """Credential gates: availability per backend, then the configured-backend list."""
    availability_cases = (
        ("modal", {"MODAL_WORKSPACE": "ws"}, True),
        ("hf", {"HF_TOKEN": "x"}, True),
        ("modal", {}, False),
        ("hf", {}, False),
        # An unknown backend name is unavailable even when some credential is set.
        ("nope", {"HF_TOKEN": "x"}, False),
    )
    for backend, env, expected in availability_cases:
        assert inference.backend_available(backend, env=env) is expected

    full_env = {"MODAL_WORKSPACE": "ws", "HF_TOKEN": "x"}
    # Display order: Modal first.
    assert inference.configured_backends(env=full_env) == ["modal", "hf"]
    assert inference.configured_backends(env={}) == []
def test_binding_unknown_backend_raises():
    """``binding_for`` raises ``KeyError`` for an ``hf:`` key that names no known model."""
    # entry_by_key tolerates unknown keys, but binding_for surfaces a config error.
    unknown_key = "hf:does/not-exist"
    credentials = {"HF_TOKEN": "x"}
    with pytest.raises(KeyError):
        inference.binding_for(unknown_key, env=credentials)