Spaces:
Running on Zero
Running on Zero
| """Tests for the unified inference backend registry (Modal + Hugging Face). | |
| The registry is the façade the router, the config loader, and the Lab UI all read: | |
| backend-qualified keys, a uniform entries/binding view across backends, bare keys | |
| defaulting to Modal (backward compatibility), and per-backend credential gates. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from src.models import hf_catalogue, inference, modal_catalogue | |
def test_modal_and_hf_are_registered():
    """The registry lists the ``modal`` and ``hf`` backends; ``modal`` is the default."""
    registered = set()
    for backend in inference.backends():
        registered.add(backend.key)

    # Other backends may be registered too; only these two are required here.
    assert registered.issuperset({"modal", "hf"})
    assert inference.DEFAULT_BACKEND == "modal"
def test_split_key_defaults_bare_to_modal():
    """``split_key`` returns ``(backend, key)``; keys without a known prefix map to Modal."""
    cases = (
        ("gemma-4-12b", ("modal", "gemma-4-12b")),
        ("modal:gemma-4-12b", ("modal", "gemma-4-12b")),
        ("hf:org/model", ("hf", "org/model")),
        # An unknown prefix is treated as part of a bare Modal key, not a backend.
        ("weird:thing", ("modal", "weird:thing")),
    )
    for raw_key, expected in cases:
        assert inference.split_key(raw_key) == expected
def test_qualify_keeps_modal_bare_and_prefixes_others():
    """``qualify`` leaves a Modal key unprefixed and prepends ``hf:`` to an HF key."""
    cases = (
        ("modal", "gemma-4-12b", "gemma-4-12b"),
        ("hf", "org/model", "hf:org/model"),
    )
    for backend, bare_key, expected in cases:
        assert inference.qualify(backend, bare_key) == expected
def test_entries_are_tagged_and_qualified():
    """Per-backend entries use the expected key form; the combined view tags each entry."""
    modal_keys = set()
    for entry in inference.entries("modal"):
        modal_keys.add(entry["key"])
    hf_keys = set()
    for entry in inference.entries("hf"):
        hf_keys.add(entry["key"])

    # Modal entries keep bare keys; HF entries are qualified; the two are disjoint.
    catalogue_keys = {entry["key"] for entry in modal_catalogue.entries()}
    assert modal_keys == catalogue_keys
    assert all(key.startswith("hf:") for key in hf_keys)
    assert modal_keys.isdisjoint(hf_keys)

    # The unqualified call returns every backend's models, each tagged with its backend.
    combined = inference.entries()
    tagged_backends = {entry["backend"] for entry in combined}
    assert tagged_backends.issuperset({"modal", "hf"})

    # NOTE(review): the expected total hard-codes three backends (including "local");
    # presumably this must be extended if another backend is registered -- confirm.
    per_backend_total = sum(
        len(inference.entries(backend)) for backend in ("modal", "hf", "local")
    )
    assert len(combined) == per_backend_total
def test_entry_by_key_round_trips_both_backends():
    """``entry_by_key`` finds catalogue entries by key and returns ``None`` for a miss."""
    first_modal = modal_catalogue.entries()[0]
    first_hf = hf_catalogue.entries()[0]
    # Modal keys are used as-is; the HF catalogue key is qualified before the lookup.
    keys_by_backend = {
        "modal": first_modal["key"],
        "hf": inference.qualify("hf", first_hf["key"]),
    }

    for backend, key in keys_by_backend.items():
        found = inference.entry_by_key(key)
        assert found["backend"] == backend

    assert inference.entry_by_key("nope:nothing") is None
def test_binding_dispatches_to_the_right_backend():
    """``binding_for`` resolves an HF key and a Modal key to backend-specific bindings."""
    # HF: the binding carries the catalogue's default base URL and the token from env.
    hf_env = {"HF_TOKEN": "tok"}
    tiny_key = hf_catalogue.default_key_for_profile("tiny")
    hf_binding = inference.binding_for(inference.qualify("hf", tiny_key), env=hf_env)
    assert hf_binding["base_url"] == hf_catalogue.DEFAULT_BASE_URL
    assert hf_binding["api_key"] == "tok"

    # Modal: a bare key is enough; the resulting base URL points at a modal.run host.
    modal_env = {"MODAL_WORKSPACE": "ws", "MODAL_LLM_KEY": "EMPTY"}
    balanced_key = modal_catalogue.default_key_for_profile("balanced")
    modal_binding = inference.binding_for(balanced_key, env=modal_env)
    assert "modal.run" in modal_binding["base_url"]
def test_default_key_for_profile_is_backend_scoped():
    """Default keys come back in the key form of the backend that was asked for."""
    # HF currently tags only the tiny tier (its single live chat model); Modal tags
    # every tier. The point here is that keys are namespaced per backend.
    hf_key = inference.default_key_for_profile("tiny", "hf")
    assert hf_key is not None
    assert hf_key.startswith("hf:")

    modal_key = inference.default_key_for_profile("strong", "modal")
    assert modal_key is not None
    assert not modal_key.startswith("hf:")
def test_backend_available_and_configured_backends():
    """Availability follows the env passed in; configured backends list Modal first."""
    availability_cases = (
        ("modal", {"MODAL_WORKSPACE": "ws"}, True),
        ("hf", {"HF_TOKEN": "x"}, True),
        ("modal", {}, False),
        ("hf", {}, False),
        # An unregistered backend name is never available, whatever the env holds.
        ("nope", {"HF_TOKEN": "x"}, False),
    )
    for backend, env, expected in availability_cases:
        assert inference.backend_available(backend, env=env) is expected

    full_env = {"MODAL_WORKSPACE": "ws", "HF_TOKEN": "x"}
    configured = inference.configured_backends(env=full_env)
    assert configured == ["modal", "hf"]  # display order: Modal first
    assert inference.configured_backends(env={}) == []
def test_binding_unknown_backend_raises():
    """``binding_for`` raises ``KeyError`` for a key it cannot resolve."""
    # entry_by_key tolerates unknown keys, but binding_for surfaces a config error.
    # NOTE(review): the key below carries the registered "hf:" prefix, so this
    # presumably exercises an unknown model rather than an unknown backend -- confirm.
    unresolvable_key = "hf:does/not-exist"
    env = {"HF_TOKEN": "x"}
    with pytest.raises(KeyError):
        inference.binding_for(unresolvable_key, env=env)