Spaces:

build-small-hackathon
/

multi-agent-lab

Running on Zero

App Files Files Community

agharsallah committed 20 days ago

Commit

83e16dc

1 Parent(s): 52b556c

feat(models): Allow reliable models and update model selection logic in UI

Browse files

Files changed (3) hide show

config/models.yaml +4 -4
src/ui/fishbowl/lab.py +18 -2
tests/test_fishbowl_lab.py +6 -2

config/models.yaml CHANGED Viewed

@@ -48,7 +48,7 @@ profiles:
     endpoint: gemma-4-12b            # Google Gemma 4 12B (reasoning)
     temperature: 0.8
     max_tokens: 768
-  strong:
-    endpoint: gemma-4-26b            # Google Gemma 4 26B-A4B-it (MoE, ~4B active; reasoning)
-    temperature: 0.6
-    max_tokens: 1024

     endpoint: gemma-4-12b            # Google Gemma 4 12B (reasoning)
     temperature: 0.8
     max_tokens: 768
+  # strong:
+  #   endpoint: gemma-4-26b            # Google Gemma 4 26B-A4B-it (MoE, ~4B active; reasoning)
+  #   temperature: 0.6
+  #   max_tokens: 1024

src/ui/fishbowl/lab.py CHANGED Viewed

@@ -66,6 +66,16 @@ _TOOL_LABELS: dict[str, str] = {tool_id: label for label, tool_id in TOOL_CHOICE
 # The scenario we lead with — the hackathon's north-star world.
 _PREFERRED_SCENARIO = "thousand-token-wood"
 # ── data sourcing (read-only over the registry) ─────────────────────────────────
@@ -144,6 +154,10 @@ def model_choices(backend: str = inference.DEFAULT_BACKEND) -> list[tuple[str, s
     backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
     choices: list[tuple[str, str]] = []
     for entry in inference.entries(backend):
         served = entry["served_model_id"].split("/")[-1]
         params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
         tier = entry["profile"] or "specialist"
@@ -160,10 +174,12 @@ def _default_model_key(manifest: AgentManifest, backend: str = inference.DEFAULT
     default model for the manifest's tier, else the first model in that backend's
     catalogue (or None when it is empty)."""
     if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
-        return manifest.model_endpoint
     tiered = inference.default_key_for_profile(manifest.model_profile, backend)
     if tiered:
-        return tiered
     entries = inference.entries(backend)
     return entries[0]["key"] if entries else None

 # The scenario we lead with — the hackathon's north-star world.
 _PREFERRED_SCENARIO = "thousand-token-wood"
+# Models retired from the Lab picker.  Each key stays in the catalogue (and the engine
+# can still resolve it if a config names it), but it is not offered or selectable in the
+# UI — any default that would land on a key is substituted with its mapped replacement
+# (e.g. ``gemma-4-26b`` → ``gemma-4-12b``).  Keyed/valued by the catalogue casting slug.
+_DISABLED_MODELS: dict[str, str] = {
+    "gemma-4-26b": "gemma-4-12b",  # use the 12B in the UI instead of the 26B
+    "nemotron-cascade-14b": "nemotron-3-nano-4b",  # use the 4B nano until cascade is resolved
+    "minicpm-o-4-5": "minicpm-4-1-8b",  # use the 8B text model instead of the multimodal -o
+}
 # ── data sourcing (read-only over the registry) ─────────────────────────────────
     backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
     choices: list[tuple[str, str]] = []
     for entry in inference.entries(backend):
+        # Models listed in _DISABLED_MODELS are retired from the UI picker: skip them so
+        # they are never offered or selectable; defaults use the mapped replacement.
+        if entry["key"] in _DISABLED_MODELS:
+            continue
         served = entry["served_model_id"].split("/")[-1]
         params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
         tier = entry["profile"] or "specialist"
     default model for the manifest's tier, else the first model in that backend's
     catalogue (or None when it is empty)."""
     if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
+        return _DISABLED_MODELS.get(manifest.model_endpoint, manifest.model_endpoint)
     tiered = inference.default_key_for_profile(manifest.model_profile, backend)
     if tiered:
+        # A UI-disabled default (e.g. the strong tier's gemma-4-26b) is swapped for its
+        # replacement so the picker never seeds a model it won't display.
+        return _DISABLED_MODELS.get(tiered, tiered)
     entries = inference.entries(backend)
     return entries[0]["key"] if entries else None

tests/test_fishbowl_lab.py CHANGED Viewed

@@ -83,10 +83,14 @@ def test_judge_model_dropdown_offers_only_catalogue_models():
     assert values, "judge model dropdown should list the catalogue"
-def test_model_choices_are_all_catalogue_keys():
     choices = lab.model_choices()  # defaults to the Modal backend (bare keys)
     # Every selectable value is a real catalogue endpoint key — nothing else is offerable.
-    assert {key for _label, key in choices} == set(_CATALOGUE_KEYS)
     # Labels are human-readable and name the served model.
     assert all(" · " in label for label, _ in choices)

     assert values, "judge model dropdown should list the catalogue"
+def test_model_choices_are_catalogue_keys_minus_ui_disabled():
     choices = lab.model_choices()  # defaults to the Modal backend (bare keys)
+    keys = {key for _label, key in choices}
     # Every selectable value is a real catalogue endpoint key — nothing else is offerable.
+    assert keys == set(_CATALOGUE_KEYS) - set(lab._DISABLED_MODELS)
+    # UI-disabled models (e.g. gemma-4-26b) are never offered, even though they remain
+    # in the catalogue for the engine.
+    assert keys.isdisjoint(lab._DISABLED_MODELS)
     # Labels are human-readable and name the served model.
     assert all(" · " in label for label, _ in choices)