Spaces:
Running on Zero
Running on Zero
agharsallah commited on
Commit ·
83e16dc
1
Parent(s): 52b556c
feat(models): Allow reliable models and update model selection logic in UI
Browse files- config/models.yaml +4 -4
- src/ui/fishbowl/lab.py +18 -2
- tests/test_fishbowl_lab.py +6 -2
config/models.yaml
CHANGED
|
@@ -48,7 +48,7 @@ profiles:
|
|
| 48 |
endpoint: gemma-4-12b # Google Gemma 4 12B (reasoning)
|
| 49 |
temperature: 0.8
|
| 50 |
max_tokens: 768
|
| 51 |
-
strong:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
| 48 |
endpoint: gemma-4-12b # Google Gemma 4 12B (reasoning)
|
| 49 |
temperature: 0.8
|
| 50 |
max_tokens: 768
|
| 51 |
+
# strong:
|
| 52 |
+
# endpoint: gemma-4-26b # Google Gemma 4 26B-A4B-it (MoE, ~4B active; reasoning)
|
| 53 |
+
# temperature: 0.6
|
| 54 |
+
# max_tokens: 1024
|
src/ui/fishbowl/lab.py
CHANGED
|
@@ -66,6 +66,16 @@ _TOOL_LABELS: dict[str, str] = {tool_id: label for label, tool_id in TOOL_CHOICE
|
|
| 66 |
# The scenario we lead with — the hackathon's north-star world.
|
| 67 |
_PREFERRED_SCENARIO = "thousand-token-wood"
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# ── data sourcing (read-only over the registry) ─────────────────────────────────
|
| 71 |
|
|
@@ -144,6 +154,10 @@ def model_choices(backend: str = inference.DEFAULT_BACKEND) -> list[tuple[str, s
|
|
| 144 |
backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
|
| 145 |
choices: list[tuple[str, str]] = []
|
| 146 |
for entry in inference.entries(backend):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
served = entry["served_model_id"].split("/")[-1]
|
| 148 |
params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
|
| 149 |
tier = entry["profile"] or "specialist"
|
|
@@ -160,10 +174,12 @@ def _default_model_key(manifest: AgentManifest, backend: str = inference.DEFAULT
|
|
| 160 |
default model for the manifest's tier, else the first model in that backend's
|
| 161 |
catalogue (or None when it is empty)."""
|
| 162 |
if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
|
| 163 |
-
return manifest.model_endpoint
|
| 164 |
tiered = inference.default_key_for_profile(manifest.model_profile, backend)
|
| 165 |
if tiered:
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
entries = inference.entries(backend)
|
| 168 |
return entries[0]["key"] if entries else None
|
| 169 |
|
|
|
|
| 66 |
# The scenario we lead with — the hackathon's north-star world.
|
| 67 |
_PREFERRED_SCENARIO = "thousand-token-wood"
|
| 68 |
|
| 69 |
+
# Models retired from the Lab picker. ``gemma-4-26b`` stays in the catalogue (and the
|
| 70 |
+
# engine can still resolve it if a config names it), but it is not offered or selectable
|
| 71 |
+
# in the UI — any default that would land on it is substituted with ``_DISABLED_MODELS``'
|
| 72 |
+
# replacement (``gemma-4-12b``). Keyed/valued by the catalogue casting slug.
|
| 73 |
+
_DISABLED_MODELS: dict[str, str] = {
|
| 74 |
+
"gemma-4-26b": "gemma-4-12b", # use the 12B in the UI instead of the 26B
|
| 75 |
+
"nemotron-cascade-14b": "nemotron-3-nano-4b", # use the 4B nano until cascade is resolved
|
| 76 |
+
"minicpm-o-4-5": "minicpm-4-1-8b", # use the 8B text model instead of the multimodal -o
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
|
| 80 |
# ── data sourcing (read-only over the registry) ─────────────────────────────────
|
| 81 |
|
|
|
|
| 154 |
backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
|
| 155 |
choices: list[tuple[str, str]] = []
|
| 156 |
for entry in inference.entries(backend):
|
| 157 |
+
# Models keyed in _DISABLED_MODELS (e.g. gemma-4-26b) are retired from the UI picker:
|
| 158 |
+
# skip them so they are never offered or selectable; the cast uses each one's replacement.
|
| 159 |
+
if entry["key"] in _DISABLED_MODELS:
|
| 160 |
+
continue
|
| 161 |
served = entry["served_model_id"].split("/")[-1]
|
| 162 |
params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
|
| 163 |
tier = entry["profile"] or "specialist"
|
|
|
|
| 174 |
default model for the manifest's tier, else the first model in that backend's
|
| 175 |
catalogue (or None when it is empty)."""
|
| 176 |
if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
|
| 177 |
+
return _DISABLED_MODELS.get(manifest.model_endpoint, manifest.model_endpoint)
|
| 178 |
tiered = inference.default_key_for_profile(manifest.model_profile, backend)
|
| 179 |
if tiered:
|
| 180 |
+
# A UI-disabled default (e.g. the strong tier's gemma-4-26b) is swapped for its
|
| 181 |
+
# replacement so the picker never seeds a model it won't display.
|
| 182 |
+
return _DISABLED_MODELS.get(tiered, tiered)
|
| 183 |
entries = inference.entries(backend)
|
| 184 |
return entries[0]["key"] if entries else None
|
| 185 |
|
tests/test_fishbowl_lab.py
CHANGED
|
@@ -83,10 +83,14 @@ def test_judge_model_dropdown_offers_only_catalogue_models():
|
|
| 83 |
assert values, "judge model dropdown should list the catalogue"
|
| 84 |
|
| 85 |
|
| 86 |
-
def
|
| 87 |
choices = lab.model_choices() # defaults to the Modal backend (bare keys)
|
|
|
|
| 88 |
# Every selectable value is a real catalogue endpoint key — nothing else is offerable.
|
| 89 |
-
assert
|
|
|
|
|
|
|
|
|
|
| 90 |
# Labels are human-readable and name the served model.
|
| 91 |
assert all(" Β· " in label for label, _ in choices)
|
| 92 |
|
|
|
|
| 83 |
assert values, "judge model dropdown should list the catalogue"
|
| 84 |
|
| 85 |
|
| 86 |
+
def test_model_choices_are_catalogue_keys_minus_ui_disabled():
|
| 87 |
choices = lab.model_choices() # defaults to the Modal backend (bare keys)
|
| 88 |
+
keys = {key for _label, key in choices}
|
| 89 |
# Every selectable value is a real catalogue endpoint key — nothing else is offerable.
|
| 90 |
+
assert keys == set(_CATALOGUE_KEYS) - set(lab._DISABLED_MODELS)
|
| 91 |
+
# UI-disabled models (e.g. gemma-4-26b) are never offered, even though they remain
|
| 92 |
+
# in the catalogue for the engine.
|
| 93 |
+
assert keys.isdisjoint(lab._DISABLED_MODELS)
|
| 94 |
# Labels are human-readable and name the served model.
|
| 95 |
assert all(" Β· " in label for label, _ in choices)
|
| 96 |
|