agharsallah committed on
Commit
83e16dc
·
1 Parent(s): 52b556c

feat(models): Allow reliable models and update model selection logic in UI

Browse files
config/models.yaml CHANGED
@@ -48,7 +48,7 @@ profiles:
48
  endpoint: gemma-4-12b # Google Gemma 4 12B (reasoning)
49
  temperature: 0.8
50
  max_tokens: 768
51
- strong:
52
- endpoint: gemma-4-26b # Google Gemma 4 26B-A4B-it (MoE, ~4B active; reasoning)
53
- temperature: 0.6
54
- max_tokens: 1024
 
48
  endpoint: gemma-4-12b # Google Gemma 4 12B (reasoning)
49
  temperature: 0.8
50
  max_tokens: 768
51
+ # strong:
52
+ # endpoint: gemma-4-26b # Google Gemma 4 26B-A4B-it (MoE, ~4B active; reasoning)
53
+ # temperature: 0.6
54
+ # max_tokens: 1024
src/ui/fishbowl/lab.py CHANGED
@@ -66,6 +66,16 @@ _TOOL_LABELS: dict[str, str] = {tool_id: label for label, tool_id in TOOL_CHOICE
66
  # The scenario we lead with — the hackathon's north-star world.
67
  _PREFERRED_SCENARIO = "thousand-token-wood"
68
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # ── data sourcing (read-only over the registry) ─────────────────────────────────
71
 
@@ -144,6 +154,10 @@ def model_choices(backend: str = inference.DEFAULT_BACKEND) -> list[tuple[str, s
144
  backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
145
  choices: list[tuple[str, str]] = []
146
  for entry in inference.entries(backend):
 
 
 
 
147
  served = entry["served_model_id"].split("/")[-1]
148
  params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
149
  tier = entry["profile"] or "specialist"
@@ -160,10 +174,12 @@ def _default_model_key(manifest: AgentManifest, backend: str = inference.DEFAULT
160
  default model for the manifest's tier, else the first model in that backend's
161
  catalogue (or None when it is empty)."""
162
  if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
163
- return manifest.model_endpoint
164
  tiered = inference.default_key_for_profile(manifest.model_profile, backend)
165
  if tiered:
166
- return tiered
 
 
167
  entries = inference.entries(backend)
168
  return entries[0]["key"] if entries else None
169
 
 
66
  # The scenario we lead with — the hackathon's north-star world.
67
  _PREFERRED_SCENARIO = "thousand-token-wood"
68
 
69
+ # Models retired from the Lab picker. ``gemma-4-26b`` stays in the catalogue (and the
70
+ # engine can still resolve it if a config names it), but it is not offered or selectable
71
+ # in the UI — any default that would land on it is substituted with ``_DISABLED_MODELS``'
72
+ # replacement (``gemma-4-12b``). Keyed/valued by the catalogue casting slug.
73
+ _DISABLED_MODELS: dict[str, str] = {
74
+ "gemma-4-26b": "gemma-4-12b", # use the 12B in the UI instead of the 26B
75
+ "nemotron-cascade-14b": "nemotron-3-nano-4b", # use the 4B nano until cascade is resolved
76
+ "minicpm-o-4-5": "minicpm-4-1-8b", # use the 8B text model instead of the multimodal -o
77
+ }
78
+
79
 
80
  # ── data sourcing (read-only over the registry) ─────────────────────────────────
81
 
 
154
  backend-qualified key (``hf:<repo>`` for HF; a bare slug for Modal)."""
155
  choices: list[tuple[str, str]] = []
156
  for entry in inference.entries(backend):
157
+ # gemma-4-26b is retired from the UI picker (see _DISABLED_MODELS): skip it so it
158
+ # is never offered or selectable; the cast uses gemma-4-12b instead.
159
+ if entry["key"] in _DISABLED_MODELS:
160
+ continue
161
  served = entry["served_model_id"].split("/")[-1]
162
  params = f"{entry['params_b']:g}B" if entry.get("params_b") else "?"
163
  tier = entry["profile"] or "specialist"
 
174
  default model for the manifest's tier, else the first model in that backend's
175
  catalogue (or None when it is empty)."""
176
  if backend == inference.DEFAULT_BACKEND and manifest.model_endpoint:
177
+ return _DISABLED_MODELS.get(manifest.model_endpoint, manifest.model_endpoint)
178
  tiered = inference.default_key_for_profile(manifest.model_profile, backend)
179
  if tiered:
180
+ # A UI-disabled default (e.g. the strong tier's gemma-4-26b) is swapped for its
181
+ # replacement so the picker never seeds a model it won't display.
182
+ return _DISABLED_MODELS.get(tiered, tiered)
183
  entries = inference.entries(backend)
184
  return entries[0]["key"] if entries else None
185
 
tests/test_fishbowl_lab.py CHANGED
@@ -83,10 +83,14 @@ def test_judge_model_dropdown_offers_only_catalogue_models():
83
  assert values, "judge model dropdown should list the catalogue"
84
 
85
 
86
- def test_model_choices_are_all_catalogue_keys():
87
  choices = lab.model_choices() # defaults to the Modal backend (bare keys)
 
88
  # Every selectable value is a real catalogue endpoint key — nothing else is offerable.
89
- assert {key for _label, key in choices} == set(_CATALOGUE_KEYS)
 
 
 
90
  # Labels are human-readable and name the served model.
91
  assert all(" · " in label for label, _ in choices)
92
 
 
83
  assert values, "judge model dropdown should list the catalogue"
84
 
85
 
86
+ def test_model_choices_are_catalogue_keys_minus_ui_disabled():
87
  choices = lab.model_choices() # defaults to the Modal backend (bare keys)
88
+ keys = {key for _label, key in choices}
89
  # Every selectable value is a real catalogue endpoint key — nothing else is offerable.
90
+ assert keys == set(_CATALOGUE_KEYS) - set(lab._DISABLED_MODELS)
91
+ # UI-disabled models (e.g. gemma-4-26b) are never offered, even though they remain
92
+ # in the catalogue for the engine.
93
+ assert keys.isdisjoint(lab._DISABLED_MODELS)
94
  # Labels are human-readable and name the served model.
95
  assert all(" · " in label for label, _ in choices)
96