"""
Experiments / model lifecycle view.
Surfaces the model lifecycle that sits on top of the registry:
- registered models and their versions,
- a run history (every scoring run, with version + score statistics),
- a run-vs-run comparison (e.g. two versions of the same model) showing how
per-sequence scores shifted.
"""
from __future__ import annotations

import html
from typing import TYPE_CHECKING, Dict, List, Optional

import panel as pn
import param

if TYPE_CHECKING:
    from ui.state import AppState
def _fmt(x: Optional[float]) -> str:
return "—" if x is None or (isinstance(x, float) and x != x) else f"{x:.3f}"
class ExperimentView(param.Parameterized):
    """Model lifecycle / experiment tracking panel.

    Builds three cards from the shared application state passed to the
    constructor: the registered models, the scoring-run history (newest
    first) and a run-vs-run comparison driven by two ``Select`` widgets.

    NOTE(review): the table/paragraph markup emitted by this class was
    reconstructed; confirm tag structure and inline styles against the
    intended design.
    """

    # Colours reused across the generated HTML fragments.
    _GOOD = "#059669"
    _BAD = "#DC2626"
    _MUTED = "#64748B"
    _BORDER = "#CBD5E1"
    # The comparison table lists at most this many sequences.
    _MAX_COMPARISON_ROWS = 50

    def __init__(self, state: "AppState", **params: object) -> None:
        """Store the application state and create the two run selectors.

        Args:
            state: Application state; this class reads ``model_registry``,
                ``run_history`` and ``worklist`` from it.
            **params: Forwarded unchanged to ``param.Parameterized``.
        """
        super().__init__(**params)
        self._state = state
        self._run_a = pn.widgets.Select(
            name="Run A (baseline)", width=320, margin=(4, 10)
        )
        self._run_b = pn.widgets.Select(
            name="Run B (compare)", width=320, margin=(4, 10)
        )

    # ── HTML helpers ──────────────────────────────────────────────────────────
    @staticmethod
    def _esc(value: object) -> str:
        """Return ``str(value)`` escaped for safe embedding in HTML."""
        return html.escape(str(value))

    @classmethod
    def _delta_color(cls, delta: float) -> str:
        """Pick the colour for a score delta: positive, negative or zero."""
        if delta > 0:
            return cls._GOOD
        if delta < 0:
            return cls._BAD
        return cls._MUTED

    @classmethod
    def _note(cls, markup: str) -> pn.pane.HTML:
        """Wrap a short message in a muted paragraph.

        *markup* is inserted verbatim, so callers must pass trusted or
        already-escaped text.
        """
        return pn.pane.HTML(
            f'<p style="color:{cls._MUTED};font-size:13px;margin:4px 0;">'
            f"{markup}</p>"
        )

    @classmethod
    def _row(cls, cells: List[str]) -> str:
        """Build one ``<tr>`` from cell fragments that are already safe HTML."""
        tds = "".join(
            f'<td style="padding:4px 8px;border-bottom:1px solid {cls._BORDER};">'
            f"{cell}</td>"
            for cell in cells
        )
        return f"<tr>{tds}</tr>"

    @classmethod
    def _table(cls, headers: List[str], body_rows: List[str]) -> pn.pane.HTML:
        """Assemble a table pane from header labels and pre-built row strings.

        Header labels are escaped here; *body_rows* are expected to come from
        :meth:`_row` and are inserted as-is.
        """
        head = "".join(
            f'<th style="text-align:left;padding:4px 8px;color:{cls._MUTED};'
            f'border-bottom:1px solid {cls._BORDER};">{cls._esc(label)}</th>'
            for label in headers
        )
        body = "".join(body_rows)
        return pn.pane.HTML(
            '<table style="width:100%;border-collapse:collapse;font-size:13px;">'
            f"<thead><tr>{head}</tr></thead>"
            f"<tbody>{body}</tbody>"
            "</table>"
        )

    @classmethod
    def _card(cls, title: str, body: pn.viewable.Viewable) -> pn.Column:
        """Wrap *body* in a bordered white card headed by *title*."""
        return pn.Column(
            pn.pane.HTML(
                '<div style="font-weight:600;font-size:14px;margin:0 0 6px;">'
                f"{cls._esc(title)}</div>"
            ),
            body,
            styles={"background": "white", "border": "1px solid #CBD5E1",
                    "border-radius": "8px", "padding": "12px 14px"},
            margin=(0, 0, 12, 0), sizing_mode="stretch_width",
        )

    # ── registered models ─────────────────────────────────────────────────────
    def _models_table(self) -> pn.pane.HTML:
        """Render the registered models (name, type, version, source).

        Returns a placeholder message when the registry is missing/falsy or
        has no models.
        """
        registry = self._state.model_registry
        models = registry.all_models if registry else []
        if not models:
            return self._note("No models registered yet.")

        esc = self._esc
        body_rows: List[str] = []
        for entry in models:
            # Best effort, as in the original: any failure while reading the
            # version is shown as a dash instead of breaking the whole table.
            try:
                version = entry.model.version
            except Exception:
                version = "—"
            body_rows.append(
                self._row([
                    esc(entry.model.name),
                    esc(entry.model_type),
                    f"v{esc(version)}",
                    esc(entry.source),
                ])
            )
        return self._table(["Model", "Type", "Version", "Source"], body_rows)

    # ── run history ───────────────────────────────────────────────────────────
    def _runs_table(self) -> pn.pane.HTML:
        """Render every recorded scoring run, newest first.

        Returns a hint on how to record a run when the history is empty.
        """
        runs = self._state.run_history.runs
        if not runs:
            return self._note(
                "No runs yet. Score a worklist "
                "with a model (<b>Worklist → Run</b>) to record an experiment."
            )

        esc = self._esc
        body_rows = [
            self._row([
                esc(run.timestamp),
                esc(run.model_name),
                f"v{esc(run.model_version)}",
                esc(run.n_sequences),
                _fmt(run.score_mean),
                f"{_fmt(run.score_min)}–{_fmt(run.score_max)}",
                esc(run.worklist_name),
            ])
            for run in reversed(runs)  # newest first
        ]
        return self._table(
            ["Time", "Model", "Version", "N", "Mean", "Range", "Worklist"],
            body_rows,
        )

    # ── comparison ────────────────────────────────────────────────────────────
    def _run_options(self) -> Dict[str, object]:
        """Map ``"<run_id> · <label>"`` to the run id, newest run first."""
        return {
            f"{run.run_id} · {run.label}": run.run_id
            for run in reversed(self._state.run_history.runs)
        }

    def _name_lookup(self) -> Dict[str, str]:
        """Map sequence id to sequence name for the current worklist items."""
        return {
            item.sequence.id: item.sequence.name
            for item in self._state.worklist.items
        }

    def _render_comparison(self, run_a_id: str, run_b_id: str) -> pn.viewable.Viewable:
        """Render the per-sequence score shift between two runs.

        Args:
            run_a_id: Id of the baseline run (selector "Run A").
            run_b_id: Id of the run compared against it (selector "Run B").

        Returns:
            A placeholder message when either id is unknown, both ids are the
            same, or the runs share no sequences; otherwise a column with a
            summary line and a table of at most ``_MAX_COMPARISON_ROWS``
            sequences ordered from lowest to highest delta.
        """
        # Function-scope import kept from the original.
        from models.runs import RunHistory

        by_id = {run.run_id: run for run in self._state.run_history.runs}
        ra, rb = by_id.get(run_a_id), by_id.get(run_b_id)
        if not ra or not rb:
            return self._note("Pick two runs to compare.")
        if ra.run_id == rb.run_id:
            return self._note("Pick two different runs.")

        diff = RunHistory.compare(ra, rb)
        if not diff.shared_ids:
            return self._note("No shared sequences between these runs.")

        # The mean delta and its colour were computed but never displayed
        # before; they now lead the summary line.
        mean_delta = diff.mean_delta
        summary = pn.pane.HTML(
            '<div style="display:flex;gap:16px;flex-wrap:wrap;'
            'font-size:13px;margin:4px 0 8px;">'
            f'<span style="color:{self._delta_color(mean_delta)};font-weight:600;">'
            f"mean Δ {mean_delta:+.3f}</span>"
            f'<span style="color:{self._GOOD};">▲ {diff.n_improved} improved</span>'
            f'<span style="color:{self._BAD};">▼ {diff.n_worsened} worsened</span>'
            f'<span style="color:{self._MUTED};">= {diff.n_unchanged} unchanged</span>'
            f'<span style="color:{self._MUTED};">'
            f"{len(diff.shared_ids)} shared sequences</span>"
            "</div>"
        )

        names = self._name_lookup()
        ordered = sorted(diff.deltas.items(), key=lambda kv: kv[1])  # worst→best
        shown = ordered[: self._MAX_COMPARISON_ROWS]
        body_rows = [
            self._row([
                # Fall back to a shortened id when the sequence is not in the
                # current worklist.
                self._esc(names.get(sid, str(sid)[:8])),
                _fmt(ra.scores.get(sid)),
                _fmt(rb.scores.get(sid)),
                f'<span style="color:{self._delta_color(delta)};">{delta:+.3f}</span>',
            ])
            for sid, delta in shown
        ]
        table = self._table(
            [
                "Sequence",
                f"A (v{ra.model_version})",
                f"B (v{rb.model_version})",
                "Δ",
            ],
            body_rows,
        )

        parts = [summary, table]
        if len(ordered) > len(shown):
            # Make the row cap visible instead of silently dropping rows.
            parts.append(
                self._note(
                    f"Showing the {len(shown)} lowest Δ of {len(ordered)} sequences."
                )
            )
        return pn.Column(*parts, sizing_mode="stretch_width")

    # ── panel ─────────────────────────────────────────────────────────────────
    @param.depends("_state.run_history", "_state.model_registry")
    def panel(self) -> pn.Column:
        """Build the full experiments view.

        Refreshes the options of both run selectors, preselects the two
        newest runs (A = older of the two, B = newest) when available, and
        lays out the three cards in a scrollable column.
        """
        # Refresh comparison dropdown options.
        options = self._run_options()
        self._run_a.options = options
        self._run_b.options = options

        run_ids = list(options.values())
        if len(run_ids) >= 2:
            self._run_a.value = run_ids[1]  # older of the two newest
            self._run_b.value = run_ids[0]  # newest
        elif run_ids:
            self._run_a.value = self._run_b.value = run_ids[0]

        # Re-render the comparison whenever either selector changes.
        comparison = pn.bind(self._render_comparison, self._run_a, self._run_b)

        header = pn.pane.HTML(
            '<h2 style="margin:0 0 2px;font-size:20px;">Experiments</h2>'
            f'<p style="margin:0 0 12px;color:{self._MUTED};font-size:13px;">'
            "Model versions, scoring-run history, and version-to-version comparison."
            "</p>"
        )
        compare_body = pn.Column(
            pn.Row(self._run_a, self._run_b),
            pn.panel(comparison),
            sizing_mode="stretch_width",
        )
        return pn.Column(
            header,
            self._card("Registered models", self._models_table()),
            self._card("Run history", self._runs_table()),
            self._card("Compare runs (version A → B)", compare_body),
            sizing_mode="stretch_width",
            styles={"padding": "8px 16px", "max-height": "78vh", "overflow-y": "auto"},
        )