""" Experiments / model lifecycle view. Surfaces the model lifecycle that sits on top of the registry: - registered models and their versions, - a run history (every scoring run, with version + score statistics), - a run-vs-run comparison (e.g. two versions of the same model) showing how per-sequence scores shifted. """ from __future__ import annotations from typing import TYPE_CHECKING, Dict, List, Optional import panel as pn import param if TYPE_CHECKING: from ui.state import AppState def _fmt(x: Optional[float]) -> str: return "—" if x is None or (isinstance(x, float) and x != x) else f"{x:.3f}" class ExperimentView(param.Parameterized): """Model lifecycle / experiment tracking panel.""" def __init__(self, state: "AppState", **params: object) -> None: super().__init__(**params) self._state = state self._run_a = pn.widgets.Select(name="Run A (baseline)", width=320, margin=(4, 10)) self._run_b = pn.widgets.Select(name="Run B (compare)", width=320, margin=(4, 10)) # ── registered models ───────────────────────────────────────────────────── def _models_table(self) -> pn.pane.HTML: reg = self._state.model_registry models = reg.all_models if reg else [] if not models: return pn.pane.HTML('
No models registered yet.
') rows = "" for m in models: try: ver = m.model.version except Exception: ver = "—" rows += ( f'' f'{m.model.name}' f'{m.model_type}' f'v{ver}' f'{m.source}' f'' ) return pn.pane.HTML( '' '' '' '' f'{rows}
ModelTypeVersionSource
' ) # ── run history ─────────────────────────────────────────────────────────── def _runs_table(self) -> pn.pane.HTML: runs = self._state.run_history.runs if not runs: return pn.pane.HTML( '
No runs yet. Score a worklist ' 'with a model (Worklist → Run) to record an experiment.
' ) rows = "" for r in reversed(runs): # newest first rows += ( f'' f'{r.timestamp}' f'{r.model_name}' f'v{r.model_version}' f'{r.n_sequences}' f'{_fmt(r.score_mean)}' f'' f'{_fmt(r.score_min)}–{_fmt(r.score_max)}' f'{r.worklist_name}' f'' ) return pn.pane.HTML( '' '' '' '' '' '' f'{rows}
TimeModelVersionNMeanRangeWorklist
' ) # ── comparison ──────────────────────────────────────────────────────────── def _run_options(self) -> Dict[str, object]: return {f"{r.run_id} · {r.label}": r.run_id for r in reversed(self._state.run_history.runs)} def _name_lookup(self) -> Dict[str, str]: names: Dict[str, str] = {} for item in self._state.worklist.items: names[item.sequence.id] = item.sequence.name return names def _render_comparison(self, run_a_id: str, run_b_id: str) -> pn.viewable.Viewable: from models.runs import RunHistory runs = {r.run_id: r for r in self._state.run_history.runs} ra, rb = runs.get(run_a_id), runs.get(run_b_id) if not ra or not rb: return pn.pane.HTML('
Pick two runs to compare.
') if ra.run_id == rb.run_id: return pn.pane.HTML('
Pick two different runs.
') cmp = RunHistory.compare(ra, rb) if not cmp.shared_ids: return pn.pane.HTML( '
No shared sequences between these runs.
' ) d = cmp.mean_delta dcolor = "#059669" if d > 0 else "#DC2626" if d < 0 else "#64748B" summary = pn.pane.HTML(f"""
MEAN Δ (B − A)
{d:+.3f}
▲ {cmp.n_improved} improved
▼ {cmp.n_worsened} worsened
= {cmp.n_unchanged} unchanged
{len(cmp.shared_ids)} shared sequences
""") names = self._name_lookup() ordered = sorted(cmp.deltas.items(), key=lambda kv: kv[1]) # worst→best rows = "" for sid, delta in ordered[:50]: c = "#059669" if delta > 0 else "#DC2626" if delta < 0 else "#64748B" nm = names.get(sid, sid[:8]) rows += ( f'' f'{nm}' f'{_fmt(ra.scores.get(sid))}' f'{_fmt(rb.scores.get(sid))}' f'{delta:+.3f}' f'' ) table = pn.pane.HTML( '' '' '' f'' f'' '' f'{rows}
SequenceA (v{ra.model_version})B (v{rb.model_version})Δ
' ) return pn.Column(summary, table, sizing_mode="stretch_width") # ── panel ───────────────────────────────────────────────────────────────── @param.depends("_state.run_history", "_state.model_registry") def panel(self) -> pn.Column: # refresh comparison dropdown options opts = self._run_options() self._run_a.options = opts self._run_b.options = opts run_ids = list(opts.values()) if len(run_ids) >= 2: self._run_a.value = run_ids[1] # older of the two newest self._run_b.value = run_ids[0] # newest elif run_ids: self._run_a.value = self._run_b.value = run_ids[0] comparison = pn.bind(self._render_comparison, self._run_a, self._run_b) def card(title: str, body: pn.viewable.Viewable) -> pn.Column: return pn.Column( pn.pane.HTML(f'
{title}
'), body, styles={"background": "white", "border": "1px solid #CBD5E1", "border-radius": "8px", "padding": "12px 14px"}, margin=(0, 0, 12, 0), sizing_mode="stretch_width", ) return pn.Column( pn.pane.HTML( '
Experiments
' '
' 'Model versions, scoring-run history, and version-to-version comparison.
' ), card("Registered models", self._models_table()), card("Run history", self._runs_table()), card("Compare runs (version A → B)", pn.Column(pn.Row(self._run_a, self._run_b), pn.panel(comparison), sizing_mode="stretch_width")), sizing_mode="stretch_width", styles={"padding": "8px 16px", "max-height": "78vh", "overflow-y": "auto"}, )