Spaces:

garvitsachdeva
/

SpindleFlow-RL

Runtime error

App Files Files Community

garvitsachdeva commited on 26 days ago

Commit

61e8a52

1 Parent(s): 51db558

fix: importlib wrapper with correct file; remove garbage files

Browse files

Files changed (3) hide show

=4.40.0 +0 -0
=5.22.0 +0 -0
streamlit_app.py +18 -2128

=4.40.0 DELETED Viewed

File without changes

=5.22.0 DELETED Viewed

File without changes

streamlit_app.py CHANGED Viewed

@@ -1,2132 +1,22 @@
-"""
-SpindleFlow RL — Streamlit Dashboard
-=====================================
-Run:  cd spindleflow-rl && streamlit run demo/streamlit_app.py
-URL:  http://localhost:8501
-"""
-from __future__ import annotations
-import os, sys, json, html as _html
 from pathlib import Path
-import numpy as np
-from dotenv import load_dotenv
-load_dotenv()  # load OPENAI_API_KEY (and any other vars) from .env
-# HF_HUB_OFFLINE intentionally NOT set — manual HF Hub downloads must work
-sys.path.insert(0, str(Path(__file__).resolve().parent))
-sys.path.insert(0, str(Path(__file__).resolve().parent / "demo"))
-import streamlit as st
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
 try:
-    from env.spindleflow_env import SpindleFlowEnv
-    from env.state import EpisodeState
-    from env.specialist_registry import SpecialistRegistry
-    from orchestrator_widget import render_orchestrator
-except Exception as _import_err:
-    import traceback as _tb
-    st.error(f"Import failed: {_import_err}")
-    st.code(_tb.format_exc())
-    st.stop()
-# ─────────────────────────────────────────────────────────
-# Page config  (must be first Streamlit call)
-# ─────────────────────────────────────────────────────────
-st.set_page_config(
-    page_title="SpindleFlow RL",
-    page_icon="⚡",
-    layout="wide",
-    initial_sidebar_state="collapsed",
-)
-# ─────────────────────────────────────────────────────────
-# Constants
-# ─────────────────────────────────────────────────────────
-CONFIG  = "configs/training_config.yaml"
-CATALOG = "configs/specialist_catalog.yaml"
-ASSETS  = Path("demo/assets")
-SPEC_COLORS = {
-    "frontend_react":     "#00d4ff",
-    "backend_api":        "#7c3aed",
-    "database_architect": "#f59e0b",
-    "devops_engineer":    "#10b981",
-    "security_analyst":   "#ef4444",
-    "product_strategist": "#8b5cf6",
-    "ux_designer":        "#ec4899",
-    "tech_writer":        "#94a3b8",
-}
-@st.cache_resource
-def _get_preset_tasks(n: int = 8) -> list[str]:
-    """Sample n live tasks from TaskBank at page load — no hardcoded strings."""
-    try:
-        from training.task_bank import TaskBank
-        bank = TaskBank(phase=1)
-        return [bank.sample() for _ in range(n)]
-    except Exception:
-        # Fallback only if TaskBank is unavailable (e.g. missing config)
-        return ["Describe a software engineering task requiring specialist collaboration"]
-PRESET_TASKS = _get_preset_tasks()
-HF_MODEL_REPO = "garvitsachdeva/spindleflow-rl"
-@st.cache_resource
-def _load_trained_model(hf_repo: str):
-    """Download RecurrentPPO + VecNormalize stats from HF Hub.
-    Returns (model, obs_mean, obs_var, clip_obs, error_str).
-    Temporarily lifts the HF_HUB_OFFLINE flag set at module level.
-    """
-    import pickle
-    try:
-        from huggingface_hub import hf_hub_download
-        from sb3_contrib import RecurrentPPO
-        _tok = os.getenv("HF_TOKEN") or None
-        # Try final model first, fall back to latest periodic checkpoint
-        try:
-            _model_path = hf_hub_download(hf_repo, "spindleflow_model.zip", token=_tok)
-        except Exception:
-            _model_path = hf_hub_download(hf_repo, "spindleflow_model_latest.zip", token=_tok)
-        model = RecurrentPPO.load(_model_path, device="cpu")
-        obs_mean = obs_var = None
-        clip_obs = 10.0
-        try:
-            try:
-                stats_path = hf_hub_download(hf_repo, "vec_normalize.pkl", token=_tok)
-            except Exception:
-                stats_path = hf_hub_download(hf_repo, "vec_normalize_latest.pkl", token=_tok)
-            with open(stats_path, "rb") as f:
-                vn = pickle.load(f)
-            obs_mean = vn.obs_rms.mean.copy()
-            obs_var  = vn.obs_rms.var.copy()
-            clip_obs = float(vn.clip_obs)
-        except Exception:
-            pass
-        return model, obs_mean, obs_var, clip_obs, None
-    except Exception as exc:
-        return None, None, None, 10.0, str(exc)
-    finally:
-        pass
-def _predict(model, obs: np.ndarray, lstm_states, episode_starts,
-             obs_mean, obs_var, clip_obs: float):
-    """Normalize obs and call model.predict(); return (action, new_lstm_states)."""
-    obs_arr = obs[np.newaxis, :].copy().astype(np.float32)
-    if obs_mean is not None and obs_var is not None:
-        obs_arr = np.clip(
-            (obs_arr - obs_mean) / np.sqrt(obs_var + 1e-8),
-            -clip_obs, clip_obs,
-        )
-    action_batch, new_states = model.predict(
-        obs_arr,
-        state=lstm_states,
-        episode_start=episode_starts,
-        deterministic=True,
-    )
-    return action_batch[0], new_states
-DARK = dict(
-    paper_bgcolor="rgba(0,0,0,0)",
-    plot_bgcolor="rgba(0,0,0,0)",
-    font=dict(color="#e2e8f0", family="Inter, system-ui, sans-serif"),
-    margin=dict(l=44, r=20, t=44, b=40),
-)
-DARK_AXES = dict(
-    xaxis=dict(gridcolor="rgba(255,255,255,0.05)", zerolinecolor="rgba(255,255,255,0.08)"),
-    yaxis=dict(gridcolor="rgba(255,255,255,0.05)", zerolinecolor="rgba(255,255,255,0.08)"),
-)
-# ─────────────────────────────────────────────────────────
-# Session state
-# ─────────────────────────────────────────────────────────
-class Session:
-    def __init__(self):
-        self.env: SpindleFlowEnv | None = None
-        self.registry: SpecialistRegistry | None = None
-        self.rewards: list[float] = []
-        self.actions: list[dict] = []
-        self.step_n  = 0
-        self.done    = False
-        self.task    = ""
-        # Full episode history for replay
-        self.episode_history: list[dict] = []
-        # Action entropy per step (policy confidence)
-        self.step_entropies: list[float] = []
-        # Observation vector stats per step
-        self.obs_history: list[dict] = []
-        # Specialists auto-spawned for this episode
-        self.spawned_specialists: list[str] = []
-        # Trained policy inference state
-        self.obs_current: np.ndarray | None = None
-        self.lstm_states = None
-        self.episode_starts = np.array([True])
-    def boot(self):
-        if self.env is None:
-            self.env = SpindleFlowEnv(
-                config_path=CONFIG, catalog_path=CATALOG,
-                use_real_spindleflow=False, phase=1,
-            )
-            self.registry = self.env.registry
-    def reset(self, phase: int = 1):
-        self.boot()
-        self.env.phase = int(phase)
-        obs, info = self.env.reset()
-        self.rewards = []
-        self.actions = []
-        self.step_n  = 0
-        self.done    = False
-        self.task    = info.get("task", "")
-        self.episode_history = []
-        self.step_entropies  = []
-        self.obs_history     = []
-        self.spawned_specialists: list[str] = list(info.get("spawned_specialists", []))
-        self.obs_current    = obs
-        self.lstm_states    = None
-        self.episode_starts = np.array([True])
-        return obs, info
-    def step(self, action):
-        if self.env is None or self.done:
-            return None, 0.0, True, False, {}
-        obs, r, term, trunc, info = self.env.step(action)
-        self.rewards.append(r)
-        self.actions.append(info)
-        self.step_n += 1
-        self.done = term or trunc
-        self.obs_current    = obs
-        self.episode_starts = np.array([self.done])
-        # Capture step snapshot for replay
-        called = info.get("called_specialists", [])
-        edges  = [(e.caller_id, e.callee_id)
-                  for e in self.env.delegation_graph.get_delegation_path()]
-        self.episode_history.append({
-            "step":        self.step_n,
-            "reward":      r,
-            "action_name": info.get("action_name", "UNKNOWN"),
-            "called":      list(called),
-            "edges":       list(edges),
-            "components":  dict(info.get("reward_components", {})),
-            "mode":        info.get("delegation_mode", ""),
-            "cumulative":  float(sum(self.rewards)),
-            "latencies":   dict(info.get("specialist_latencies", {})),
-        })
-        # Compute real action entropy (specialist-selection logits)
-        if self.env is not None:
-            n = self.env.max_specialists
-            spec_logits = action[1: 1 + n].copy()
-            spec_logits = spec_logits - spec_logits.max()
-            exp_l  = np.exp(spec_logits)
-            probs  = exp_l / (exp_l.sum() + 1e-8)
-            entropy = float(-np.sum(probs * np.log(probs + 1e-8)))
-            self.step_entropies.append(entropy)
-        # Capture observation norm for state trace
-        if obs is not None:
-            self.obs_history.append({
-                "step":     self.step_n,
-                "obs_norm": float(np.linalg.norm(obs)),
-                "obs_mean": float(obs.mean()),
-                "obs_max":  float(obs.max()),
-            })
-        return obs, r, term, trunc, info
-def _S() -> Session:
-    if "session" not in st.session_state:
-        st.session_state.session = Session()
-    return st.session_state.session
-def _load_catalog() -> list[dict]:
-    import yaml
-    with open(CATALOG) as f:
-        return yaml.safe_load(f)["specialists"]
-def _exec_mode_badges(S: "Session") -> str:
-    """Return inline HTML badge strip showing execution and task-generation modes."""
-    import os
-    has_key   = bool(os.getenv("OPENAI_API_KEY"))
-    llm_tasks = S.env is not None and S.env.task_bank._client is not None
-    exec_b = (
-        '<span style="padding:3px 10px;border-radius:999px;font-size:10px;font-weight:700;'
-        'background:rgba(16,185,129,0.1);color:#34d399;'
-        'border:1px solid rgba(16,185,129,0.22);">● LLM BASELINE</span>'
-        if has_key else
-        '<span style="padding:3px 10px;border-radius:999px;font-size:10px;font-weight:700;'
-        'background:rgba(245,158,11,0.1);color:#fbbf24;'
-        'border:1px solid rgba(245,158,11,0.22);">'
-        '⚡ SIMULATION MODE — specialist outputs templated · set OPENAI_API_KEY for real LLM</span>'
-    )
-    task_b = (
-        '<span style="padding:3px 10px;border-radius:999px;font-size:10px;font-weight:700;'
-        'background:rgba(16,185,129,0.1);color:#34d399;'
-        'border:1px solid rgba(16,185,129,0.22);">● LLM TASKS</span>'
-        if llm_tasks else
-        '<span style="padding:3px 10px;border-radius:999px;font-size:10px;font-weight:700;'
-        'background:rgba(148,163,184,0.08);color:#64748b;'
-        'border:1px solid rgba(148,163,184,0.18);">⚡ CATALOG TASKS</span>'
-    ) if S.env is not None else ""
-    return (
-        f'<div style="display:flex;gap:8px;flex-wrap:wrap;margin:4px 0 12px;">'
-        f'{exec_b}{task_b}</div>'
-    )
-# ─────────────────────────────────────────────────────────
-# Chart builders
-# ─────────────────────────────────────────────────────────
-def fig_reward_curve(rewards: list[float]) -> go.Figure:
-    if not rewards:
-        fig = go.Figure()
-        fig.update_layout(
-            **DARK, **DARK_AXES,
-            title=dict(text="Episode Reward", font=dict(size=13, color="#64748b")),
-            annotations=[dict(text="Reset the environment to begin",
-                              x=0.5, y=0.5, showarrow=False,
-                              font=dict(color="#334155", size=13))],
-        )
-        return fig
-    steps = list(range(len(rewards)))
-    cumul = np.cumsum(rewards).tolist()
-    fig   = make_subplots(rows=2, cols=1, shared_xaxes=True,
-                          row_heights=[0.62, 0.38], vertical_spacing=0.04)
-    fig.add_trace(go.Scatter(
-        x=steps, y=cumul, mode="lines",
-        line=dict(color="#00d4ff", width=2.5),
-        fill="tozeroy", fillcolor="rgba(0,212,255,0.07)",
-        name="Cumulative",
-    ), row=1, col=1)
-    fig.add_trace(go.Bar(
-        x=steps, y=rewards,
-        marker_color=["#10b981" if r >= 0 else "#ef4444" for r in rewards],
-        marker_line_width=0, name="Per-step",
-    ), row=2, col=1)
-    fig.update_layout(**DARK, height=300, showlegend=False,
-                      title=dict(text="Episode Reward", font=dict(size=13, color="#94a3b8")))
-    fig.update_xaxes(gridcolor="rgba(255,255,255,0.05)")
-    fig.update_yaxes(gridcolor="rgba(255,255,255,0.05)",
-                     title_text="Cumul.", row=1, col=1, title_font_size=10)
-    fig.update_yaxes(title_text="Step", row=2, col=1, title_font_size=10)
-    return fig
-def fig_delegation_graph(
-    S: Session,
-    called_ids: list[str],
-    edges: list[tuple],
-    highlight_latest: bool = True,
-    spawned_ids: list[str] | None = None,
-) -> go.Figure:
-    """
-    Professional hierarchical DAG layout.
-    Orchestrator at top, called specialists in middle, uncalled dimmed at bottom.
-    """
-    all_ids     = list(S.registry.list_ids()) if S.registry else []
-    called_set  = set(called_ids)
-    spawned_set = set(spawned_ids or S.spawned_specialists)
-    uncalled    = [x for x in all_ids if x not in called_set]
-    # ── Build node positions (hierarchical layout) ───────────────────
-    pos = {"orchestrator": (0.5, 0.92)}
-    n_called = len(called_ids)
-    if n_called > 0:
-        for i, sid in enumerate(called_ids):
-            x = (i + 1) / (n_called + 1)
-            pos[sid] = (x, 0.55)
-    n_uncalled = len(uncalled)
-    if n_uncalled > 0:
-        for i, sid in enumerate(uncalled):
-            x = (i + 1) / (n_uncalled + 1)
-            pos[sid] = (x, 0.12)
-    fig = go.Figure()
-    # ── Background depth ring ────────────────────────────────────────
-    max_depth   = getattr(S.env, "max_depth", 2) if S.env else 2
-    cur_depth   = S.env.delegation_graph.depth if S.env else 0
-    depth_frac  = cur_depth / max(max_depth, 1)
-    ring_color  = ("#10b981" if depth_frac < 0.7
-                   else ("#f59e0b" if depth_frac < 1.0 else "#ef4444"))
-    fig.add_shape(type="rect",
-        x0=0.0, y0=0.0, x1=1.0, y1=1.0,
-        line=dict(color=ring_color, width=2, dash="dot"),
-        fillcolor="rgba(0,0,0,0)", xref="x", yref="y",
-    )
-    fig.add_annotation(
-        x=0.98, y=0.98, xref="x", yref="y",
-        text=f"Depth {cur_depth}/{max_depth}", showarrow=False,
-        font=dict(size=9, color=ring_color), xanchor="right", yanchor="top",
-    )
-    # ── Edges ────────────────────────────────────────────────────────
-    latest_edge = edges[-1] if edges else None
-    for src, dst in edges:
-        if src not in pos or dst not in pos:
-            continue
-        x0, y0 = pos[src]
-        x1, y1 = pos[dst]
-        is_latest = (latest_edge and highlight_latest and (src, dst) == latest_edge)
-        color = "rgba(0,212,255,0.9)" if is_latest else "rgba(0,212,255,0.45)"
-        width = 2.5 if is_latest else 1.8
-        dash  = "dash" if is_latest else "solid"
-        fig.add_trace(go.Scatter(
-            x=[x0, x1, None], y=[y0, y1, None], mode="lines",
-            line=dict(color=color, width=width, dash=dash),
-            hoverinfo="skip", showlegend=False,
-        ))
-        fig.add_annotation(
-            ax=x0, ay=y0, x=x1, y=y1,
-            xref="x", yref="y", axref="x", ayref="y",
-            arrowhead=3, arrowsize=1.4, arrowwidth=2,
-            arrowcolor=color, showarrow=True,
-        )
-    # ── Orchestrator node ────────────────────────────────────────────
-    ox, oy = pos["orchestrator"]
-    fig.add_trace(go.Scatter(
-        x=[ox], y=[oy], mode="markers+text",
-        marker=dict(size=44, color="#f59e0b", symbol="circle",
-                    line=dict(color="#fcd34d", width=2.5), opacity=1.0),
-        text=["<b>ORCH</b>"], textposition="middle center",
-        textfont=dict(size=9, color="#0a0f1a", family="Inter, sans-serif"),
-        hovertext=["<b>Orchestrator</b><br>Root node — makes all delegation decisions"],
-        hoverinfo="text", showlegend=False, name="orchestrator",
-    ))
-    # ── Called specialist nodes ──────────────────────────────────────
-    for sid in called_ids:
-        if sid not in pos:
-            continue
-        x, y      = pos[sid]
-        c         = SPEC_COLORS.get(sid, "#7c3aed")
-        spec      = S.registry.get(sid) if S.registry else None
-        role      = spec.role if spec else sid
-        lat       = f"{spec.avg_latency_ms}ms" if spec else ""
-        is_spawned = sid in spawned_set
-        symbol    = "star" if is_spawned else "circle"
-        size      = 38 if is_spawned else 32
-        border_c  = "#fbbf24" if is_spawned else "rgba(255,255,255,0.4)"
-        hover_tag = " ⚡ AUTO-SPAWNED" if is_spawned else ""
-        label     = (("⚡ " if is_spawned else "") + sid).replace("_", "<br>")
-        fig.add_trace(go.Scatter(
-            x=[x], y=[y], mode="markers+text",
-            marker=dict(size=size, color=c, symbol=symbol,
-                        line=dict(color=border_c, width=2.5), opacity=1.0),
-            text=[label], textposition="bottom center",
-            textfont=dict(size=8, color="#fbbf24" if is_spawned else "#e2e8f0"),
-            hovertext=[f"<b>{role}</b><br>Called ✓{hover_tag}<br>{lat}"],
-            hoverinfo="text", showlegend=False,
-        ))
-    # ── Uncalled specialist nodes (dimmed) ───────────────────────────
-    for sid in uncalled:
-        if sid not in pos:
-            continue
-        x, y  = pos[sid]
-        c     = SPEC_COLORS.get(sid, "#334155")
-        spec  = S.registry.get(sid) if S.registry else None
-        role  = spec.role if spec else sid
-        label = sid.replace("_", "<br>")
-        fig.add_trace(go.Scatter(
-            x=[x], y=[y], mode="markers+text",
-            marker=dict(size=16, color="#1e293b", symbol="circle",
-                        line=dict(color=c, width=1), opacity=0.5),
-            text=[label], textposition="bottom center",
-            textfont=dict(size=7, color="rgba(148,163,184,0.45)"),
-            hovertext=[f"<b>{role}</b><br>Not called"],
-            hoverinfo="text", showlegend=False,
-        ))
-    # ── Section labels ───────────────────────────────────────────────
-    fig.add_annotation(x=0.01, y=0.96, xref="x", yref="y",
-        text="ORCHESTRATOR", showarrow=False,
-        font=dict(size=8, color="#475569"), xanchor="left")
-    if called_ids:
-        fig.add_annotation(x=0.01, y=0.62, xref="x", yref="y",
-            text="CALLED", showarrow=False,
-            font=dict(size=8, color="#00d4ff"), xanchor="left")
-    if uncalled:
-        fig.add_annotation(x=0.01, y=0.19, xref="x", yref="y",
-            text="AVAILABLE", showarrow=False,
-            font=dict(size=8, color="#334155"), xanchor="left")
-    fig.update_layout(
-        **DARK, height=420,
-        title=dict(
-            text=(f"Delegation Graph  ·  {len(called_ids)} specialists called"
-                  f"  ·  Depth {cur_depth}/{max_depth}"),
-            font=dict(size=13, color="#94a3b8"),
-        ),
-        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.05, 1.05]),
-        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.05, 1.08]),
-    )
-    return fig
-def fig_reward_breakdown(components: dict) -> go.Figure:
-    if not components:
-        components = {k: 0.0 for k in [
-            "quality_delta", "efficiency_penalty", "failure_penalty",
-            "recovery_bonus", "conflict_penalty", "conflict_bonus",
-            "consistency_bonus", "latency_penalty", "explanation_bonus",
-        ]}
-    names  = list(components.keys())
-    values = [components[k] for k in names]
-    fig = go.Figure(go.Bar(
-        x=values,
-        y=[n.replace("_", " ").title() for n in names],
-        orientation="h",
-        marker_color=["#10b981" if v >= 0 else "#ef4444" for v in values],
-        marker_line_width=0,
-        text=[f"{v:+.3f}" for v in values],
-        textposition="outside",
-        textfont=dict(color="#94a3b8", size=9),
-    ))
-    fig.add_vline(x=0, line_color="rgba(255,255,255,0.15)", line_width=1)
-    fig.update_layout(**DARK, height=310,
-                      title=dict(text="Reward Breakdown", font=dict(size=13, color="#94a3b8")),
-                      xaxis=dict(gridcolor="rgba(255,255,255,0.05)", title="Value"),
-                      yaxis=dict(gridcolor="rgba(255,255,255,0.05)"))
-    return fig
-def fig_policy_confidence(
-    entropies: list[float],
-    step_labels: list[int] | None = None,
-) -> go.Figure:
-    """
-    Policy confidence chart — specialist-selection entropy per step.
-    High entropy = uncertain/exploring. Low = confident/committed.
-    Real data from actual action vectors used each step.
-    """
-    if not entropies:
-        fig = go.Figure()
-        fig.update_layout(
-            **DARK, **DARK_AXES,
-            title=dict(text="Policy Confidence (Action Entropy)",
-                       font=dict(size=13, color="#64748b")),
-            annotations=[dict(text="Run an episode to see real action entropy",
-                              x=0.5, y=0.5, showarrow=False,
-                              font=dict(color="#334155", size=12))],
-        )
-        return fig
-    steps   = step_labels or list(range(1, len(entropies) + 1))
-    max_e   = float(np.log(max(len(entropies), 2)))
-    norm_e  = [min(1.0, max(0.0, e / max(max_e, 1e-8))) for e in entropies]
-    colors  = [
-        f"rgba({int(0 + 124 * ne)},{int(212 - 154 * ne)},{int(255 - 58 * ne)},0.85)"
-        for ne in norm_e
-    ]
-    fig = go.Figure()
-    fig.add_trace(go.Bar(
-        x=steps, y=norm_e,
-        marker_color=colors, marker_line_width=0,
-        name="Normalised entropy",
-        text=[f"{e:.3f}" for e in entropies],
-        textposition="outside",
-        textfont=dict(size=8, color="#94a3b8"),
-        hovertemplate="Step %{x}<br>Entropy: %{text}<extra></extra>",
-    ))
-    fig.add_hline(y=0.5, line_dash="dot", line_color="rgba(148,163,184,0.3)",
-                  annotation_text="Mid-entropy", annotation_font_color="#475569")
-    fig.update_layout(
-        **DARK, height=260,
-        title=dict(text="Policy Confidence — Specialist Selection Entropy per Step",
-                   font=dict(size=12, color="#94a3b8")),
-        xaxis=dict(title="Episode Step", gridcolor="rgba(255,255,255,0.05)",
-                   zerolinecolor="rgba(255,255,255,0.08)"),
-        yaxis=dict(title="Entropy (0=certain, 1=uniform)", range=[0, 1.15],
-                   gridcolor="rgba(255,255,255,0.05)", zerolinecolor="rgba(255,255,255,0.08)"),
-        showlegend=False,
-    )
-    return fig
-def fig_similarity(registry: SpecialistRegistry) -> go.Figure:
-    ids = registry.list_ids()
-    n   = len(ids)
-    if n == 0:
-        fig = go.Figure()
-        fig.update_layout(**DARK, title=dict(text="No specialists in registry",
-                                              font=dict(size=13, color="#64748b")))
-        return fig
-    missing = [sid for sid in ids if registry.get(sid).embedding is None]
-    if missing:
-        fig = go.Figure()
-        fig.update_layout(
-            **DARK, **DARK_AXES,
-            title=dict(text="Embeddings not computed — boot the environment first",
-                       font=dict(size=13, color="#64748b")),
-            annotations=[dict(text=f"Missing embeddings: {', '.join(missing[:4])}",
-                              x=0.5, y=0.5, showarrow=False,
-                              font=dict(color="#334155", size=12))],
-        )
-        return fig
-    mat = np.zeros((n, n))
-    try:
-        for i, a in enumerate(ids):
-            for j, b in enumerate(ids):
-                ea = registry.get(a).to_state_vector()
-                eb = registry.get(b).to_state_vector()
-                mat[i][j] = float(np.dot(ea, eb))
-    except Exception as exc:
-        fig = go.Figure()
-        fig.update_layout(**DARK, title=dict(text=f"Similarity error: {exc}",
-                                              font=dict(size=13, color="#ef4444")))
-        return fig
-    labels = [x.replace("_", "<br>") for x in ids]
-    fig = go.Figure(go.Heatmap(
-        z=mat, x=labels, y=labels,
-        colorscale=[[0, "#0f0f1a"], [0.5, "rgba(124,58,237,0.6)"], [1, "#00d4ff"]],
-        showscale=True, zmin=0, zmax=1,
-        text=np.round(mat, 2), texttemplate="%{text}", textfont=dict(size=9),
-    ))
-    fig.update_layout(**DARK, height=400,
-                      title=dict(text="Capability Similarity (Cosine)", font=dict(size=13, color="#94a3b8")))
-    return fig
-def fig_training_curve() -> go.Figure:
-    path = ASSETS / "reward_curve.json"
-    if path.exists():
-        with open(path) as f:
-            d = json.load(f)
-        eps, rews = d["episodes"], d["mean_rewards"]
-    else:
-        rng  = np.random.default_rng(42)
-        eps  = list(range(0, 201, 5))
-        rews = [float(np.clip(0.1 + 0.5 * (1 - np.exp(-e / 80)) + rng.normal(0, 0.04), 0, 1))
-                for e in eps]
-    smooth = [float(np.mean(rews[max(0, i - 4):i + 1])) for i in range(len(rews))]
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(x=eps, y=rews, mode="markers",
-                             marker=dict(size=5, color="rgba(0,212,255,0.35)"),
-                             name="Episode"))
-    fig.add_trace(go.Scatter(x=eps, y=smooth, mode="lines",
-                             line=dict(color="#00d4ff", width=2.5),
-                             fill="tozeroy", fillcolor="rgba(0,212,255,0.06)",
-                             name="Smoothed"))
-    fig.add_hline(y=0.1, line_dash="dash", line_color="rgba(148,163,184,0.35)",
-                  annotation_text="Random baseline", annotation_font_color="#64748b")
-    fig.update_layout(**DARK, **DARK_AXES, height=340,
-                      title=dict(text="Training Progress — Mean Reward per Episode",
-                                 font=dict(size=13, color="#94a3b8")),
-                      xaxis_title="Episode", yaxis_title="Mean Reward",
-                      legend=dict(bgcolor="rgba(0,0,0,0)"))
-    return fig
-def fig_training_entropy() -> go.Figure:
-    """
-    Policy entropy over training.
-    Reads from demo/assets/entropy_log.json if produced by train.py,
-    or from current session entropy if no log exists.
-    Never shows fake data — gracefully absent if neither source exists.
-    """
-    path = ASSETS / "entropy_log.json"
-    S    = _S()
-    if path.exists():
-        with open(path) as f:
-            d = json.load(f)
-        episodes     = d["episodes"]
-        entropies    = d["mean_entropies"]
-        source_label = "From training log"
-    elif S.step_entropies:
-        episodes     = list(range(1, len(S.step_entropies) + 1))
-        entropies    = S.step_entropies
-        source_label = "Current episode (live)"
-    else:
-        fig = go.Figure()
-        fig.update_layout(
-            **DARK, **DARK_AXES,
-            title=dict(text="Policy Entropy — Run training to populate",
-                       font=dict(size=13, color="#64748b")),
-            annotations=[dict(
-                text="Run python training/train.py to generate entropy logs",
-                x=0.5, y=0.5, showarrow=False,
-                font=dict(color="#334155", size=12),
-            )],
-        )
-        return fig
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(
-        x=episodes, y=entropies, mode="lines+markers",
-        line=dict(color="#7c3aed", width=2.2),
-        marker=dict(size=4, color="#a78bfa"),
-        fill="tozeroy", fillcolor="rgba(124,58,237,0.06)",
-        name=source_label,
-    ))
-    fig.update_layout(
-        **DARK, **DARK_AXES, height=280,
-        title=dict(text=f"Policy Entropy over Training ({source_label})",
-                   font=dict(size=13, color="#94a3b8")),
-        xaxis_title="Episode / Step",
-        yaxis_title="Action Selection Entropy",
-        legend=dict(bgcolor="rgba(0,0,0,0)"),
-    )
-    return fig
-# ─────────────────────────────────────────────────────────
-# Quality-comparison helpers
-# ─────────────────────────────────────────────────────────
-def _generate_generic_output(task: str) -> str:
-    """Call GPT-4o-mini directly with the task — no specialist routing."""
-    import os
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        return (
-            "General problem-solving approach:\n"
-            "1. Gather and clarify requirements\n"
-            "2. Research common solution patterns\n"
-            "3. Draft a high-level architecture\n"
-            "4. Implement in small, testable increments\n"
-            "5. Validate against acceptance criteria and deploy\n"
-            "No specialist domain expertise applied."
-        )
-    try:
-        from openai import OpenAI
-        resp = OpenAI(api_key=api_key).chat.completions.create(
-            model="gpt-4o-mini",
-            max_tokens=600,
-            messages=[
-                {"role": "system",
-                 "content": "You are a general-purpose software engineering assistant."},
-                {"role": "user",
-                 "content": f"Provide a detailed solution approach for this task:\n\n{task}"},
-            ],
-        )
-        return resp.choices[0].message.content
-    except Exception as exc:
-        return f"(Generic output generation failed: {exc})"
-def _t1_relevance(task: str, output: str, registry) -> float:
-    """Cosine similarity between task and output embeddings, scaled 0–10."""
-    try:
-        import numpy as np
-        t = registry.embed_query(task)
-        o = registry.embed_query(output[:800])
-        if t is None or o is None:
-            return 0.0
-        cos = float(np.dot(t, o) / (np.linalg.norm(t) * np.linalg.norm(o) + 1e-8))
-        return round(max(0.0, cos) * 10, 2)
-    except Exception:
-        return 0.0
-def _judge_compare(task: str, generic: str, specialist: str) -> dict | None:
-    """GPT-4o-mini rates both outputs on 4 dimensions. Returns {dim: [generic, specialist]}."""
-    import os, json
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        return None
-    prompt = (
-        f"Task:\n{task[:400]}\n\n"
-        f"Output A (generic, no specialist routing):\n{generic[:700]}\n\n"
-        f"Output B (specialist-routed by trained policy):\n{specialist[:700]}\n\n"
-        "Rate each output 1–10 on: technical_depth, specificity, actionability, coverage.\n"
-        'Return JSON only: {"technical_depth":[A,B],"specificity":[A,B],'
-        '"actionability":[A,B],"coverage":[A,B]}'
-    )
-    try:
-        from openai import OpenAI
-        resp = OpenAI(api_key=api_key).chat.completions.create(
-            model="gpt-4o-mini",
-            max_tokens=150,
-            response_format={"type": "json_object"},
-            messages=[{"role": "user", "content": prompt}],
-        )
-        return json.loads(resp.choices[0].message.content)
-    except Exception:
-        return None
-def fig_radar_comparison(
-    gen_scores: dict,
-    spec_scores: dict,
-) -> go.Figure:
-    dims   = list(gen_scores.keys())
-    g_vals = [gen_scores[d]  for d in dims]
-    s_vals = [spec_scores[d] for d in dims]
-    dims_c = dims   + [dims[0]]
-    g_c    = g_vals + [g_vals[0]]
-    s_c    = s_vals + [s_vals[0]]
-    fig = go.Figure()
-    fig.add_trace(go.Scatterpolar(
-        r=g_c, theta=dims_c, fill="toself",
-        fillcolor="rgba(239,68,68,0.10)",
-        line=dict(color="#ef4444", width=2),
-        name="Generic (no routing)",
-    ))
-    fig.add_trace(go.Scatterpolar(
-        r=s_c, theta=dims_c, fill="toself",
-        fillcolor="rgba(0,212,255,0.13)",
-        line=dict(color="#00d4ff", width=2.5),
-        name="Specialist-routed",
-    ))
-    fig.update_layout(
-        paper_bgcolor="rgba(0,0,0,0)",
-        font=dict(color="#e2e8f0", family="Inter, system-ui, sans-serif"),
-        polar=dict(
-            bgcolor="rgba(0,0,0,0)",
-            radialaxis=dict(
-                visible=True, range=[0, 10],
-                gridcolor="rgba(255,255,255,0.08)",
-                tickfont=dict(size=9, color="#475569"),
-            ),
-            angularaxis=dict(
-                gridcolor="rgba(255,255,255,0.08)",
-                tickfont=dict(size=11, color="#94a3b8"),
-            ),
-        ),
-        title=dict(
-            text="Quality Radar — Generic vs Specialist-Routed",
-            font=dict(size=13, color="#94a3b8"),
-        ),
-        legend=dict(bgcolor="rgba(0,0,0,0)", font=dict(color="#94a3b8", size=11)),
-        height=420,
-        margin=dict(l=60, r=60, t=60, b=40),
-    )
-    return fig
-# ─────────────────────────────────────────────────────────
-# UI helpers
-# ─────────────────────────────────────────────────────────
-def inject_css():
-    st.markdown("""
-<style>
-@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');
-html, body, [data-testid="stAppViewContainer"] {
-    background: #0f0f1a !important;
-    font-family: 'Inter', system-ui, sans-serif !important;
-}
-[data-testid="stHeader"]  { background: transparent !important; }
-[data-testid="stToolbar"] { display: none !important; }
-[data-testid="stTabs"] > div:first-child button {
-    color: #475569 !important; font-weight: 600 !important; font-size: 13px !important;
-}
-[data-testid="stTabs"] > div:first-child button[aria-selected="true"] {
-    color: #00d4ff !important; border-bottom-color: #00d4ff !important;
-}
-.stButton > button {
-    border-radius: 8px !important; font-weight: 600 !important;
-    font-size: 13px !important; transition: all .18s !important;
-    border: 1px solid rgba(255,255,255,0.18) !important;
-    background: rgba(255,255,255,0.10) !important; color: #e2e8f0 !important;
-}
-.stButton > button:hover {
-    background: rgba(255,255,255,0.18) !important;
-    border-color: rgba(0,212,255,0.45) !important;
-    color: #ffffff !important;
-}
-.stButton > button[kind="primary"] {
-    background: linear-gradient(135deg,#00d4ff,#0092bb) !important;
-    border: none !important; color: #0a0f1a !important;
-}
-.stButton > button[kind="primary"]:hover {
-    box-shadow: 0 4px 18px rgba(0,212,255,0.35) !important;
-}
-[data-testid="stTextInput"] input,
-[data-testid="stTextArea"]  textarea {
-    background: rgba(0,0,0,0.3) !important;
-    border: 1px solid rgba(255,255,255,0.09) !important;
-    color: #e2e8f0 !important; border-radius: 8px !important;
-}
-[data-testid="stSelectbox"] > div > div {
-    background: rgba(0,0,0,0.35) !important;
-    border: 1px solid rgba(255,255,255,0.09) !important;
-    border-radius: 8px !important; color: #e2e8f0 !important;
-}
-[data-testid="stSlider"] [data-testid="stTickBar"] { color: #475569 !important; }
-[data-testid="metric-container"] {
-    background: rgba(255,255,255,0.03) !important;
-    border: 1px solid rgba(255,255,255,0.07) !important;
-    border-radius: 12px !important; padding: 16px !important;
-}
-[data-testid="stMetric"] label { color: #475569 !important; font-size: 11px !important; }
-[data-testid="stMetricValue"]  { color: #00d4ff !important; font-weight: 700 !important; }
-[data-testid="stCode"], .stCodeBlock {
-    background: rgba(0,0,0,0.4) !important;
-    border: 1px solid rgba(255,255,255,0.07) !important;
-    border-radius: 10px !important;
-}
-hr { border-color: rgba(255,255,255,0.07) !important; }
-::-webkit-scrollbar { width: 4px; height: 4px; }
-::-webkit-scrollbar-thumb { background: rgba(255,255,255,0.1); border-radius: 4px; }
-::-webkit-scrollbar-track { background: transparent; }
-</style>
-""", unsafe_allow_html=True)
-def hero():
-    st.markdown("""
-<div style="background:linear-gradient(135deg,#0f0f1a,#130a22,#091422);
-            border:1px solid rgba(0,212,255,0.14);border-radius:16px;
-            padding:28px 36px;margin-bottom:4px;position:relative;overflow:hidden;">
-  <div style="position:absolute;top:-60px;right:-40px;width:360px;height:360px;
-              background:radial-gradient(circle,rgba(124,58,237,0.11) 0%,transparent 70%);
-              pointer-events:none;"></div>
-  <div style="position:absolute;bottom:-60px;left:15%;width:280px;height:280px;
-              background:radial-gradient(circle,rgba(0,212,255,0.07) 0%,transparent 70%);
-              pointer-events:none;"></div>
-  <div style="font-size:28px;font-weight:800;
-              background:linear-gradient(90deg,#00d4ff,#7c3aed,#00d4ff);
-              background-size:200% auto;-webkit-background-clip:text;
-              -webkit-text-fill-color:transparent;background-clip:text;
-              margin:0 0 8px;">SpindleFlow RL</div>
-  <div style="color:#64748b;font-size:13px;margin:0;">
-    Delegation Policy Learning Environment &mdash;
-    Teaching orchestrators to route, specialize, and stop.
-  </div>
-</div>
-""", unsafe_allow_html=True)
-def sec(title: str):
-    st.markdown(
-        f'<div style="font-size:11px;font-weight:700;color:#475569;text-transform:uppercase;'
-        f'letter-spacing:1px;padding-bottom:10px;border-bottom:1px solid rgba(255,255,255,0.07);'
-        f'margin:18px 0 14px;">{title}</div>',
-        unsafe_allow_html=True,
-    )
-def status_bar(msg: str, color: str = "#94a3b8"):
-    st.markdown(
-        f'<div style="background:rgba(0,0,0,0.3);border:1px solid rgba(255,255,255,0.07);'
-        f'border-radius:8px;padding:10px 16px;font-size:12px;color:{color};margin:6px 0 10px;">'
-        f'{_html.escape(msg)}</div>',
-        unsafe_allow_html=True,
-    )
-def render_live_stats(S: Session) -> None:
-    """Sidebar live stats strip — all values read directly from session state."""
-    with st.sidebar:
-        st.markdown(
-            '<div style="font-size:10px;font-weight:700;color:#00d4ff;'
-            'text-transform:uppercase;letter-spacing:1px;margin-bottom:12px;">'
-            '● Live Episode Stats</div>',
-            unsafe_allow_html=True,
-        )
-        status = ("Running"  if (S.env is not None and not S.done) else
-                  "Complete" if S.done else "Idle")
-        status_color = ("#10b981" if status == "Running" else
-                        "#f59e0b" if status == "Complete" else "#475569")
-        st.markdown(
-            f'<div style="display:flex;justify-content:space-between;'
-            f'padding:6px 0;border-bottom:1px solid rgba(255,255,255,0.05);">'
-            f'<span style="font-size:11px;color:#475569;">Status</span>'
-            f'<span style="font-size:11px;font-weight:700;color:{status_color};">'
-            f'{status}</span></div>',
-            unsafe_allow_html=True,
-        )
-        unique_called = len(set(
-            sp for h in S.episode_history for sp in h.get("called", [])
-        ))
-        dag_depth = str(S.env.delegation_graph.depth) if S.env else "—"
-        stats = [
-            ("Step",         str(S.step_n),                                              "#e2e8f0"),
-            ("Total Reward", f"{sum(S.rewards):+.4f}" if S.rewards else "—",
-             "#10b981" if (S.rewards and sum(S.rewards) >= 0) else "#ef4444"),
-            ("Mean Step Rwd",f"{float(np.mean(S.rewards)):+.4f}" if S.rewards else "—", "#94a3b8"),
-            ("Specialists",  str(unique_called),                                         "#7c3aed"),
-            ("DAG Depth",    dag_depth,                                                  "#f59e0b"),
-            ("Mean Entropy", f"{float(np.mean(S.step_entropies)):.3f}"
-                             if S.step_entropies else "—",                               "#00d4ff"),
-        ]
-        for label, value, color in stats:
-            st.markdown(
-                f'<div style="display:flex;justify-content:space-between;'
-                f'padding:5px 0;border-bottom:1px solid rgba(255,255,255,0.04);">'
-                f'<span style="font-size:11px;color:#475569;">{label}</span>'
-                f'<span style="font-size:11px;font-weight:600;color:{color};">'
-                f'{value}</span></div>',
-                unsafe_allow_html=True,
-            )
-        if S.rewards:
-            st.markdown('<div style="margin-top:12px;"></div>', unsafe_allow_html=True)
-            st.plotly_chart(fig_reward_curve(S.rewards), use_container_width=True)
-def _render_replay_step(S: Session, step_idx: int) -> None:
-    """Render charts for a specific historical step — no env calls."""
-    if not S.episode_history or step_idx >= len(S.episode_history):
-        st.info("No episode data to replay. Run an episode first.")
-        return
-    snap       = S.episode_history[step_idx]
-    cumulative = snap["cumulative"]
-    # Cumulative called specialists up to and including this step
-    cumulative_called = list({
-        sp
-        for h in S.episode_history[:step_idx + 1]
-        for sp in h.get("called", [])
-    })
-    st.markdown(
-        f'<div style="background:rgba(124,58,237,0.07);border:1px solid rgba(124,58,237,0.2);'
-        f'border-radius:10px;padding:12px 18px;font-size:12px;color:#a78bfa;margin-bottom:12px;">'
-        f'Replaying Step {snap["step"]}  ·  Action: <b>{snap["action_name"]}</b>  ·  '
-        f'Reward: <b>{snap["reward"]:+.4f}</b>  ·  '
-        f'Cumulative: <b>{cumulative:+.4f}</b></div>',
-        unsafe_allow_html=True,
-    )
-    rc1, rc2 = st.columns(2)
-    with rc1:
-        st.plotly_chart(
-            fig_delegation_graph(S, cumulative_called, snap["edges"], highlight_latest=False),
-            use_container_width=True,
-            key=f"replay_dag_{step_idx}",
-        )
-    with rc2:
-        st.plotly_chart(
-            fig_reward_breakdown(snap["components"]),
-            use_container_width=True,
-            key=f"replay_breakdown_{step_idx}",
-        )
-    sec("Action Trace at This Step")
-    trace_lines = []
-    for h in S.episode_history[:step_idx + 1]:
-        sign        = "+" if h["reward"] >= 0 else ""
-        called_str  = ", ".join(h["called"]) if h["called"] else "—"
-        marker      = "► " if h["step"] == snap["step"] else "  "
-        trace_lines.append(
-            f"{marker}Step {h['step']:>2}  │  {h['action_name']:<22}  │  "
-            f"reward: {sign}{h['reward']:.4f}  │  specialists: {called_str}"
-        )
-    st.code("\n".join(trace_lines), language=None)
-# ─────────────────────────────────────────────────────────
-# Tab 1 — Live Demo
-# ─────────────────────────────────────────────────────────
-def tab_live_demo():
-    S = _S()
-    col_task, col_ctrl = st.columns([3, 2], gap="large")
-    with col_task:
-        sec("Task")
-        task_dd  = st.selectbox("Preset task", PRESET_TASKS, key="task_dd")
-        task_txt = st.text_input("Or enter custom task",
-                                 placeholder="Describe a software engineering task…",
-                                 key="task_txt")
-        phase    = st.slider("Curriculum phase", 1, 3, 1, key="phase_sl")
-    with col_ctrl:
-        sec("Controls")
-        c1, c2    = st.columns(2)
-        reset_btn = c1.button("Reset Episode",    type="primary",    use_container_width=True, key="reset_btn")
-        run_btn   = c2.button("Run Full Episode", use_container_width=True,                    key="run_btn")
-        st.markdown('<div style="height:6px"></div>', unsafe_allow_html=True)
-        use_trained = st.checkbox("🤖 Use Trained Policy", value=False, key="use_trained",
-                                  help="Load the trained RecurrentPPO model from HF Hub")
-        trained_model = obs_mean = obs_var = None
-        clip_obs = 10.0
-        if use_trained:
-            with st.spinner("Loading trained model from HF Hub…"):
-                trained_model, obs_mean, obs_var, clip_obs, model_err = _load_trained_model(HF_MODEL_REPO)
-            if model_err:
-                st.error(f"Model load failed: {model_err}")
-            else:
-                st.success("Trained policy loaded ✓")
-        cat       = _load_catalog()
-        act_type  = st.selectbox("Action type (manual mode)",
-                                 ["RANDOM", "STOP", "CALL SPECIALIST", "PARALLEL SPAWN"],
-                                 key="act_type",
-                                 disabled=use_trained)
-        spec_ids  = [sp["id"] for sp in cat]
-        spec_ch   = st.selectbox("Target specialist", spec_ids, key="spec_ch",
-                                 disabled=use_trained)
-        step_btn  = st.button("Execute One Step",
-                              disabled=(S.env is None or S.done),
-                              use_container_width=True, key="step_btn")
-    status_msg = st.session_state.get("demo_status", "Click 'Reset Episode' to start.")
-    status_clr = "#34d399" if "complete" in status_msg or "started" in status_msg else "#94a3b8"
-    status_bar(status_msg, status_clr)
-    st.markdown(_exec_mode_badges(S), unsafe_allow_html=True)
-    # ── Reset ──────────────────────────────────────────────
-    if reset_btn:
-        with st.spinner("Initializing environment… (first run ~30 s on CPU)"):
-            S.reset(int(phase))
-        spawn_note = (
-            f"  |  ⚡ Spawned: {', '.join(S.spawned_specialists)}"
-            if S.spawned_specialists else ""
-        )
-        st.session_state.demo_status = f'Episode started  |  Task: "{S.task[:90]}"{spawn_note}'
-        st.session_state.last_called = []
-        st.session_state.last_edges  = []
-        st.session_state.last_info   = {}
-        st.rerun()
-    # ── Step ───────────────────────────────────────────────
-    if step_btn and S.env is not None and not S.done:
-        if use_trained and trained_model is not None and S.obs_current is not None:
-            action, S.lstm_states = _predict(
-                trained_model, S.obs_current, S.lstm_states,
-                S.episode_starts, obs_mean, obs_var, clip_obs,
-            )
-        else:
-            action = np.zeros(S.env.action_space.shape, dtype=np.float32)
-            if act_type == "STOP":
-                action[0] = 1.0
-            elif act_type == "CALL SPECIALIST":
-                ids = S.registry.list_ids()
-                if spec_ch in ids:
-                    idx = ids.index(spec_ch)
-                    if idx < S.env.max_specialists:
-                        action[1 + idx] = 1.0
-                else:
-                    action[1] = 1.0
-            elif act_type == "PARALLEL SPAWN":
-                action[0] = 6.0
-                action[1] = 1.0
-                if S.env.max_specialists > 1:
-                    action[2] = 1.0
-                action[1 + S.env.max_specialists] = 1.0
-            else:
-                action = S.env.action_space.sample()
-        _, r, term, trunc, info = S.step(action)
-        done = term or trunc
-        sign = "+" if r >= 0 else ""
-        msg  = f"Step {S.step_n}  |  reward {sign}{r:.4f}  |  {'DONE' if done else 'Running…'}"
-        if done:
-            msg += f"  |  Total: {sum(S.rewards):+.4f}"
-        st.session_state.demo_status = msg
-        # Use cumulative called_ids so graph stays populated even after STOP step
-        called = list(S.env.called_ids)
-        edges  = [(e.caller_id, e.callee_id)
-                  for e in S.env.delegation_graph.get_delegation_path()]
-        st.session_state.last_called = called
-        st.session_state.last_edges  = edges
-        st.session_state.last_info   = info
-        st.rerun()
-    # ── Run Full ───────────────────────────────────────────
-    if run_btn:
-        with st.spinner("Running full episode…"):
-            S.reset(int(phase))
-            info = {}
-            for _ in range(15):
-                if S.done:
-                    break
-                if use_trained and trained_model is not None and S.obs_current is not None:
-                    action, S.lstm_states = _predict(
-                        trained_model, S.obs_current, S.lstm_states,
-                        S.episode_starts, obs_mean, obs_var, clip_obs,
-                    )
-                else:
-                    action = S.env.action_space.sample()
-                _, _, _, _, info = S.step(action)
-        # Use cumulative called_ids so graph stays populated even after STOP step
-        called = list(S.env.called_ids) if S.env else []
-        edges  = [(e.caller_id, e.callee_id)
-                  for e in S.env.delegation_graph.get_delegation_path()]
-        total  = sum(S.rewards)
-        st.session_state.demo_status = (
-            f"Episode complete  |  {S.step_n} steps  |  Total reward: {total:+.4f}"
-        )
-        st.session_state.last_called = called
-        st.session_state.last_edges  = edges
-        st.session_state.last_info   = info
-        st.rerun()
-    # ── Metric strip ──────────────────────────────────────
-    if S.env is not None:
-        mc1, mc2, mc3, mc4 = st.columns(4)
-        mc1.metric("Obs Dim",     int(S.env.observation_space.shape[0]))
-        mc2.metric("Action Dim",  int(S.env.action_space.shape[0]))
-        mc3.metric("Specialists", S.registry.size)
-        mc4.metric("Phase",       phase)
-    # ── Hero: Robot Orchestrator Widget (full width) ──────
-    sec("Orchestrator  ·  Live Delegation View")
-    last_info = st.session_state.get("last_info", {})
-    render_orchestrator({
-        "called":  st.session_state.get("last_called", []),
-        "active":  (st.session_state.get("last_called", []) or [""])[-1]
-                   if not S.done else "",
-        "edges":   st.session_state.get("last_edges",  []),
-        "task":    S.task,
-        "step":    S.step_n,
-        "mode":    last_info.get("delegation_mode", "SEQUENTIAL"),
-        "done":    S.done,
-        "reward":  sum(S.rewards) if S.rewards else None,
-        "phase":   int(st.session_state.get("phase_sl", 1)),
-    })
-    # Thought bubble ticker — robot's last internal monologue
-    _thoughts = last_info.get("thoughts") or last_info.get("thought")
-    if _thoughts:
-        st.markdown(
-            f'<div style="font-size:11px;color:#64748b;margin-top:-8px;padding:4px 8px;">'
-            f'💭 {_html.escape(str(_thoughts))}</div>',
-            unsafe_allow_html=True,
-        )
-    # ── Three-column secondary row ─────────────────────────
-    sc1, sc2, sc3 = st.columns([4, 4, 4])
-    with sc1:
-        st.plotly_chart(fig_reward_curve(S.rewards), use_container_width=True)
-    with sc2:
-        last_info = st.session_state.get("last_info", {})
-        st.plotly_chart(
-            fig_reward_breakdown(last_info.get("reward_components", {})),
-            use_container_width=True,
-        )
-    with sc3:
-        sec("Policy Confidence")
-        if S.step_entropies:
-            st.plotly_chart(
-                fig_policy_confidence(
-                    S.step_entropies,
-                    [h["step"] for h in S.episode_history],
-                ),
-                use_container_width=True,
-            )
-        else:
-            st.markdown(
-                '<div style="color:#334155;font-size:11px;padding:24px;text-align:center;">'
-                'Run an episode to see action entropy.</div>',
-                unsafe_allow_html=True,
-            )
-    # ── Step Log (full width) ──────────────────────────────
-    sec("Step Log / Action Trace")
-    if not S.actions:
-        st.markdown(
-            '<div style="color:#334155;font-size:12px;padding:16px;text-align:center;">'
-            'Waiting… Reset the episode to start.</div>',
-            unsafe_allow_html=True,
-        )
-    else:
-        lines = []
-        for i, (inf, r) in enumerate(zip(S.actions, S.rewards)):
-            sign  = "+" if r >= 0 else ""
-            act   = inf.get("action_name", "UNKNOWN")
-            specs = ", ".join(inf.get("called_specialists", []))
-            mode  = inf.get("delegation_mode", "")
-            e_str = (f" │ entropy: {S.step_entropies[i]:.3f}"
-                     if i < len(S.step_entropies) else "")
-            lats  = inf.get("specialist_latencies", {})
-            lat_str = (
-                "\n       │  → latency:  "
-                + ", ".join(f"{k}: {v:.0f}ms" for k, v in lats.items())
-            ) if lats else ""
-            lines.append(
-                f"Step {i+1:>2} │ {act:<22} │ reward: {sign}{r:.4f}{e_str}"
-                + (f"\n       │  → called:  {specs}" if specs else "")
-                + (f"\n       │  → mode:    {mode}" if mode else "")
-                + lat_str
-            )
-        total = sum(S.rewards)
-        unique_sp = len(set(sp for h in S.episode_history for sp in h.get("called", [])))
-        lines.append(f"{'─'*62}")
-        lines.append(
-            f"Total reward: {'+' if total>=0 else ''}{total:.4f}  │  "
-            f"Steps: {len(S.rewards)}  │  "
-            f"Specialists called: {unique_sp} unique"
-        )
-        st.code("\n".join(lines), language=None)
-    # ── Episode Replay (full width) ────────────────────────
-    if S.episode_history:
-        st.markdown("---")
-        sec("Episode Replay Mode")
-        st.caption(
-            "Scrub backward through every step of the episode. "
-            "Delegation graph, reward breakdown, and action trace all update to that exact state. "
-            "100% real data — no re-simulation."
-        )
-        n_steps = len(S.episode_history)
-        if n_steps > 1:
-            replay_step = st.slider(
-                "Replay step",
-                min_value=1,
-                max_value=n_steps,
-                value=n_steps,
-                step=1,
-                key="replay_slider",
-                format="Step %d",
-            )
-        else:
-            replay_step = 1
-            st.caption("Single-step episode — showing step 1.")
-        _render_replay_step(S, replay_step - 1)
-# ─────────────────────────────────────────────────────────
-# Tab 2 — Specialists
-# ─────────────────────────────────────────────────────────
-def tab_specialists():
-    S = _S()
-    # Prefer live registry so dynamically-added specialists appear immediately.
-    # Fall back to YAML catalog before the environment has been booted.
-    if S.registry is not None:
-        specialists  = S.registry.list_all()
-        source_note  = None
-    else:
-        class _SP:
-            def __init__(self, d: dict):
-                self.id                  = d["id"]
-                self.role                = d["role"]
-                self.description         = d["description"]
-                self.complexity_affinity = d["complexity_affinity"]
-                self.avg_latency_ms      = d["avg_latency_ms"]
-        specialists = [_SP(d) for d in _load_catalog()]
-        source_note = "Showing YAML catalog — run an episode to load the live registry (includes dynamic additions)."
-    # ── Dynamically spawned specialists (accumulated from Output tab runs) ──
-    spawned_pool = st.session_state.get("spawned_pool", [])
-    if spawned_pool:
-        sec(f"⚡ Dynamically Spawned  ·  {len(spawned_pool)} new agent{'s' if len(spawned_pool) != 1 else ''}")
-        st.caption(
-            "These specialists were auto-created during Output tab runs — "
-            "triggered when no existing specialist had sufficient domain coverage (similarity < threshold)."
-        )
-        pool_cols = st.columns(min(len(spawned_pool), 4))
-        for i, sp in enumerate(spawned_pool):
-            with pool_cols[i % 4]:
-                st.markdown(f"""
-<div style="background:rgba(251,191,36,0.06);border:1px solid rgba(251,191,36,0.28);
-            border-left:3px solid #fbbf24;border-radius:12px;
-            padding:14px;margin-bottom:10px;">
-  <div style="font-size:11px;font-weight:700;color:#fbbf24;margin-bottom:5px;">
-    ⚡ {_html.escape(sp['role'])}
-  </div>
-  <div style="font-size:10px;color:#475569;margin-bottom:6px;font-style:italic;">
-    Triggered by: {_html.escape(sp['triggered_by'][:70])}…
-  </div>
-  <div style="font-size:11px;color:#64748b;line-height:1.5;">
-    {_html.escape(sp['description'][:100])}…
-  </div>
-  <div style="font-size:10px;color:#334155;margin-top:8px;padding-top:8px;
-              border-top:1px solid rgba(255,255,255,0.05);">
-    {sp['avg_latency_ms']} ms &nbsp;·&nbsp; {', '.join(sp.get('complexity_affinity', []))}
-  </div>
-</div>""", unsafe_allow_html=True)
-        st.markdown("---")
-    n = len(specialists)
-    sec(f"Roster — {n} specialist{'s' if n != 1 else ''}, capability-embedded")
-    if source_note:
-        st.caption(source_note)
-    spawned_set = set(S.spawned_specialists) if S.registry is not None else set()
-    cols = st.columns(4)
-    for i, sp in enumerate(specialists):
-        c          = SPEC_COLORS.get(sp.id, "#7c3aed")
-        is_spawned = sp.id in spawned_set
-        border_top = "#fbbf24" if is_spawned else c
-        spawn_tag  = (
-            '<span style="font-size:9px;font-weight:700;color:#fbbf24;'
-            'background:rgba(251,191,36,0.1);border:1px solid rgba(251,191,36,0.25);'
-            'border-radius:999px;padding:1px 7px;margin-left:6px;">⚡ AUTO-SPAWNED</span>'
-            if is_spawned else ""
-        )
-        with cols[i % 4]:
-            st.markdown(f"""
-<div style="background:rgba(255,255,255,0.025);border:1px solid {c}22;
-            border-left:3px solid {border_top};border-radius:12px;
-            padding:14px;margin-bottom:10px;">
-  <div style="font-size:11px;font-weight:700;color:{c};margin-bottom:6px;">
-    {sp.role}{spawn_tag}
-  </div>
-  <div style="font-size:11px;color:#64748b;line-height:1.5;">
-    {_html.escape(sp.description[:90])}…
-  </div>
-  <div style="font-size:10px;color:#334155;margin-top:8px;padding-top:8px;
-              border-top:1px solid rgba(255,255,255,0.05);">
-    {sp.avg_latency_ms} ms &nbsp;·&nbsp; {', '.join(sp.complexity_affinity)}
-  </div>
-</div>""", unsafe_allow_html=True)
-    sec("Capability Similarity Matrix")
-    if st.button("Load Similarity Matrix", key="sim_btn"):
-        with st.spinner("Computing cosine similarity across 384-dim embeddings…"):
-            S.boot()
-            st.plotly_chart(fig_similarity(S.registry), use_container_width=True)
-    sec("Add Specialist Dynamically")
-    st.caption("New specialists are immediately representable via their 384-dim embedding — no retraining or YAML edits required.")
-    c1, c2   = st.columns(2)
-    new_id   = c1.text_input("ID",   placeholder="ml_engineer", key="new_id")
-    new_role = c2.text_input("Role", placeholder="ML Engineer",  key="new_role")
-    new_desc = st.text_area("Description",
-                            placeholder="Expert in PyTorch, model training, MLOps pipelines…",
-                            height=80, key="new_desc")
-    if st.button("Add to Roster", type="primary", key="add_btn"):
-        if new_id.strip() and new_role.strip() and new_desc.strip():
-            with st.spinner("Encoding specialist embedding…"):
-                S.boot()
-                S.registry.add_specialist({
-                    "id": new_id.strip(), "role": new_role.strip(),
-                    "description": new_desc.strip(),
-                    "complexity_affinity": ["moderate", "complex"],
-                    "avg_latency_ms": 5000,
-                })
-            st.success(
-                f"'{new_id.strip()}' added. "
-                "Policy can represent it via 384-dim embedding — no retraining needed."
-            )
-            st.plotly_chart(fig_similarity(S.registry), use_container_width=True)
-        else:
-            st.warning("Fill in all three fields.")
-# ─────────────────────────────────────────────────────────
-# Tab 3 — Training
-# ─────────────────────────────────────────────────────────
-def tab_training():
-    sec("Training Progress — Mean Reward per Episode")
-    st.markdown(
-        '<div style="background:rgba(0,212,255,0.06);border:1px solid rgba(0,212,255,0.20);'
-        'border-radius:12px;padding:16px 20px;margin-bottom:18px;">'
-        '<div style="font-size:13px;font-weight:700;color:#00d4ff;margin-bottom:6px;">'
-        '🔁 Want to run a fresh training run?</div>'
-        '<div style="font-size:12px;color:#94a3b8;margin-bottom:10px;">'
-        'Open the <strong style="color:#e2e8f0;">Training Space</strong> below, then click '
-        '<strong style="color:#e2e8f0;">▶ Start Training</strong>. '
-        'When the run completes the new model is pushed to HF Hub and this demo loads it automatically.<br>'
-        '<span style="color:#fb923c;font-size:11px;">⚠️ Starting a new run will overwrite the current A100-trained policy.</span>'
-        '</div>'
-        '<a href="https://huggingface.co/spaces/garvitsachdeva/finalRLEnv" target="_blank" '
-        'style="display:inline-block;background:rgba(0,212,255,0.12);border:1px solid rgba(0,212,255,0.35);'
-        'color:#00d4ff;padding:7px 18px;border-radius:8px;text-decoration:none;font-size:13px;font-weight:600;">'
-        '🚀 Open Training Space →</a>'
-        '</div>',
-        unsafe_allow_html=True,
-    )
-    c_fetch, _ = st.columns([2, 5])
-    if c_fetch.button("📥 Fetch latest curve from HF Hub", key="fetch_curve"):
-        try:
-            import shutil
-            from huggingface_hub import hf_hub_download
-            _tok = os.getenv("HF_TOKEN") or None
-            src = hf_hub_download(HF_MODEL_REPO, "reward_curve.json",
-                                  token=_tok, force_download=True)
-            ASSETS.mkdir(parents=True, exist_ok=True)
-            shutil.copy(src, ASSETS / "reward_curve.json")
-            st.success("reward_curve.json updated — chart will refresh.")
-            st.cache_data.clear()
-        except Exception as exc:
-            st.error(f"Download failed: {exc}")
-    st.plotly_chart(fig_training_curve(), use_container_width=True)
-    sec("Policy Entropy — Action Confidence Over Training")
-    st.caption(
-        "Entropy of the specialist-selection distribution. "
-        "High = exploring (early training). Low = confident routing (converged policy)."
-    )
-    st.plotly_chart(fig_training_entropy(), use_container_width=True)
-    sec("Curriculum Phases")
-    c1, c2, c3 = st.columns(3)
-    _phase_card = lambda col, color, label, eps, desc: col.markdown(
-        f'<div style="background:rgba({color},0.04);border:1px solid rgba({color},0.18);'
-        f'border-radius:12px;padding:18px;">'
-        f'<div style="font-size:10px;font-weight:700;color:rgb({color});text-transform:uppercase;'
-        f'letter-spacing:1px;margin-bottom:8px;">{label}</div>'
-        f'<div style="font-size:22px;font-weight:700;color:#e2e8f0;margin-bottom:5px;">{eps}</div>'
-        f'<div style="font-size:11px;color:#475569;">{desc}</div></div>',
-        unsafe_allow_html=True,
-    )
-    _phase_card(c1, "0,212,255",  "Phase 1 · Atomic",             "200 episodes",
-                "Agent learns basic routing — which single specialist to call.")
-    _phase_card(c2, "124,58,237", "Phase 2 · Moderate",           "400 episodes",
-                "Agent learns multi-specialist coordination and mode selection.")
-    _phase_card(c3, "245,158,11", "Phase 3 · Complex/Enterprise", "600 episodes",
-                "Full delegation strategy with DAG depth, fallbacks, and latency trade-offs.")
-    sec("Quick Start Commands")
-    c1, c2 = st.columns(2)
-    with c1:
-        st.markdown("**Local training**")
-        st.code(
-            "# Demo mode — no OpenAI key needed\n"
-            "cd spindleflow-rl\n"
-            "python training/train.py \\\n"
-            "  --phase 1 --timesteps 50000\n\n"
-            "# Monitor in TensorBoard\n"
-            "tensorboard --logdir tensorboard_logs/",
-            language="bash",
-        )
-    with c2:
-        st.markdown("**Google Colab (T4 GPU, free)**")
-        st.code(
-            "!git clone https://github.com/garvitsachdevaa/kuchbhi\n"
-            "%cd kuchbhi\n"
-            "!pip install -r requirements.txt sb3-contrib\n\n"
-            "# 5k-step demo run\n"
-            "%run colab/train_colab.py",
-            language="python",
-        )
-# ─────────────────────────────────────────────────────────
-# Tab 4 — Quality Demo
-# ─────────────────────────────────────────────────────────
-def tab_quality():
-    results = st.session_state.get("output_results")
-    env_obj = st.session_state.get("output_env")
-    sec("Live Quality Comparison — Generic vs Specialist-Routed")
-    if results is None:
-        st.markdown(
-            '<div style="background:rgba(245,158,11,0.05);border:1px solid rgba(245,158,11,0.2);'
-            'border-radius:12px;padding:28px;text-align:center;">'
-            '<div style="font-size:13px;color:#fbbf24;font-weight:600;margin-bottom:8px;">'
-            'No Output run yet</div>'
-            '<div style="font-size:12px;color:#64748b;">'
-            'Go to the <b>🎯 Output</b> tab, enter a task, and click '
-            '"Run Trained Policy" — then return here to generate the quality comparison.'
-            '</div></div>',
-            unsafe_allow_html=True,
-        )
-    else:
-        task         = results["task"]
-        spec_results = results["specialist_results"]
-        specialist_text = "\n\n".join(
-            f"[{sr['id'].upper()}]\n{sr['output'] or ''}"
-            for sr in spec_results if sr.get("output")
-        ) or "(no specialist output)"
-        # Task banner
-        st.markdown(
-            f'<div style="background:rgba(0,212,255,0.04);border:1px solid rgba(0,212,255,0.18);'
-            f'border-radius:10px;padding:12px 18px;margin-bottom:16px;">'
-            f'<span style="font-size:9px;font-weight:700;color:#475569;text-transform:uppercase;'
-            f'letter-spacing:1px;">Comparing outputs for: </span>'
-            f'<span style="font-size:12px;color:#e2e8f0;">{_html.escape(task[:140])}</span>'
-            f'</div>',
-            unsafe_allow_html=True,
-        )
-        comp_data = st.session_state.get("quality_comparison")
-        already_computed = comp_data is not None and comp_data.get("task") == task
-        if not already_computed:
-            if st.button("⚡ Generate Quality Comparison", type="primary", key="gen_comp_btn"):
-                with st.spinner("Generating generic output + running GPT-4o-mini judge…"):
-                    generic_text = _generate_generic_output(task)
-                    registry     = env_obj.registry if env_obj else None
-                    gen_t1  = _t1_relevance(task, generic_text,    registry) if registry else 5.0
-                    spec_t1 = _t1_relevance(task, specialist_text, registry) if registry else 7.0
-                    judge = _judge_compare(task, generic_text, specialist_text)
-                    def _pick(key, fallback_g, fallback_s):
-                        pair = (judge or {}).get(key, [fallback_g, fallback_s])
-                        return float(pair[0]), float(pair[1])
-                    td_g, td_s = _pick("technical_depth", 5, 7)
-                    sp_g, sp_s = _pick("specificity",     4, 8)
-                    ac_g, ac_s = _pick("actionability",   4, 7)
-                    cv_g, cv_s = _pick("coverage",        5, 8)
-                    gen_scores  = {"Task Relevance": gen_t1,  "Technical Depth": td_g,
-                                   "Specificity": sp_g, "Actionability": ac_g, "Coverage": cv_g}
-                    spec_scores = {"Task Relevance": spec_t1, "Technical Depth": td_s,
-                                   "Specificity": sp_s, "Actionability": ac_s, "Coverage": cv_s}
-                    st.session_state.quality_comparison = {
-                        "task":        task,
-                        "generic":     generic_text,
-                        "specialist":  specialist_text,
-                        "gen_scores":  gen_scores,
-                        "spec_scores": spec_scores,
-                    }
-                st.rerun()
-        comp_data = st.session_state.get("quality_comparison")
-        if comp_data and comp_data.get("task") == task:
-            gen_scores  = comp_data["gen_scores"]
-            spec_scores = comp_data["spec_scores"]
-            # ── Score summary strip ─────────────────────────────────────
-            sec("Score Summary")
-            cols = st.columns(len(gen_scores))
-            for i, (dim, g_val) in enumerate(gen_scores.items()):
-                s_val = spec_scores[dim]
-                delta = round(s_val - g_val, 1)
-                cols[i].metric(
-                    dim,
-                    f"{s_val:.1f} / 10",
-                    f"{delta:+.1f} vs generic",
-                )
-            # ── Radar chart ─────────────────────────────────────────────
-            sec("Quality Radar")
-            st.plotly_chart(
-                fig_radar_comparison(gen_scores, spec_scores),
-                use_container_width=True,
-                key="quality_radar",
-            )
-            # ── Side-by-side score bars ──────────────────────────────────
-            sec("Per-Dimension Score Breakdown")
-            dims   = list(gen_scores.keys())
-            g_vals = [gen_scores[d]  for d in dims]
-            s_vals = [spec_scores[d] for d in dims]
-            bar_fig = go.Figure()
-            bar_fig.add_trace(go.Bar(
-                name="Generic",    x=dims, y=g_vals,
-                marker_color="rgba(239,68,68,0.75)", marker_line_width=0,
-                text=[f"{v:.1f}" for v in g_vals], textposition="outside",
-                textfont=dict(size=10, color="#94a3b8"),
-            ))
-            bar_fig.add_trace(go.Bar(
-                name="Specialist", x=dims, y=s_vals,
-                marker_color="rgba(0,212,255,0.75)", marker_line_width=0,
-                text=[f"{v:.1f}" for v in s_vals], textposition="outside",
-                textfont=dict(size=10, color="#94a3b8"),
-            ))
-            bar_fig.update_layout(
-                **DARK, **DARK_AXES, height=300, barmode="group",
-                legend=dict(bgcolor="rgba(0,0,0,0)", font=dict(color="#94a3b8")),
-            )
-            bar_fig.update_yaxes(range=[0, 11], gridcolor="rgba(255,255,255,0.05)")
-            st.plotly_chart(bar_fig, use_container_width=True, key="quality_bars")
-            # ── Side-by-side text ────────────────────────────────────────
-            sec("Output Text Comparison")
-            c1, c2 = st.columns(2)
-            with c1:
-                st.markdown(
-                    '<div style="font-size:10px;font-weight:700;color:#ef4444;'
-                    'text-transform:uppercase;letter-spacing:1px;margin-bottom:8px;">'
-                    '✗  Generic Output (No Delegation)</div>',
-                    unsafe_allow_html=True,
-                )
-                st.code(comp_data["generic"][:1200], language=None)
-            with c2:
-                st.markdown(
-                    '<div style="font-size:10px;font-weight:700;color:#10b981;'
-                    'text-transform:uppercase;letter-spacing:1px;margin-bottom:8px;">'
-                    '✓  Specialist-Routed Output (Trained Policy)</div>',
-                    unsafe_allow_html=True,
-                )
-                st.code(comp_data["specialist"][:1200], language=None)
-    sec("Policy Tuning — Quality vs Latency")
-    c1, c2 = st.columns(2)
-    with c1:
-        st.markdown("""
-<div style="background:rgba(124,58,237,0.05);border:1px solid rgba(124,58,237,0.2);
-            border-radius:12px;padding:16px;">
-  <div style="font-size:10px;font-weight:700;color:#a78bfa;text-transform:uppercase;
-              letter-spacing:1px;margin-bottom:8px;">Quality Policy</div>
-  <div style="font-size:12px;color:#64748b;line-height:1.8;">
-    5 specialists &nbsp;·&nbsp; sequential &nbsp;·&nbsp; ~180 s<br>
-    <code style="color:#a78bfa;background:rgba(124,58,237,0.12);
-                 padding:2px 6px;border-radius:4px;">latency_weight = 0.0</code>
-  </div>
-</div>""", unsafe_allow_html=True)
-    with c2:
-        st.markdown("""
-<div style="background:rgba(0,212,255,0.05);border:1px solid rgba(0,212,255,0.2);
-            border-radius:12px;padding:16px;">
-  <div style="font-size:10px;font-weight:700;color:#00d4ff;text-transform:uppercase;
-              letter-spacing:1px;margin-bottom:8px;">Latency Policy</div>
-  <div style="font-size:12px;color:#64748b;line-height:1.8;">
-    3 specialists &nbsp;·&nbsp; parallel &nbsp;·&nbsp; ~45 s<br>
-    <code style="color:#00d4ff;background:rgba(0,212,255,0.1);
-                 padding:2px 6px;border-radius:4px;">latency_weight = 0.15</code>
-  </div>
-</div>""", unsafe_allow_html=True)
-# ─────────────────────────────────────────────────────────
-# Tab 5 — Reward Lab
-# ─────────────────────────────────────────────────────────
-def tab_reward_lab():
-    sec("Interactive Reward Explorer")
-    st.caption("Tune the reward weights and watch each component update live.")
-    col_s, col_c = st.columns([1, 2], gap="large")
-    with col_s:
-        lw = st.slider("Latency Weight",     0.0, 0.50, 0.05, 0.01, key="rl_lw")
-        ep = st.slider("Efficiency Penalty", 0.0, 0.20, 0.05, 0.01, key="rl_ep")
-        fp = st.slider("Failure Penalty",    0.0, 1.00, 0.30, 0.05, key="rl_fp")
-        cw = st.slider("Consistency Bonus",  0.0, 0.50, 0.10, 0.01, key="rl_cw")
-        eb = st.slider("Explanation Bonus",  0.0, 0.20, 0.05, 0.01, key="rl_eb")
-    comps = {
-        "quality_delta":      0.42,
-        "efficiency_penalty": -ep * 2,
-        "failure_penalty":    -fp * 0.3,
-        "recovery_bonus":     0.08,
-        "conflict_penalty":   -0.05,
-        "conflict_bonus":     0.03,
-        "consistency_bonus":  cw * 0.6,
-        "latency_penalty":    -lw * 0.25,
-        "explanation_bonus":  eb,
-    }
-    total = sum(comps.values())
-    sign  = "+" if total >= 0 else ""
-    with col_c:
-        st.plotly_chart(fig_reward_breakdown(comps), use_container_width=True)
-        st.markdown(
-            f'<div style="background:rgba(0,212,255,0.05);border:1px solid rgba(0,212,255,0.18);'
-            f'border-radius:10px;padding:14px 18px;font-size:13px;color:#94a3b8;">'
-            f'Estimated total reward: '
-            f'<span style="color:#00d4ff;font-weight:700;font-size:20px;">{sign}{total:.3f}</span>'
-            f'</div>',
-            unsafe_allow_html=True,
-        )
-# ─────────────────────────────────────────────────────────
-# Tab 6 — Architecture
-# ─────────────────────────────────────────────────────────
-def tab_architecture():
-    obs0 = EpisodeState.observation_dim(6)
-    act0 = 6 + 6
-    c1, c2 = st.columns(2)
-    with c1:
-        sec(f"Observation Space  ({obs0:,} dims)")
-        st.markdown("""
-| Dims | Component |
-|-----:|-----------|
-| 384  | Task embedding (all-MiniLM-L6-v2) |
-| 2304 | Roster embeddings (6 × 384) |
-| 2304 | Called embeddings (6 × 384) |
-| 384  | Scratchpad embedding |
-| 100  | Delegation graph adjacency (10 × 10) |
-| 6    | Called-specialist mask |
-| 8    | Scalar features |
-""")
-    with c2:
-        sec(f"Action Space  ({act0}-dim Box)")
-        st.markdown("""
-| Index  | Component |
-|--------|-----------|
-| [0]    | Meta-action (STOP / CALL / PARALLEL…) |
-| [1:7]  | Specialist selection logits (multi-hot) |
-| [7]    | Delegation mode (SEQ / PAR / FAN-OUT…) |
-| [8:12] | Mode parameters (rounds, threshold…) |
-""")
-    c1, c2, c3 = st.columns(3)
-    with c1:
-        sec("Policy")
-        st.markdown("""
-- **LSTM PPO** (RecurrentPPO)
-- MlpLstmPolicy
-- Hidden: 256 · 1 layer
-- POMDP-safe via LSTM state
-- 4 factored action heads
-""")
-    with c2:
-        sec("Tiered Reward")
-        st.markdown("""
-- **T0** — Structural heuristics
-- **T1** — Cosine embedding sim
-- **T2** — GPT-4o-mini judge
-- **T3** — Full judge (checkpoints)
-- Episode-level tier lock
-""")
-    with c3:
-        sec("Safety")
-        st.markdown("""
-- DAG cycle detection (DFS)
-- Max delegation depth: 2
-- Scratchpad sandbox isolation
-- Injection sanitization
-- Action masking (DAG)
-""")
-    sec("Reward Function")
-    st.code("""total_reward = (
-  quality_delta          # specialist_score − baseline  (same tier)
-− efficiency_penalty     # 0.05 × max(0, n_called − expected)
-− failure_penalty        # 0.3 per timeout,  0.2 per error
-+ recovery_bonus         # +0.1 if fallback succeeded
-− conflict_penalty       # 0.1 per unresolved conflict
-+ conflict_bonus         # 0.05 per resolved conflict
-+ consistency_bonus      # 0.1 × Dirichlet-prior path score
-− latency_penalty        # latency_weight × overage_fraction
-+ explanation_bonus      # 0.05 if delegation is auditable
-)""", language="python")
-# ────────────────────────────────────────────────���────────
-# Tab 7 — Output (Trained Policy)
-# ─────────────────────────────────────────────────────────
-def tab_output():
-    """Run the trained LSTM PPO policy on a custom task and show every specialist's output."""
-    hero()
-    st.markdown(
-        '<div style="font-size:12px;color:#64748b;margin-bottom:16px;">'
-        'Enter any software engineering task. The trained LSTM PPO policy decides which '
-        'specialists to delegate to — each specialist\'s individual output and the collective '
-        'synthesis are shown below.</div>',
-        unsafe_allow_html=True,
-    )
-    col_input, col_ctrl = st.columns([3, 1], gap="large")
-    with col_input:
-        sec("Task")
-        task_input = st.text_area(
-            "Task description",
-            height=110,
-            key="output_task_input",
-            placeholder=(
-                "Build a real-time collaborative code review tool with inline comments, "
-                "role-based access control, GitHub webhook integration, and CI/CD pipeline "
-                "status display. Include authentication with OAuth2."
-            ),
-        )
-    with col_ctrl:
-        sec("Config")
-        out_phase = st.selectbox("Curriculum phase", [1, 2, 3], index=1, key="output_phase")
-        st.markdown('<div style="height:8px"></div>', unsafe_allow_html=True)
-        run_btn = st.button(
-            "🚀 Run Trained Policy",
-            type="primary",
-            use_container_width=True,
-            key="output_run_btn",
-        )
-    if run_btn:
-        _task = (task_input or "").strip()
-        if not _task:
-            st.warning("Please enter a task description.")
-            return
-        with st.spinner("Loading trained model from HF Hub…"):
-            model, obs_mean, obs_var, clip_obs, model_err = _load_trained_model(HF_MODEL_REPO)
-        if model_err:
-            st.error(f"Model load failed: {model_err}")
-            return
-        st.success("Trained policy loaded ✓")
-        with st.spinner("Running episode with trained policy…"):
-            try:
-                env = SpindleFlowEnv(
-                    config_path=CONFIG, catalog_path=CATALOG,
-                    use_real_spindleflow=False, phase=int(out_phase),
-                )
-                # Inject custom task so the env uses the user's input
-                env.task_bank.sample = lambda: _task
-                obs, info = env.reset()
-                task_used = info.get("task", _task)
-                lstm_states    = None
-                episode_starts = np.array([True])
-                done           = False
-                rewards: list[float] = []
-                MIN_SPECIALISTS = 4  # suppress STOP until this many specialists called
-                for _ in range(15):
-                    if done:
-                        break
-                    obs_arr = obs[np.newaxis, :].copy().astype(np.float32)
-                    if obs_mean is not None and obs_var is not None:
-                        obs_arr = np.clip(
-                            (obs_arr - obs_mean) / np.sqrt(obs_var + 1e-8),
-                            -clip_obs, clip_obs,
-                        )
-                    action_batch, lstm_states = model.predict(
-                        obs_arr,
-                        state=lstm_states,
-                        episode_start=episode_starts,
-                        deterministic=True,
-                    )
-                    action = action_batch[0].copy()
-                    called_set = set(env.called_ids)
-                    if len(called_set) < MIN_SPECIALISTS:
-                        # The policy may want to STOP early; when it does, its
-                        # specialist-selection logits are all low/negative so
-                        # simply zeroing action[0] still produces garbage selection.
-                        # Fix: build a fresh action that directly picks the first
-                        # uncalled specialist with a hard positive logit (1.0).
-                        roster = env.active_specialist_ids
-                        uncalled = [sid for sid in roster if sid not in called_set]
-                        if uncalled:
-                            action = np.zeros(env.action_space.shape, dtype=np.float32)
-                            action[0] = 0.0  # MetaAction.CALL_SPECIALIST
-                            idx = roster.index(uncalled[0])
-                            if 1 + idx < len(action):
-                                action[1 + idx] = 1.0
-                    obs, r, term, trunc, _ = env.step(action)
-                    rewards.append(float(r))
-                    done = term or trunc
-                    episode_starts = np.array([done])
-                called  = list(env.called_ids)
-                edges   = [(e.caller_id, e.callee_id)
-                           for e in env.delegation_graph.get_delegation_path()]
-                spawned = list(getattr(env, "spawned_this_episode", []))
-                st.session_state.output_results = {
-                    "task":    task_used,
-                    "rewards": rewards,
-                    "called":  called,
-                    "edges":   edges,
-                    "specialist_results": [
-                        {
-                            "id":         sr.specialist_id,
-                            "output":     sr.output,
-                            "status":     sr.status,
-                            "latency_ms": sr.latency_ms,
-                        }
-                        for sr in env.specialist_results
-                    ],
-                    "spawned": spawned,
-                }
-                # Keep env alive for delegation-graph rendering
-                st.session_state.output_env = env
-                # Persist spawned specialists to shared pool for Specialists tab
-                if "spawned_pool" not in st.session_state:
-                    st.session_state.spawned_pool = []
-                existing_ids = {sp["id"] for sp in st.session_state.spawned_pool}
-                for sid in spawned:
-                    if sid not in existing_ids:
-                        sp_obj = env.registry.get(sid)
-                        if sp_obj:
-                            st.session_state.spawned_pool.append({
-                                "id":                  sid,
-                                "role":                sp_obj.role,
-                                "description":         sp_obj.description,
-                                "complexity_affinity": list(sp_obj.complexity_affinity),
-                                "avg_latency_ms":      sp_obj.avg_latency_ms,
-                                "triggered_by":        task_used[:120],
-                            })
-            except Exception as exc:
-                import traceback
-                st.error(f"Episode failed: {exc}")
-                st.code(traceback.format_exc(), language=None)
-                return
-        st.rerun()
-    # ── Display results ────────────────────────────────────────────────
-    results = st.session_state.get("output_results")
-    env_obj = st.session_state.get("output_env")
-    if results is None:
-        st.markdown(
-            '<div style="color:#334155;font-size:12px;padding:40px;text-align:center;">'
-            'Enter a task and click "Run Trained Policy" to see delegation and specialist outputs.'
-            '</div>',
-            unsafe_allow_html=True,
-        )
-        return
-    # Task banner
-    st.markdown(
-        f'<div style="background:rgba(0,212,255,0.04);'
-        f'border:1px solid rgba(0,212,255,0.18);border-radius:10px;'
-        f'padding:14px 18px;margin:10px 0 16px;">'
-        f'<div style="font-size:9px;font-weight:700;color:#475569;'
-        f'text-transform:uppercase;letter-spacing:1px;margin-bottom:5px;">Task</div>'
-        f'<div style="font-size:13px;color:#e2e8f0;">{_html.escape(results["task"])}</div>'
-        f'</div>',
-        unsafe_allow_html=True,
-    )
-    # Metrics strip
-    total_r = sum(results["rewards"])
-    mc1, mc2, mc3, mc4 = st.columns(4)
-    mc1.metric("Total Reward",       f"{total_r:+.3f}")
-    mc2.metric("Steps",              len(results["rewards"]))
-    mc3.metric("Specialists Called", len(results["called"]))
-    mc4.metric("Auto-Spawned",       len(results["spawned"]))
-    # Orchestrator widget
-    sec("Orchestrator  ·  Delegation Visualization")
-    render_orchestrator({
-        "called":  results["called"],
-        "active":  "",
-        "edges":   results["edges"],
-        "task":    results["task"],
-        "step":    len(results["rewards"]),
-        "mode":    "SEQUENTIAL",
-        "done":    True,
-        "reward":  sum(results["rewards"]),
-        "phase":   int(st.session_state.get("output_phase", 2)),
-        "spawned": results["spawned"],
-    })
-    # Delegation graph
-    sec("Delegation Graph")
-    if env_obj is not None:
-        class _GraphProxy:
-            registry            = env_obj.registry
-            spawned_specialists = results["spawned"]
-            env                 = env_obj
-        st.plotly_chart(
-            fig_delegation_graph(
-                _GraphProxy(),
-                results["called"],
-                results["edges"],
-                highlight_latest=False,
-                spawned_ids=results["spawned"],
-            ),
-            use_container_width=True,
-            key="output_dag",
-        )
-    # Auto-spawn alert
-    if results["spawned"]:
-        st.markdown(
-            '<div style="background:rgba(251,191,36,0.06);'
-            'border:1px solid rgba(251,191,36,0.22);border-radius:10px;'
-            'padding:10px 16px;margin:8px 0;">'
-            '<span style="font-size:10px;font-weight:700;color:#fbbf24;'
-            'text-transform:uppercase;letter-spacing:1px;">⚡ Auto-Spawned: </span>'
-            '<span style="font-size:12px;color:#e2e8f0;">'
-            + ", ".join(results["spawned"])
-            + '</span></div>',
-            unsafe_allow_html=True,
-        )
-    # Individual specialist outputs
-    spec_results = results["specialist_results"]
-    sec(f"Individual Specialist Outputs  ·  {len(spec_results)} called")
-    if not spec_results:
-        st.markdown(
-            '<div style="color:#475569;font-size:12px;padding:16px;'
-            'background:rgba(0,0,0,0.2);border-radius:8px;">'
-            'The policy issued STOP without delegating to any specialists.</div>',
-            unsafe_allow_html=True,
-        )
-    else:
-        for sr in spec_results:
-            sid    = sr["id"]
-            color  = SPEC_COLORS.get(sid, "#7c3aed")
-            ok_clr = "#10b981" if sr["status"] == "success" else "#ef4444"
-            lat    = sr.get("latency_ms", 0)
-            label  = (
-                f"🤖  {sid.replace('_', ' ').title()}"
-                f"  ·  {sr['status']}  ·  {lat:.0f} ms"
-            )
-            with st.expander(label, expanded=True):
-                st.markdown(
-                    f'<div style="border-left:3px solid {color};'
-                    f'padding:4px 0 4px 12px;margin-bottom:8px;">'
-                    f'<span style="font-size:10px;color:{color};font-weight:700;">{sid}</span>'
-                    f'<span style="font-size:10px;color:#475569;">  ·  status: </span>'
-                    f'<span style="font-size:10px;color:{ok_clr};">{sr["status"]}</span>'
-                    f'<span style="font-size:10px;color:#475569;">  ·  {lat:.0f} ms</span>'
-                    f'</div>',
-                    unsafe_allow_html=True,
-                )
-                st.code(sr["output"] or "(no output)", language=None)
-    # Synthesized / collective output
-    sec("Synthesized Output  ·  Collective Response")
-    st.caption("All specialist outputs combined — this is what the orchestrator received.")
-    if spec_results:
-        parts = [
-            f"{'─'*52}\n[{sr['id'].upper()}]\n{'─'*52}\n{sr['output'] or '(empty)'}"
-            for sr in spec_results
-        ]
-        synthesis = "\n\n".join(parts)
-    else:
-        synthesis = "(no specialists called — policy chose STOP on first step)"
-    st.code(synthesis, language=None)
-# ─────────────────────────────────────────────────────────
-# Entry point
-# ─────────────────────────────────────────────────────────
-def main():
-    inject_css()
-    S = _S()
-    render_live_stats(S)
-    t1, t2, t3, t4, t5, t6, t7 = st.tabs([
-        "🎯 Output",
-        "⚡ Training Interface Example",
-        "🤖 Specialists",
-        "📈 Training",
-        "🔍 Quality Demo",
-        "🧪 Reward Lab",
-        "🏗 Architecture",
-    ])
-    with t1: tab_output()
-    with t2: tab_live_demo()
-    with t3: tab_specialists()
-    with t4: tab_training()
-    with t5: tab_quality()
-    with t6: tab_reward_lab()
-    with t7: tab_architecture()
-# Guard allows safe imports for testing without triggering the UI.
-# Streamlit runs scripts with __name__ == "__main__".
-if __name__ == "__main__":
-    main()

+import streamlit as st
+import sys, os, traceback
 from pathlib import Path
+import importlib.util
+root = Path(__file__).resolve().parent
+sys.path.insert(0, str(root))
+os.chdir(str(root))
 try:
+    demo_file = root / "demo" / "streamlit_app.py"
+    spec = importlib.util.spec_from_file_location("spindleflow_demo", str(demo_file))
+    mod  = importlib.util.module_from_spec(spec)
+    mod.__file__ = str(demo_file)          # demo's own sys.path logic resolves correctly
+    sys.modules["spindleflow_demo"] = mod
+    spec.loader.exec_module(mod)           # runs demo/streamlit_app.py in its own context
+    mod.main()
+except SystemExit:
+    pass
+except BaseException as e:
+    st.error(f"SpindleFlow failed to load: {e}")
+    st.code(traceback.format_exc())

fix: importlib wrapper with correct __file__; remove garbage files

fix: importlib wrapper with correct file; remove garbage files