import React, { useState, useEffect, useRef, useCallback } from 'react' // Relative URL = same origin. Works on HF Spaces (frontend+backend on :7860) // and locally via the Vite proxy defined in vite.config.js. const API = '' // ── Tiny SVG line chart ──────────────────────────────────────────────────────── function LineChart({ data, color = '#6366f1', height = 130, label }) { if (!data.length) return (
{label || 'Waiting for data…'}
) const W = Math.max(data.length * 18, 260) const lo = Math.min(...data) const hi = Math.max(...data) const span = hi === lo ? 1 : hi - lo const py = v => (height - 16) - ((v - lo) / span) * (height - 28) + 8 const pts = data.map((v, i) => `${i * 18 + 9},${py(v)}`).join(' ') return ( {/* zero line */} {/* fill area */} `${i * 18 + 9},${py(v)}`), `${(data.length - 1) * 18 + 9},${height}`].join(' ')} fill={color + '18'} stroke="none" /> {/* line */} {/* last point dot */} {data.length > 0 && ( )} ) } // Dual line chart (energy + stress) function DualChart({ energy, stress, height = 130 }) { const data = energy.length ? energy : [] if (!data.length) return (
Waiting for data…
) const W = Math.max(data.length * 18, 260) const py = (v, lo, hi) => { const span = hi === lo ? 1 : hi - lo return (height - 16) - ((v - lo) / span) * (height - 28) + 8 } const loE = Math.min(...energy, 0), hiE = Math.max(...energy, 1) const loS = Math.min(...stress, 0), hiS = Math.max(...stress, 1) const ePts = energy.map((v, i) => `${i * 18 + 9},${py(v, loE, hiE)}`).join(' ') const sPts = stress.map((v, i) => `${i * 18 + 9},${py(v, loS, hiS)}`).join(' ') return ( ) } // ── Tiny components ──────────────────────────────────────────────────────────── function Chip({ label, value, color, bg }) { return (
{label}
{value}
) } function Tag({ children, color, bg }) { return ( {children} ) } const PRI_STYLE = { critical: { color: '#dc2626', bg: '#fef2f2' }, high: { color: '#d97706', bg: '#fffbeb' }, normal: { color: '#16a34a', bg: '#f0fdf4' }, low: { color: '#64748b', bg: '#f8fafc' }, } // ── Scoring Formula Card ─────────────────────────────────────────────────────── const FORMULA_ITEMS = [ { key:'completion', label:'Task Completion', weight:0.60, color:'#6366f1', desc:'Fraction of tasks fully completed, weighted by priority' }, { key:'deadline', label:'Deadline Adherence', weight:0.22, color:'#0ea5e9', desc:'Bonus for finishing before deadline; penalty for missing it' }, { key:'energy', label:'Energy Efficiency', weight:0.10, color:'#22c55e', desc:'Penalises high worker fatigue and stress spikes' }, { key:'dependency', label:'Dependency Bonus', weight:0.05, color:'#f59e0b', desc:'Reward for respecting task dependency order' }, { key:'interrupt', label:'Interruption Bonus', weight:0.03, color:'#f43f5e', desc:'Reward for minimising context-switching interruptions' }, ] function ScoringFormulaCard() { return (
Reward Scoring Formula
Each action is scored on 5 dimensions. Weights reflect cognitive-load research priorities.
{/* Stacked weight bar */}
Weight distribution
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.10 && ( {(it.weight * 100).toFixed(0)}% )}
))}
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.08 ? it.label.split(' ')[0] : ''}
))}
{/* Component cards */}
{FORMULA_ITEMS.map(it => (
×{it.weight.toFixed(2)}
{it.label}
{it.desc}
))}
{/* Formula expression */}
Formula
score = {FORMULA_ITEMS.map((it, idx) => ( {it.key === 'completion' ? 'completion' : it.key === 'deadline' ? 'deadline' : it.key === 'energy' ? 'energy' : it.key === 'dependency' ? 'dep' : 'interrupt'}×{it.weight} {idx < FORMULA_ITEMS.length - 1 && + } ))} ∈ (0.01, 0.99)
) } // ── Training Performance Card (data from CLM_GRPO_Training.ipynb) ────────────── // Mean reward sampled at training checkpoints from the GRPO training run. const TRAINING_CURVE = [ { step: 1, reward: 0.1006 }, { step: 50, reward: 0.0882 }, { step: 100, reward: 0.1192 }, { step: 200, reward: 0.1010 }, { step: 300, reward: 0.1383 }, { step: 400, reward: 0.1700 }, { step: 500, reward: 0.2201 }, { step: 600, reward: 0.2542 }, { step: 700, reward: 0.2595 }, { step: 800, reward: 0.2648 }, { step: 900, reward: 0.2345 }, { step: 1000, reward: 0.2648 }, ] // Per-action mean reward measured before training (heuristic baseline) and // after 1000 steps of GRPO training. const ACTION_COMPARISON = [ { action: 'Work', before: 0.045, after: 0.102 }, { action: 'Focus', before: 0.124, after: 0.249 }, { action: 'Break', before: 0.030, after: 0.030 }, { action: 'Delay', before: 0.010, after: 0.010 }, ] function TrainingLineChart({ data, height = 160 }) { const W = 720 const padL = 36, padR = 16, padT = 18, padB = 28 const innerW = W - padL - padR const innerH = height - padT - padB const xs = data.map(d => d.step) const ys = data.map(d => d.reward) const xLo = Math.min(...xs), xHi = Math.max(...xs) const yLo = Math.min(0, ...ys), yHi = Math.max(...ys, 0.3) const px = v => padL + ((v - xLo) / (xHi - xLo || 1)) * innerW const py = v => padT + innerH - ((v - yLo) / (yHi - yLo || 1)) * innerH const pts = data.map(d => `${px(d.step)},${py(d.reward)}`).join(' ') return ( {/* Y axis grid */} {[0, 0.1, 0.2, 0.3].map(g => ( {g.toFixed(2)} ))} {/* X axis labels */} {[0, 250, 500, 750, 1000].map(s => ( step {s} ))} {/* Area + line */} `${px(d.step)},${py(d.reward)}`), `${px(xHi)},${py(yLo)}`].join(' ')} fill="#6366f122" stroke="none" /> {data.map(d => ( ))} ) } function ActionComparisonChart({ data, height = 200 }) { const W = 520 const padL = 60, padR = 16, padT = 18, padB = 36 const innerW = W - padL - padR const innerH = height - padT - padB const yMax = Math.max(...data.flatMap(d => [d.before, d.after]), 0.3) * 1.1 const groupW = innerW / data.length const barW = (groupW - 18) / 2 return ( {/* Y grid */} {[0, 0.1, 0.2, 0.3].map(g => { const y = padT + innerH - (g / yMax) * innerH return ( {g.toFixed(2)} ) })} {data.map((d, i) => { const gx = padL + i * groupW + 9 const hBefore = (d.before / yMax) * innerH const hAfter = (d.after / yMax) * innerH return ( {d.before.toFixed(3)} {d.after.toFixed(3)} {d.action} ) })} ) } function TrainingPerformanceCard() { const first = TRAINING_CURVE[0].reward const last = TRAINING_CURVE[TRAINING_CURVE.length - 1].reward const lift = (((last - first) / (Math.abs(first) || 1)) * 100).toFixed(0) return (
Training Performance — Before vs After GRPO
Measured during 1000-step GRPO fine-tuning of the policy on the CLM environment
Before
{first.toFixed(3)}
After
{last.toFixed(3)}
Lift
+{lift}%
Mean Reward Across Training Steps
Per-Action Reward Before   After
) } // ── Main Dashboard ───────────────────────────────────────────────────────────── export default function Dashboard() { // ── mode: 'stream' (auto-play via SSE) | 'manual' (interactive) ──────────── const [mode, setMode] = useState('stream') const [difficulty, setDiff] = useState('medium') const diffRef = useRef('medium') useEffect(() => { diffRef.current = difficulty }, [difficulty]) // ── stream state ────────────────────────────────────────────────────────── const [streaming, setStreaming] = useState(false) const [streamDone, setStreamDone] = useState(false) const [currentStep, setCurrentStep] = useState(null) const [rewardTrace, setRwTrace] = useState([]) const [energyTrace, setEnTrace] = useState([]) const [stressTrace, setStTrace] = useState([]) const [tasks, setTasks] = useState([]) const [driftAlerts, setDrift] = useState([]) const [finalScore, setFinal] = useState(null) const [currentAction, setAction] = useState(null) const [episodeCount, setEpCount] = useState(0) // history of scores across replays: [{ep, score, difficulty, steps}] const [history, setHistory] = useState([]) const esRef = useRef(null) const replayTimer = useRef(null) // ── manual state ────────────────────────────────────────────────────────── const [sessionId, setSession] = useState(null) const [obs, setObs] = useState(null) const [manLogs, setManLogs] = useState([]) const [manRewards, setManRw] = useState([]) const [manDone, setManDone] = useState(false) const [loading, setLoading] = useState(false) const [error, setError] = useState(null) const logRef = useRef(null) // ── SSE streaming ───────────────────────────────────────────────────────── const startStream = useCallback((diff) => { const d = (typeof diff === 'string' && diff) ? diff : diffRef.current if (esRef.current) { esRef.current.close(); esRef.current = null } if (replayTimer.current) { clearTimeout(replayTimer.current); replayTimer.current = null } // Reset per-episode state (keep history) setStreaming(true); setStreamDone(false); setFinal(null) setCurrentStep(null); setAction(null) setRwTrace([]); setEnTrace([]); setStTrace([]) setTasks([]); setDrift([]) setEpCount(prev => prev + 1) // Tracks whether this episode finished cleanly so onerror can ignore // the connection-close the browser fires after the server ends the stream. const episodeDone = { current: false } const es = new EventSource(`${API}/stream/run?difficulty=${d}&delay_ms=350`) esRef.current = es es.onmessage = (ev) => { let msg try { msg = JSON.parse(ev.data) } catch { return } if (!msg || typeof msg !== 'object') return const num = (v, fallback = 0) => (typeof v === 'number' && !isNaN(v) ? v : fallback) if (msg.type === 'reset') { setTasks(Array.isArray(msg.tasks) ? msg.tasks : []) setEnTrace([num(msg.energy, 1)]) setStTrace([num(msg.stress, 0)]) } if (msg.type === 'step') { setCurrentStep(num(msg.step, 0)) setAction(msg.action || null) setTasks(Array.isArray(msg.tasks) ? msg.tasks : []) setRwTrace(prev => [...prev, num(msg.reward)]) setEnTrace(prev => [...prev, num(msg.energy, 1)]) setStTrace(prev => [...prev, num(msg.stress, 0)]) if (msg.schema_drift) setDrift(prev => [...prev, msg.schema_drift]) if (msg.done) { episodeDone.current = true const score = typeof msg.final_score === 'number' ? msg.final_score : null setFinal(score) setStreamDone(true) setStreaming(false) setHistory(prev => [ { ep: prev.length + 1, score, difficulty: d, steps: num(msg.step, 0) }, ...prev.slice(0, 9), ]) es.close(); esRef.current = null } } if (msg.type === 'error') { setError(msg.message || 'Unknown error') setStreaming(false) es.close(); esRef.current = null } } es.onerror = () => { // When the server closes the stream after a clean episode end, the browser // fires onerror. Ignore it — only show an error for genuine disconnects. if (episodeDone.current) return setError('Stream disconnected. Check backend is running, then press Play again.') setStreaming(false) es.close(); esRef.current = null } }, []) // stable — reads difficulty through diffRef, never needs to be recreated const stopStream = () => { if (esRef.current) { esRef.current.close(); esRef.current = null } if (replayTimer.current) { clearTimeout(replayTimer.current); replayTimer.current = null } setStreaming(false) } // Cleanup on unmount only useEffect(() => () => { if (esRef.current) esRef.current.close() if (replayTimer.current) clearTimeout(replayTimer.current) }, []) // ── Manual episode helpers ──────────────────────────────────────────────── const handleReset = async () => { setLoading(true); setError(null); setManRw([]); setManLogs([]); setManDone(false) try { const r = await fetch(`${API}/reset`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ task_id: difficulty }), }) if (!r.ok) throw new Error(`HTTP ${r.status}`) const d = await r.json() setSession(d.session_id); setObs(d.observation) setManLogs([{ type: 'system', msg: `Episode started (${difficulty})` }]) } catch (err) { setError(err.message) } finally { setLoading(false) } } const handleAction = async (type, taskId = null) => { if (!sessionId) return setLoading(true) const action = { type }; if (taskId) action.task_id = taskId try { const r = await fetch(`${API}/step`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ session_id: sessionId, action }), }) if (!r.ok) throw new Error(`HTTP ${r.status}`) const d = await r.json() setObs(d.observation) setManRw(prev => [...prev, d.reward]) setManLogs(prev => [...prev, { type: d.reward >= 0 ? 'pos' : 'neg', msg: `${type}${taskId ? ' ' + taskId : ''} → ${d.reward?.toFixed(3)}`, }]) if (d.done) { setManLogs(prev => [...prev, { type: 'system', msg: `Done. Final score: ${d.info?.final_score?.toFixed(4) ?? 'N/A'}`, }]) setSession(null) setManDone(true) } setTimeout(() => { if (logRef.current) logRef.current.scrollTop = logRef.current.scrollHeight }, 50) } catch (err) { setError(err.message) } finally { setLoading(false) } } // ── Derived stream metrics ──────────────────────────────────────────────── const totalReward = rewardTrace.reduce((s, v) => s + v, 0) const lastEnergy = energyTrace[energyTrace.length - 1] ?? null const lastStress = stressTrace[stressTrace.length - 1] ?? null const tasksDone = tasks.filter(t => t.progress >= 1.0).length const manTasks = obs?.tasks || [] const manWorkers = obs?.visible_state?.workers || [] const manW0 = manWorkers[0] || {} // ── card ────────────────────────────────────────────────────────────────── const card = (extra = {}) => ({ background: '#fff', border: '1px solid #e2e8f0', borderRadius: 14, padding: 16, ...extra, }) const section = { fontSize: 10, fontWeight: 700, color: '#94a3b8', textTransform: 'uppercase', letterSpacing: '.08em', marginBottom: 10 } return (
{/* ── Top controls ───────────────────────────────────────────────────── */}
{/* Mode toggle */}
{[['stream', 'Auto-Play'], ['manual', 'Manual']].map(([id, lbl]) => ( ))}
{/* Difficulty */} {/* Action button */} {mode === 'stream' ? ( streaming ? : !streamDone && ( ) ) : ( !manDone && ( ) )} {streaming && ( ● LIVE · Episode #{episodeCount} )} {streamDone && ( Episode #{episodeCount} Complete {typeof finalScore === 'number' ? ` · Score: ${finalScore.toFixed(4)}` : ''} )}
{/* ── Error banner ───────────────────────────────────────────────────── */} {error && (
{error} 
)} {/* ── Schema drift alerts ────────────────────────────────────────────── */} {driftAlerts.map((d, i) => (
Schema Drift @ step {d.step}: {d.message}
))} {/* ═══════════════════ STREAM MODE ═══════════════════════════════════ */} {mode === 'stream' && ( <> {/* Episode complete banner */} {streamDone && (
Episode #{episodeCount} Complete
Final results frozen below — all charts and task data preserved
{typeof finalScore === 'number' && (
Final Score
{finalScore.toFixed(4)}
)}
)} {/* Metric chips */} {(streaming || streamDone) && (
= 0 ? '#16a34a' : '#ef4444'} /> 0.7 ? '#ef4444' : lastStress > 0.4 ? '#f59e0b' : '#22c55e'} /> {currentAction && ( )}
)} {/* Charts row */}
{/* Reward curve */}
Reward / Step
{rewardTrace.length > 0 && (
Min: {Math.min(...rewardTrace).toFixed(3)} Max: {Math.max(...rewardTrace).toFixed(3)} Steps: {rewardTrace.length}
)}
{/* Energy + Stress dual chart */}
Energy & Stress Energy  Stress
{energyTrace.length > 0 && (
Min E: {Math.min(...energyTrace).toFixed(2)} Max S: {Math.max(...stressTrace).toFixed(2)}
)}
{/* Task completion */}
Task Progress
{tasks.length === 0 && (
Episode not started
)} {tasks.map(t => { const ps = PRI_STYLE[t.priority] || PRI_STYLE.normal return (
{t.task_type} #{t.id} {t.priority}
= 1 ? '#22c55e' : '#6366f1', transition: 'width .3s ease', }} />
{(t.progress * 100).toFixed(0)}% {t.deadline ? ` · deadline: step ${t.deadline}` : ''} {t.is_interrupted ? ' (interrupted)' : ''}
) })}
{/* Cumulative reward + episode history */} {rewardTrace.length > 0 && (
Cumulative Reward — Episode #{episodeCount}
{ acc.push((acc[acc.length - 1] || 0) + v); return acc }, [])} color="#0ea5e9" height={80} />
{history.length > 0 && (
Episode History
{history.map(h => { const sc = typeof h.score === 'number' ? h.score : null const col = sc == null ? '#64748b' : sc >= 0.5 ? '#16a34a' : sc >= 0.3 ? '#f59e0b' : '#ef4444' const diff = typeof h.difficulty === 'string' ? h.difficulty : '—' const steps = typeof h.steps === 'number' ? h.steps : 0 return (
#{h.ep} {diff} {sc != null ? sc.toFixed(4) : '—'} {steps}s
) })}
)}
)} )} {/* ═══════════════════ MANUAL MODE ════════════════════════════════════ */} {mode === 'manual' && ( <> {/* Manual episode complete banner */} {manDone && (
Manual Episode Complete
All results frozen — task log and reward chart preserved below
)} {/* Worker metric chips */} {(manWorkers.length > 0 || manDone) && obs && (
t.progress >= 1).length}/${manTasks.length}`} color="#0ea5e9" /> s + v, 0).toFixed(2)} color={manRewards.reduce((s, v) => s + v, 0) >= 0 ? '#16a34a' : '#ef4444'} />
)}
{/* Task list */}
Task Queue
{manTasks.length === 0 && (
Press Start to begin
)} {manTasks.map(task => { const ps = PRI_STYLE[task.priority] || PRI_STYLE.normal return (
{task.task_type}  #{task.id}
{task.deadline ? `deadline: ${task.deadline}` : 'no deadline'} {task.depends_on ? ` · depends: ${task.depends_on}` : ''}
= 1 ? '#22c55e' : '#6366f1', transition: 'width .25s' }} />
{task.priority} {sessionId && task.progress < 1.0 && (
)}
) })} {sessionId && (
)}
{/* Reward chart */}
Reward / Step
{manRewards.length > 0 && (
{[ { l: 'Total', v: manRewards.reduce((s,v)=>s+v,0).toFixed(3) }, { l: 'Mean', v: (manRewards.reduce((s,v)=>s+v,0)/manRewards.length).toFixed(3) }, { l: 'Steps', v: manRewards.length }, ].map(s => (
{s.l}
{s.v}
))}
)}
{/* Action log */}
Action Log
{manLogs.length === 0 && ( No actions yet… )} {manLogs.map((l, i) => (
[{i}] {l.msg}
))}
)} {/* ── Training performance — before vs after GRPO ── */}
{/* ── Reward scoring formula — always visible ── */}
) }