import React, { useState, useEffect, useRef, useCallback } from 'react'
// Base URL for backend calls. An empty string yields relative URLs, i.e. the
// same origin as the page. Works on HF Spaces (frontend+backend on :7860)
// and locally via the Vite proxy defined in vite.config.js.
// NOTE(review): presumably prefixed to request paths; call sites are not
// visible in this part of the file — confirm.
const API = ''
// ── Tiny SVG line chart ────────────────────────────────────────────────────────
/**
 * Small line chart component.
 *
 * Props (destructured from the single argument):
 *   data   - series to plot; its `length` is checked first.
 *   color  - defaults to '#6366f1'.
 *   height - defaults to 130.
 *   label  - no default.
 *
 * NOTE(review): only the signature and the empty-data guard of this function
 * are visible. The text that follows talks about the reward scoring formula
 * and reads FORMULA_ITEMS (not defined in view); it looks like the tail of a
 * different component whose markup tags were lost. Confirm against the
 * original file before relying on this body.
 */
function LineChart({ data, color = '#6366f1', height = 130, label }) {
// Empty series: return early instead of plotting.
if (!data.length) return (
Reward Scoring Formula
Each action is scored on 5 dimensions. Weights reflect cognitive-load research priorities.
{/* Stacked weight bar */}
Weight distribution
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.10 && (
{(it.weight * 100).toFixed(0)}%
)}
))}
{FORMULA_ITEMS.map(it => (
{it.weight >= 0.08 ? it.label.split(' ')[0] : ''}
))}
{/* Component cards */}
{FORMULA_ITEMS.map(it => (
×{it.weight.toFixed(2)}
{it.label}
{it.desc}
))}
{/* Formula expression */}
Formula
score =
{FORMULA_ITEMS.map((it, idx) => (
{it.key === 'completion' ? 'completion' :
it.key === 'deadline' ? 'deadline' :
it.key === 'energy' ? 'energy' :
it.key === 'dependency' ? 'dep' : 'interrupt'}×{it.weight}
{idx < FORMULA_ITEMS.length - 1 &&
+ }
))}
∈ (0.01, 0.99)
)
}
// ── Training Performance Card (data from CLM_GRPO_Training.ipynb) ──────────────
// Mean reward sampled at training checkpoints from the GRPO training run.
// Checkpoints are written as compact [step, reward] pairs and expanded into
// { step, reward } records, in ascending step order.
const TRAINING_CURVE = [
  [1, 0.1006],
  [50, 0.0882],
  [100, 0.1192],
  [200, 0.1010],
  [300, 0.1383],
  [400, 0.1700],
  [500, 0.2201],
  [600, 0.2542],
  [700, 0.2595],
  [800, 0.2648],
  [900, 0.2345],
  [1000, 0.2648],
].map(([step, reward]) => ({ step, reward }))
// Per-action mean reward measured before training (heuristic baseline) and
// after 1000 steps of GRPO training.
// Rows are written as [action, before, after] and expanded into
// { action, before, after } records.
const ACTION_COMPARISON = [
  ['Work', 0.045, 0.102],
  ['Focus', 0.124, 0.249],
  ['Break', 0.030, 0.030],
  ['Delay', 0.010, 0.010],
].map(([action, before, after]) => ({ action, before, after }))
/**
 * Chart of reward against training step.
 *
 * Props (destructured from the single argument):
 *   data   - array of points; each is read through its `step` (x) and
 *            `reward` (y) fields.
 *   height - overall chart height; defaults to 160.
 *
 * The prelude maps data values linearly onto a 720-wide drawing area with
 * fixed padding, and builds a space-separated list of "x,y" pairs.
 * NOTE(review): that list is presumably meant for an SVG polyline `points`
 * attribute — confirm.
 */
function TrainingLineChart({ data, height = 160 }) {
// Total width, then left/right/top/bottom padding around the plot area.
const W = 720
const padL = 36, padR = 16, padT = 18, padB = 28
const innerW = W - padL - padR
const innerH = height - padT - padB
const xs = data.map(d => d.step)
const ys = data.map(d => d.reward)
// Axis ranges: x spans the observed steps; y always includes 0 and reaches
// at least 0.3.
const xLo = Math.min(...xs), xHi = Math.max(...xs)
const yLo = Math.min(0, ...ys), yHi = Math.max(...ys, 0.3)
// Linear value -> coordinate maps. `|| 1` avoids dividing by zero when a range
// collapses; py is inverted so larger rewards get smaller y coordinates.
const px = v => padL + ((v - xLo) / (xHi - xLo || 1)) * innerW
const py = v => padT + innerH - ((v - yLo) / (yHi - yLo || 1)) * innerH
const pts = data.map(d => `${px(d.step)},${py(d.reward)}`).join(' ')
// NOTE(review): the markup returned below does not use the values computed
// above. It references mode/stream/manual state and handlers that are not
// defined in this function and has no element tags; it looks like the
// top-level app view fused onto this chart. Confirm against the original file.
return (
{/* ── Top controls ───────────────────────────────────────────────────── */}
{/* Mode toggle */}
{[['stream', 'Auto-Play'], ['manual', 'Manual']].map(([id, lbl]) => (
setMode(id)}
style={{ padding: '7px 14px', borderRadius: 8, border: 'none',
background: mode === id ? '#fff' : 'transparent',
fontWeight: 600, fontSize: 13, cursor: 'pointer',
boxShadow: mode === id ? '0 1px 3px rgba(0,0,0,.15)' : 'none',
color: mode === id ? '#0f172a' : '#64748b',
}}>{lbl}
))}
{/* Difficulty */}
setDiff(e.target.value)}
style={{ border: '1px solid #e2e8f0', borderRadius: 8,
padding: '8px 12px', fontSize: 13, background: '#fff' }}>
{['easy','medium','hard','expert'].map(l => (
{l.charAt(0).toUpperCase()+l.slice(1)}
))}
{/* Action button */}
{mode === 'stream' ? (
streaming
?
Stop
: !streamDone && (
startStream()}
style={{ background: '#6366f1', color: '#fff', border: 'none',
borderRadius: 8, padding: '8px 20px', fontWeight: 700,
fontSize: 13, cursor: 'pointer' }}>
Play Episode
)
) : (
!manDone && (
{loading ? 'Loading…' : sessionId ? 'Reset' : 'Start'}
)
)}
{streaming && (
● LIVE · Episode #{episodeCount}
)}
{streamDone && (
Episode #{episodeCount} Complete
{typeof finalScore === 'number' ? ` · Score: ${finalScore.toFixed(4)}` : ''}
)}
{/* ── Error banner ───────────────────────────────────────────────────── */}
{error && (
{error}
setError(null)}
style={{ background: 'none', border: 'none', cursor: 'pointer',
color: '#dc2626', fontWeight: 700 }}>x
)}
{/* ── Schema drift alerts ────────────────────────────────────────────── */}
{driftAlerts.map((d, i) => (
Schema Drift @ step {d.step}: {d.message}
))}
{/* ═══════════════════ STREAM MODE ═══════════════════════════════════ */}
{mode === 'stream' && (
<>
{/* Episode complete banner */}
{streamDone && (
Episode #{episodeCount} Complete
Final results frozen below — all charts and task data preserved
{typeof finalScore === 'number' && (
Final Score
{finalScore.toFixed(4)}
)}
)}
{/* Metric chips */}
{(streaming || streamDone) && (
= 0 ? '#16a34a' : '#ef4444'} />
0.7 ? '#ef4444' : lastStress > 0.4 ? '#f59e0b' : '#22c55e'} />
{currentAction && (
)}
)}
{/* Charts row */}
{/* Reward curve */}
Reward / Step
{rewardTrace.length > 0 && (
Min: {Math.min(...rewardTrace).toFixed(3)}
Max: {Math.max(...rewardTrace).toFixed(3)}
Steps: {rewardTrace.length}
)}
{/* Energy + Stress dual chart */}
Energy & Stress
─ Energy
╌ Stress
{energyTrace.length > 0 && (
Min E: {Math.min(...energyTrace).toFixed(2)}
Max S: {Math.max(...stressTrace).toFixed(2)}
)}
{/* Task completion */}
Task Progress
{tasks.length === 0 && (
Episode not started
)}
{tasks.map(t => {
const ps = PRI_STYLE[t.priority] || PRI_STYLE.normal
return (
{t.task_type} #{t.id}
{t.priority}
= 1 ? '#22c55e' : '#6366f1',
transition: 'width .3s ease',
}} />
{(t.progress * 100).toFixed(0)}%
{t.deadline ? ` · deadline: step ${t.deadline}` : ''}
{t.is_interrupted ? ' (interrupted)' : ''}
)
})}
{/* Cumulative reward + episode history */}
{rewardTrace.length > 0 && (
Cumulative Reward — Episode #{episodeCount}
{
acc.push((acc[acc.length - 1] || 0) + v); return acc
}, [])}
color="#0ea5e9" height={80}
/>
{history.length > 0 && (
Episode History
{history.map(h => {
const sc = typeof h.score === 'number' ? h.score : null
const col = sc == null ? '#64748b'
: sc >= 0.5 ? '#16a34a' : sc >= 0.3 ? '#f59e0b' : '#ef4444'
const diff = typeof h.difficulty === 'string' ? h.difficulty : '—'
const steps = typeof h.steps === 'number' ? h.steps : 0
return (
#{h.ep}
{diff}
{sc != null ? sc.toFixed(4) : '—'}
{steps}s
)
})}
)}
)}
>
)}
{/* ═══════════════════ MANUAL MODE ════════════════════════════════════ */}
{mode === 'manual' && (
<>
{/* Manual episode complete banner */}
{manDone && (
Manual Episode Complete
All results frozen — task log and reward chart preserved below
)}
{/* Worker metric chips */}
{(manWorkers.length > 0 || manDone) && obs && (
t.progress >= 1).length}/${manTasks.length}`}
color="#0ea5e9" />
s + v, 0).toFixed(2)}
color={manRewards.reduce((s, v) => s + v, 0) >= 0 ? '#16a34a' : '#ef4444'} />
)}
{/* Task list */}
Task Queue
{manTasks.length === 0 && (
Press Start to begin
)}
{manTasks.map(task => {
const ps = PRI_STYLE[task.priority] || PRI_STYLE.normal
return (
{task.task_type}
#{task.id}
{task.deadline ? `deadline: ${task.deadline}` : 'no deadline'}
{task.depends_on ? ` · depends: ${task.depends_on}` : ''}
= 1 ? '#22c55e' : '#6366f1',
transition: 'width .25s' }} />
{task.priority}
{sessionId && task.progress < 1.0 && (
handleAction('work', task.id)}
style={{ fontSize: 11, padding: '4px 9px', borderRadius: 6,
border: '1px solid #e2e8f0', background: '#f8fafc',
cursor: 'pointer', fontWeight: 600 }}>work
handleAction('focus', task.id)}
style={{ fontSize: 11, padding: '4px 9px', borderRadius: 6,
border: '1px solid #6366f1', background: '#eef2ff',
color: '#6366f1', cursor: 'pointer', fontWeight: 600 }}>focus
)}
)
})}
{sessionId && (
handleAction('break')}
style={{ flex: 1, padding: 9, borderRadius: 8,
border: '1px solid #e2e8f0', background: '#f0fdf4',
color: '#16a34a', fontWeight: 700, cursor: 'pointer', fontSize: 13 }}>
Break
handleAction('delay')}
style={{ flex: 1, padding: 9, borderRadius: 8,
border: '1px solid #e2e8f0', background: '#f8fafc',
color: '#64748b', fontWeight: 700, cursor: 'pointer', fontSize: 13 }}>
Delay
)}
{/* Reward chart */}
Reward / Step
{manRewards.length > 0 && (
{[
{ l: 'Total', v: manRewards.reduce((s,v)=>s+v,0).toFixed(3) },
{ l: 'Mean', v: (manRewards.reduce((s,v)=>s+v,0)/manRewards.length).toFixed(3) },
{ l: 'Steps', v: manRewards.length },
].map(s => (
))}
)}
{/* Action log */}
Action Log
{manLogs.length === 0 && (
No actions yet…
)}
{manLogs.map((l, i) => (
[{i}] {l.msg}
))}
>
)}
{/* ── Training performance — before vs after GRPO ── */}
{/* ── Reward scoring formula — always visible ── */}
)
}