| <!DOCTYPE html> |
| <html lang="en"> |
|
|
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>OpenEnv Dashboard</title>

  <!--
    Inter webfont. Loaded with <link> (plus preconnect hints) rather than a
    CSS @import inside <style>: an @import is only discovered after the style
    block is parsed and delays first paint, while <link> lets the browser
    start the font stylesheet request immediately.
  -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&amp;display=swap">

  <!--
    Third-party runtime dependencies. Both are loaded synchronously on purpose:
    the inline script below assigns tailwind.config, and the inline script at
    the end of <body> uses the Chart global as soon as it runs.
  -->
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
  <script>
    // Dashboard design tokens. The colour names defined here (bgmain, cardbg,
    // borderc, accent, textprim, textsec, ...) are the ones used by the
    // utility classes in the markup, e.g. "bg-cardbg" or "text-textsec".
    tailwind.config = {
      theme: {
        extend: {
          colors: {
            bgmain: '#0a0a0a',
            cardbg: '#141414',
            borderc: '#262626',
            accent: '#ffb380',
            accenthover: '#ffc8a3',
            textprim: '#e5e5e5',
            textsec: '#737373',
          },
          fontFamily: {
            mono: ['ui-monospace', 'SFMono-Regular', 'Menlo', 'Monaco', 'Consolas', 'monospace'],
            sans: ['Inter', 'ui-sans-serif', 'system-ui', '-apple-system', 'sans-serif'],
          }
        }
      }
    }
  </script>
  <style>
    /* Base page colours; same hex values as the bgmain / textprim tokens. */
    body {
      font-family: 'Inter', sans-serif;
      background-color: #0a0a0a;
      color: #e5e5e5;
    }

    /* Thin dark scrollbar for WebKit-based browsers, opted into per element
       with the "custom-scrollbar" class. */
    .custom-scrollbar::-webkit-scrollbar {
      width: 6px;
    }

    .custom-scrollbar::-webkit-scrollbar-track {
      background: #141414;
    }

    .custom-scrollbar::-webkit-scrollbar-thumb {
      background: #262626;
      border-radius: 4px;
    }
  </style>
</head>
|
|
| <body class="h-screen flex overflow-hidden"> |
|
|
| |
<!-- Left sidebar: branding, data-source description and SSE connection status. -->
<aside class="w-72 border-r border-borderc bg-bgmain flex flex-col p-5 shrink-0" aria-label="Console sidebar">
  <div class="flex items-center gap-3 mb-10">
    <!-- Logo badge is decorative; the product name follows as real text. -->
    <div class="w-6 h-6 bg-accent rounded-md flex items-center justify-center" aria-hidden="true">
      <span class="text-bgmain text-xs font-bold shadow-lg">OE</span>
    </div>
    <div class="text-sm">
      <span class="font-bold text-textprim block">OpenEnv</span>
      <span class="text-textsec text-xs">dynamic guardrail console</span>
    </div>
  </div>

  <div class="mb-8">
    <h2 class="text-xs font-bold text-textsec uppercase tracking-widest mb-3">Data Explorer</h2>
    <div class="w-full bg-bgmain border border-borderc text-textprim text-sm rounded p-2 mb-2 font-mono">
      [LIVE_STREAM] metrics.jsonl
    </div>
    <p class="text-xs text-textsec mb-4">Streaming 120-step telemetry direct from GRPO Trainer.</p>
  </div>

  <div class="mt-auto border-t border-borderc pt-4">
    <h2 class="text-xs font-bold text-textsec uppercase tracking-widest mb-3">Proxy Integration</h2>
    <div class="flex items-center gap-2">
      <!-- Pulsing dot is purely visual; the text next to it carries the state. -->
      <span class="relative flex h-2 w-2" aria-hidden="true">
        <span class="animate-ping absolute inline-flex h-full w-full rounded-full bg-accent opacity-75"></span>
        <span class="relative inline-flex rounded-full h-2 w-2 bg-accent"></span>
      </span>
      <!-- The page script rewrites this text as the SSE connection changes;
           role="status" makes those changes announced politely. -->
      <span class="text-xs text-textprim" id="sse-status" role="status">Monitoring SSE loop...</span>
    </div>
  </div>
</aside>
|
|
| |
<!-- Primary content column: title bar, KPI strip, two charts, AST viewer and threat feed. -->
<main class="flex-1 flex flex-col overflow-y-auto custom-scrollbar p-8 bg-[url('data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4IiBoZWlnaHQ9IjgiPgo8cmVjdCB3aWR0aD0iOCIgaGVpZ2h0PSI4IiBmaWxsPSIjMGEwYTBhIj48L3JlY3Q+CjxwYXRoIGQ9Ik0wIDBMOCA4Wk04IDBMMCA4WiIgc3Ryb2tlPSIjMTExIiBzdHJva2Utd2lkdGg9IjEuNSI+PC9wYXRoPgo8L3N2Zz4=')] bg-opacity-10"
  style="background-blend-mode: overlay;">

  <!-- Title bar. The page script replaces the #header-title text on load. -->
  <div class="flex justify-between items-end mb-6 bg-bgmain p-4 rounded-xl border border-borderc shadow-lg">
    <div>
      <h1 class="text-2xl font-semibold text-textprim" id="header-title">Optimized Agent | 50 Steps</h1>
      <p class="text-sm text-textsec mt-1">Multi-Objective Log-Barrier Reward Surface</p>
    </div>
    <div class="px-3 py-1 border border-borderc rounded shadow text-xs font-mono text-textsec flex items-center gap-2 bg-cardbg">
      <div class="w-2 h-2 rounded-full bg-emerald-500 shadow-[0_0_8px_rgba(16,185,129,0.8)]" aria-hidden="true"></div> READY
    </div>
  </div>

  <!-- KPI strip. The four #metric-* values are filled in by renderDOM(). -->
  <div class="bg-cardbg border border-borderc rounded-lg p-5 mb-6 shadow-md">
    <h2 class="text-xs text-textsec font-mono uppercase tracking-wide mb-4">Performance Overview</h2>
    <div class="grid grid-cols-4 gap-6 divide-x divide-borderc">
      <div class="px-2">
        <p class="text-xs text-textsec uppercase mb-1">Rolling Recall</p>
        <p class="text-2xl font-mono text-textprim" id="metric-recall">--%</p>
        <p class="text-xs text-emerald-500 font-mono mt-1">Target ≥ 95.0%</p>
      </div>
      <div class="px-6">
        <p class="text-xs text-textsec uppercase mb-1">Rolling FPR</p>
        <p class="text-2xl font-mono text-textprim" id="metric-fpr">--%</p>
        <p class="text-xs text-red-400 font-mono mt-1">Target &lt; 5.0%</p>
      </div>
      <div class="px-6">
        <p class="text-xs text-textsec uppercase mb-1">Current Step</p>
        <p class="text-2xl font-mono text-textprim" id="metric-step">--</p>
        <p class="text-xs text-textsec font-mono mt-1">Max 120</p>
      </div>
      <div class="px-6">
        <p class="text-xs text-textsec uppercase mb-1">Reward</p>
        <p class="text-2xl font-mono text-textprim" id="metric-reward">--</p>
        <p class="text-xs text-textsec font-mono mt-1">Log-Barrier Metric</p>
      </div>
    </div>
  </div>

  <!-- Charts. A canvas has no accessible content of its own, so each one gets
       role="img" and a text label describing what is plotted. -->
  <div class="grid grid-cols-2 gap-6 mb-6 h-72">
    <div class="bg-cardbg border border-borderc rounded-lg p-5 flex flex-col shadow-md">
      <div class="flex justify-between mb-2">
        <h2 class="text-xs text-textsec font-mono uppercase tracking-wide">Reward: Baseline vs Trained</h2>
        <span class="text-xs text-borderc font-mono border border-borderc px-1 rounded">Delta</span>
      </div>
      <div class="flex-1 relative">
        <canvas id="chart-reward" role="img"
          aria-label="Line chart of agent reward against the static regex baseline over training iterations"></canvas>
      </div>
    </div>

    <div class="bg-cardbg border border-borderc rounded-lg p-5 flex flex-col shadow-md">
      <div class="flex justify-between mb-2">
        <h2 class="text-xs text-textsec font-mono uppercase tracking-wide">FPR: Baseline vs Trained</h2>
      </div>
      <div class="flex-1 relative">
        <canvas id="chart-fpr" role="img"
          aria-label="Line chart of trained false positive rate against the baseline over training iterations"></canvas>
      </div>
    </div>
  </div>

  <div class="grid grid-cols-1 lg:grid-cols-2 gap-6 pb-6">
    <!-- AST viewer: renderDOM() writes pretty-printed JSON into #ast-viewer. -->
    <div class="bg-cardbg border border-borderc rounded-lg p-5 flex flex-col min-h-[300px] max-h-[350px] shadow-md">
      <h2 class="text-xs text-textsec font-mono uppercase tracking-wide mb-4">Synthesized Guardrail AST
        Validation</h2>
      <div class="flex-1 bg-bgmain border border-borderc p-4 rounded font-mono text-sm overflow-auto text-[#6ee7b7] custom-scrollbar shadow-inner whitespace-pre"
        id="ast-viewer">...</div>
    </div>

    <!-- Threat feed: renderDOM() replaces the children of #threat-feed on every update. -->
    <div class="bg-cardbg border border-borderc rounded-lg p-5 flex flex-col min-h-[300px] max-h-[350px] shadow-md bg-opacity-95">
      <div class="flex justify-between items-center mb-4">
        <h2 class="text-xs text-textsec font-mono uppercase tracking-wide">Live Threat Feed</h2>
        <div class="flex items-center gap-2">
          <span class="w-2 h-2 rounded-full bg-red-500 animate-pulse shadow-[0_0_8px_rgba(239,68,68,0.8)]" aria-hidden="true"></span>
          <span class="text-[10px] text-textsec font-mono uppercase">Intercepting TCP Stream</span>
        </div>
      </div>
      <div class="flex-1 border border-borderc p-3 rounded font-mono text-xs overflow-y-auto custom-scrollbar shadow-inner flex flex-col gap-2 bg-[#050505]"
        id="threat-feed">
        <div class="text-textsec italic text-center mt-10">Awaiting traffic packets...</div>
      </div>
    </div>
  </div>
</main>
|
|
| |
| <script> |
| |
| |
// Global Chart.js defaults: muted label colour and a monospace font stack.
Chart.defaults.color = '#737373';
Chart.defaults.font.family = 'ui-monospace, SFMono-Regular, Consolas, monospace';

/**
 * Build the option object for one of the dashboard's line charts.
 *
 * Each chart gets its own object so the y-axis title can differ. Previously a
 * single shared object labelled the FPR chart's y-axis "Mean Reward Score".
 *
 * @param {string} yAxisTitle Text shown along the y-axis.
 * @returns {object} Chart.js options.
 */
function buildChartOptions(yAxisTitle) {
    return {
        responsive: true,
        // The canvas fills its flex parent instead of keeping a fixed aspect ratio.
        maintainAspectRatio: false,
        plugins: { legend: { display: true, position: 'bottom', labels: { boxWidth: 12 } } },
        scales: {
            x: {
                grid: { color: '#262626' },
                title: { display: true, text: 'Training Iterations' }
            },
            y: {
                grid: { color: '#262626' },
                title: { display: true, text: yAxisTitle }
            }
        },
        // No point markers (radius 0), but keep a hover hit area; lightly smoothed lines.
        elements: { point: { radius: 0, hitRadius: 10 }, line: { tension: 0.3 } }
    };
}

// Kept under its original name: these are the reward chart's options.
const chartOptions = buildChartOptions('Mean Reward Score');

const ctxReward = document.getElementById('chart-reward').getContext('2d');
const rewardChart = new Chart(ctxReward, {
    type: 'line',
    data: {
        labels: [],
        datasets: [
            { label: 'Agent Reward', borderColor: '#ffb380', borderWidth: 2, data: [] },
            { label: 'Static Regex Baseline', borderColor: '#ef4444', borderWidth: 2, borderDash: [5, 5], data: [] }
        ]
    },
    options: chartOptions
});

const ctxFpr = document.getElementById('chart-fpr').getContext('2d');
const fprChart = new Chart(ctxFpr, {
    type: 'line',
    data: {
        labels: [],
        datasets: [
            { label: 'Trained FPR', borderColor: '#ef4444', borderWidth: 2, data: [] },
            { label: 'Baseline FPR', borderColor: '#737373', borderWidth: 2, borderDash: [5, 5], data: [] }
        ]
    },
    // renderDOM() pushes FPR values multiplied by 100, so the unit is percent.
    options: buildChartOptions('False Positive Rate (%)')
});

// Reward bounds used to normalise a reward into [0, 1], and the number of
// most recent points averaged for the "rolling" KPI values.
const BASELINE_REWARD = -8.0;
const MAX_REWARD = 0.0;
const ROLLING_WINDOW = 20;
| |
/**
 * Convert a loosely typed field to a finite number.
 *
 * Only numbers and non-blank numeric strings are accepted. Everything else
 * yields `fallback`, including null, undefined, booleans, arrays, objects,
 * blank strings, NaN and +/-Infinity. A bare `Number(value)` is not enough
 * here: `Number(null)`, `Number("")` and `Number([])` are all 0, which would
 * turn a missing field into a real-looking zero and defeat `??` fallback
 * chains built on this helper.
 *
 * @param {*} value Candidate value.
 * @param {*} [fallback=null] Returned when `value` is not a usable number.
 * @returns {number|*} The finite number, or `fallback`.
 */
function toNumber(value, fallback = null) {
    if (typeof value === 'string') {
        // Number("") and Number("   ") are 0; treat blank text as "no value".
        if (value.trim() === '') return fallback;
    } else if (typeof value !== 'number') {
        return fallback;
    }
    const n = Number(value);
    return Number.isFinite(n) ? n : fallback;
}
| |
/**
 * Restrict `value` to the range [min, max].
 *
 * @param {number} value Number to restrict.
 * @param {number} min Lower bound.
 * @param {number} max Upper bound.
 * @returns {number} `value`, or the nearest bound when it lies outside the range.
 */
function clamp(value, min, max) {
    // Apply the upper bound first, then the lower bound.
    const upperBounded = Math.min(max, value);
    return Math.max(min, upperBounded);
}
| |
/**
 * Format a fraction as a percentage string with one decimal place.
 *
 * @param {number} value Fraction, e.g. 0.5.
 * @returns {string} Percentage text, e.g. "50.0%".
 */
function toPercent(value) {
    const scaled = value * 100;
    return `${scaled.toFixed(1)}%`;
}
| |
/**
 * Normalise one decoded stream packet into the shape renderDOM() consumes.
 *
 * The packet may carry its fields directly or nested under `payload`.
 * The reward is looked up under several keys, in order: `reward`, the flat
 * "rewards/openenv_reward_func/mean" key, the nested
 * `rewards.openenv_reward_func.mean` path, and finally the flat key on the
 * outer packet. The step is looked up as `step` / `global_step` on the
 * payload, then on the outer packet.
 *
 * @param {*} raw Decoded packet.
 * @returns {{step: (number|null), reward: number, ast_json: *, recent_traffic: Array}|null}
 *     The normalised metric, or null when `raw` is not an object or no
 *     numeric reward can be found.
 */
function extractMetric(raw) {
    if (!raw || typeof raw !== 'object') return null;

    const hasNestedPayload = Boolean(raw.payload) && typeof raw.payload === 'object';
    const payload = hasNestedPayload ? raw.payload : raw;

    // First candidate that toNumber() accepts wins; null when none does.
    const firstNumeric = (candidates) => {
        for (const candidate of candidates) {
            const parsed = toNumber(candidate, null);
            if (parsed !== null && parsed !== undefined) return parsed;
        }
        return null;
    };

    const reward = firstNumeric([
        payload.reward,
        payload["rewards/openenv_reward_func/mean"],
        payload.rewards?.openenv_reward_func?.mean,
        raw["rewards/openenv_reward_func/mean"]
    ]);
    // A packet without a reward carries nothing to plot.
    if (reward === null) return null;

    const step = firstNumeric([
        payload.step,
        payload.global_step,
        raw.step,
        raw.global_step
    ]);

    const traffic = Array.isArray(payload.recent_traffic) ? payload.recent_traffic : [];

    return {
        step,
        reward,
        ast_json: payload.ast_json ?? raw.ast_json ?? null,
        recent_traffic: traffic
    };
}
| |
/**
 * Derive display values for "recall" and "FPR" from recent rewards.
 *
 * Both figures are computed from the reward alone, not from measured
 * detections. Each of the last ROLLING_WINDOW points has its reward mapped
 * linearly from [BASELINE_REWARD, MAX_REWARD] onto [0, 1] and clamped. Recall
 * is the mean of those scores; FPR is the mean of (1 - score).
 *
 * @param {Array<{reward: number}>} dataArray Metric points, oldest first.
 * @returns {{recall: number, fpr: number}} Fractions in [0, 1]; both are 0
 *     when `dataArray` is empty.
 */
function deriveRollingStatsFromReward(dataArray) {
    const recentPoints = dataArray.slice(-ROLLING_WINDOW);
    const count = recentPoints.length;
    if (count === 0) {
        return { recall: 0, fpr: 0 };
    }

    let recallSum = 0;
    let fprSum = 0;
    recentPoints.forEach((point) => {
        const score = clamp((point.reward - BASELINE_REWARD) / (MAX_REWARD - BASELINE_REWARD), 0, 1);
        recallSum += score;
        fprSum += (1 - score);
    });

    return { recall: recallSum / count, fpr: fprSum / count };
}
| |
/**
 * Pick the label and Tailwind classes for one threat-feed entry.
 *
 * `is_malicious` and `was_blocked` are evaluated for truthiness only.
 *
 * @param {{is_malicious: *, was_blocked: *}} item Traffic record.
 * @returns {{prefix: string, textColor: string, borderColor: string, extraClasses: string[]}}
 */
function describeTrafficItem(item) {
    if (item.is_malicious && item.was_blocked) {
        return { prefix: "[🛡️ BLOCKED]", textColor: "text-red-400", borderColor: "border-red-500", extraClasses: [] };
    }
    if (!item.is_malicious && !item.was_blocked) {
        return { prefix: "[✅ ALLOWED]", textColor: "text-emerald-400", borderColor: "border-emerald-500", extraClasses: [] };
    }
    if (item.is_malicious && !item.was_blocked) {
        return {
            prefix: "[⚠️ BREACH]",
            textColor: "text-orange-400",
            borderColor: "border-orange-500",
            extraClasses: ["bg-orange-950", "bg-opacity-20"]
        };
    }
    // Remaining case: benign traffic that was blocked.
    return { prefix: "[❌ F-POSITV]", textColor: "text-slate-500", borderColor: "border-slate-600", extraClasses: [] };
}

/**
 * Build the DOM row for one threat-feed entry.
 *
 * The prompt text is attacker-influenced by nature (this feed shows
 * intercepted prompts), so it is inserted with `textContent`, never parsed
 * as HTML.
 *
 * @param {{is_malicious: *, was_blocked: *, prompt_text: *}} item Traffic record.
 * @returns {HTMLDivElement} Row element ready to append to the feed.
 */
function buildThreatFeedEntry(item) {
    const { prefix, textColor, borderColor, extraClasses } = describeTrafficItem(item);

    const row = document.createElement('div');
    row.className = "flex gap-3 p-2 border-l-4 rounded-r-md bg-[#111] bg-opacity-80 leading-tight items-start transition-all";
    row.classList.add(...extraClasses, borderColor);

    const tag = document.createElement('span');
    tag.className = `shrink-0 select-none ${textColor} w-[100px] font-bold text-[10px] tracking-wider pt-0.5`;
    tag.textContent = prefix;

    const text = document.createElement('span');
    text.className = `${textColor} break-all opacity-90`;
    // A missing prompt renders as an empty cell rather than the word "undefined".
    text.textContent = String(item.prompt_text ?? '');

    row.append(tag, text);
    return row;
}

/**
 * Redraw the whole dashboard from a list of metric points.
 *
 * Rebuilds both chart series from scratch, then updates the KPI strip, the
 * AST viewer and the threat feed from the LAST point in the list. Does
 * nothing when the list is missing or empty.
 *
 * @param {Array<{step: *, reward: number, ast_json: *, recent_traffic: Array}>} dataArray
 *     Metric points, oldest first.
 */
function renderDOM(dataArray) {
    if (!dataArray || dataArray.length === 0) return;

    // Charts: every series is rebuilt on each call. Each chart gets its own
    // fresh arrays; nothing is shared between the two charts.
    rewardChart.data.labels = dataArray.map((d) => d.step);
    rewardChart.data.datasets[0].data = dataArray.map((d) => d.reward);
    rewardChart.data.datasets[1].data = dataArray.map(() => BASELINE_REWARD);
    fprChart.data.labels = dataArray.map((d) => d.step);
    // Per-point FPR is derived from that single point's reward, in percent.
    fprChart.data.datasets[0].data = dataArray.map((d) => deriveRollingStatsFromReward([d]).fpr * 100);
    fprChart.data.datasets[1].data = dataArray.map(() => 100.0);
    rewardChart.update();
    fprChart.update();

    // KPI strip.
    const last = dataArray[dataArray.length - 1];
    const rolling = deriveRollingStatsFromReward(dataArray);
    document.getElementById('metric-recall').innerText = toPercent(rolling.recall);
    document.getElementById('metric-fpr').innerText = toPercent(rolling.fpr);
    document.getElementById('metric-step').innerText = last.step;
    document.getElementById('metric-reward').innerText = last.reward.toFixed(2);

    // AST viewer: accept either an object or a JSON-encoded string.
    let displayAst = last.ast_json;
    if (typeof displayAst === 'string') {
        try {
            displayAst = JSON.parse(displayAst);
        } catch (e) {
            // Deliberate best effort: text that is not valid JSON is shown as-is.
        }
    }
    // `?? null` keeps the output a string ("null") even if ast_json is undefined.
    document.getElementById('ast-viewer').innerText = JSON.stringify(displayAst ?? null, null, 4);

    // Threat feed: replaced wholesale on every call. Entries that are not
    // objects are skipped instead of aborting the render.
    const feedContainer = document.getElementById('threat-feed');
    const trafficItems = Array.isArray(last.recent_traffic)
        ? last.recent_traffic.filter((item) => item && typeof item === 'object')
        : [];

    if (trafficItems.length > 0) {
        feedContainer.replaceChildren(...trafficItems.map(buildThreatFeedEntry));
    } else {
        const placeholder = document.createElement('div');
        placeholder.className = 'text-textsec italic text-center mt-10';
        placeholder.textContent = 'No traffic detected in this cycle.';
        feedContainer.replaceChildren(placeholder);
    }
}
| |
| |
// Playback state shared by the SSE handlers and processQueue().
let playbackInterval = null;        // setInterval handle; null while no timer is running
let playbackQueue = [];             // metrics received but not yet drawn
let stepCounter = 0;                // last step number assigned to a metric
let activeLiveStreamBuffer = [];    // metrics currently drawn on the dashboard

// Server-sent events connection that delivers the metric packets.
const liveEventSource = new EventSource("/stream");
| |
/**
 * Timer tick: move one queued metric onto the dashboard.
 *
 * Takes the oldest metric from `playbackQueue`, appends it to
 * `activeLiveStreamBuffer` (dropping the oldest entry once the buffer exceeds
 * 120 points) and redraws. When the queue is empty the timer is cancelled and
 * `playbackInterval` is reset to null so the next packet can start a new one.
 */
function processQueue() {
    if (!playbackQueue.length) {
        clearInterval(playbackInterval);
        playbackInterval = null;
        return;
    }

    const nextPoint = playbackQueue.shift();
    // push() returns the new length, which is all the cap check needs.
    const bufferedCount = activeLiveStreamBuffer.push(nextPoint);
    if (bufferedCount > 120) {
        activeLiveStreamBuffer.shift();
    }
    renderDOM(activeLiveStreamBuffer);
}
| |
// Small helper so every status change goes through one place.
const setSseStatus = (message) => {
    document.getElementById('sse-status').innerText = message;
};

// Each SSE message is decoded, normalised, given a strictly increasing step
// number and queued for paced playback (one point every 200 ms).
liveEventSource.onmessage = (event) => {
    try {
        const metric = extractMetric(JSON.parse(event.data));
        if (!metric) {
            return;
        }

        // A missing or non-increasing step is replaced by the next counter
        // value; otherwise the counter jumps forward to the reported step.
        const mustRenumber = metric.step === null || metric.step <= stepCounter;
        stepCounter = mustRenumber ? stepCounter + 1 : metric.step;
        metric.step = stepCounter;

        setSseStatus("Live SSE Packets Receiving...");

        playbackQueue.push(metric);
        if (!playbackInterval) {
            playbackInterval = setInterval(processQueue, 200);
        }
    } catch (e) {
        // Any failure while handling a packet (e.g. invalid JSON) is
        // reported in the status line and the packet is dropped.
        setSseStatus("Malformed stream packet ignored.");
    }
};

liveEventSource.onerror = () => {
    setSseStatus("Waiting for pipeline...");
};

// Initial state: set the live-session title and draw a single placeholder
// point at the baseline reward. The placeholder is not stored in
// activeLiveStreamBuffer, so the first real packet replaces it.
document.getElementById('header-title').innerText = 'Live Evaluation Session | Streaming';
renderDOM([{
    step: 0,
    reward: BASELINE_REWARD,
    ast_json: { "status": "Waiting for live pipeline... Run inference to begin streaming." },
    recent_traffic: []
}]);
| </script> |
| </body> |
|
|
| </html> |