#!/usr/bin/env node 'use strict'; const http = require('http'); const fs = require('fs'); const path = require('path'); const PORT = parseInt(process.env.VISIBILITY_PORT || '4242'); // ── State ───────────────────────────────────────────────────────────────────── let state = fresh(); function fresh() { return { agents: {}, registry: {}, memory: {}, events: [], arrows: [], plan: [], internals: [], metrics: { steps: 0, tokens: 0, retries: 0 }, goal: '', runId: null, status: 'idle', startedAt: null, clients: [], }; } // ── SSE broadcast ───────────────────────────────────────────────────────────── function broadcast(type, payload) { const msg = `data: ${JSON.stringify({ type, payload, ts: Date.now() })}\n\n`; state.clients.forEach(r => { try { r.write(msg); } catch (_) {} }); } // ── Role colours ────────────────────────────────────────────────────────────── const COLORS = { orchestrator: '#8b7cf8', researcher: '#2dd4b0', coder: '#60a5fa', critic: '#f59e0b', synthesiser: '#60a5fa', worker: '#2dd4b0', }; // ── Helpers ─────────────────────────────────────────────────────────────────── function ensureAgent(id) { if (!state.agents[id]) { const r = state.registry[id] || {}; state.agents[id] = { id, label: r.label || id, role: r.role || 'worker', model: r.model || '', reports_to: r.reports_to || null, token_budget: r.token_budget || 8192, color: r.color || COLORS[r.role] || '#6b7280', status: 'idle', tokens: 0, calls: 0, }; } } function safeAgents() { const out = {}; for (const [k, v] of Object.entries(state.agents)) { out[k] = { id: v.id, label: v.label, role: v.role, model: v.model, reports_to: v.reports_to, token_budget: v.token_budget, color: v.color, status: v.status, tokens: v.tokens, calls: v.calls }; } return out; } function snapshot() { return { registry: state.registry, runId: state.runId, goal: state.goal, status: state.status, startedAt: state.startedAt, agents: safeAgents(), memory: state.memory, events: state.events.slice(0, 80), arrows: state.arrows.slice(0, 20), plan: state.plan, metrics: state.metrics, internals: state.internals.slice(0, 60), scenarios: Object.keys(SCENARIOS), }; } // ── Tools ───────────────────────────────────────────────────────────────────── const TOOLS = { register_agent({ id, label, role = 'worker', model = '', reports_to = null, token_budget = 8192, color = null }) { const c = color || COLORS[role] || '#6b7280'; state.registry[id] = { id, label, role, model, reports_to, token_budget, color: c }; state.agents[id] = { ...state.registry[id], status: 'idle', tokens: 0, calls: 0 }; broadcast('registry', state.registry); broadcast('agents', safeAgents()); broadcast('event', { agent: id, event_type: 'registered', message: `${label} registered — role:${role}, model:${model || 'unset'}`, tokens: 0, latency_ms: 0, ts: Date.now() }); return { ok: true }; }, log_event({ agent, event_type, message, tokens = 0, latency_ms = 0, metadata = {} }) { ensureAgent(agent); const item = { agent, event_type, message, tokens, latency_ms, metadata, ts: Date.now() }; state.events.unshift(item); if (state.events.length > 200) state.events.pop(); if (tokens) { state.agents[agent].tokens += tokens; state.agents[agent].calls += 1; state.metrics.tokens += tokens; } state.metrics.steps++; broadcast('event', item); broadcast('metrics', state.metrics); broadcast('agents', safeAgents()); return { ok: true }; }, set_memory({ key, value, op = 'write' }) { state.memory[key] = { value, op, ts: Date.now() }; broadcast('memory', { key, value, op, ts: Date.now() }); return { ok: true }; }, set_agent_state({ agent_id, status }) { ensureAgent(agent_id); state.agents[agent_id].status = status; broadcast('agents', safeAgents()); return { ok: true }; }, trace_step({ from_agent, to_agent, label = '', arrow_type = 'msg' }) { ensureAgent(from_agent); ensureAgent(to_agent); const arrow = { from: from_agent, to: to_agent, label, arrow_type, ts: Date.now() }; state.arrows.unshift(arrow); if (state.arrows.length > 50) state.arrows.pop(); broadcast('arrow', arrow); return { ok: true }; }, set_plan({ tasks }) { state.plan = tasks; broadcast('plan', tasks); return { ok: true }; }, set_goal({ goal, run_id }) { state.goal = goal; state.runId = run_id || String(Date.now()); state.status = 'running'; state.startedAt = Date.now(); broadcast('goal', { goal, runId: state.runId }); broadcast('status', 'running'); return { ok: true }; }, finish_run({ status = 'done' }) { state.status = status; broadcast('status', status); return { ok: true }; }, // ── Internal observability tools ────────────────────────────────────────── log_embedding({ agent, text, model = 'text-embedding-3-small', dims = 1536, latency_ms = 0 }) { ensureAgent(agent); const item = { kind: 'embedding', agent, text: String(text).slice(0, 90), model, dims, latency_ms, ts: Date.now() }; state.internals.unshift(item); if (state.internals.length > 200) state.internals.pop(); broadcast('internal', item); return { ok: true }; }, log_retrieval({ agent, query, results = [], latency_ms = 0 }) { ensureAgent(agent); const item = { kind: 'retrieval', agent, query: String(query).slice(0, 90), results: results.slice(0, 6).map(r => ({ text: String(r.text || '').slice(0, 70), score: r.score ?? 0 })), latency_ms, ts: Date.now(), }; state.internals.unshift(item); if (state.internals.length > 200) state.internals.pop(); broadcast('internal', item); return { ok: true }; }, log_tool_call({ agent, tool_name, input = '', output = '', latency_ms = 0, error = null }) { ensureAgent(agent); const item = { kind: 'tool_call', agent, tool_name, input: String(input).slice(0, 4000), output: String(output).slice(0, 4000), latency_ms, error, ts: Date.now(), }; state.internals.unshift(item); if (state.internals.length > 200) state.internals.pop(); broadcast('internal', item); return { ok: true }; }, log_generation({ agent, prompt_tokens = 0, completion_tokens = 0, model = '', latency_ms = 0, stop_reason = 'stop', messages = [], response = null, thinking = null }) { ensureAgent(agent); const total = prompt_tokens + completion_tokens; const item = { kind: 'generation', agent, prompt_tokens, completion_tokens, total, model, latency_ms, stop_reason, messages: (messages||[]).slice(0,30).map(m => ({ role: String(m.role||'user'), content: String(m.content||'').slice(0,2000) })), response: response ? String(response).slice(0,4000) : null, thinking: thinking ? String(thinking).slice(0,3000) : null, ts: Date.now(), }; state.internals.unshift(item); if (state.internals.length > 200) state.internals.pop(); if (total) { state.agents[agent].tokens += total; state.agents[agent].calls += 1; state.metrics.tokens += total; } broadcast('internal', item); broadcast('agents', safeAgents()); broadcast('metrics', state.metrics); return { ok: true }; }, }; // alias: log_llm_turn → log_generation (richer name exposed in MCP) TOOLS.log_llm_turn = TOOLS.log_generation; // ── Demo scenarios ───────────────────────────────────────────────────────────── const SCENARIOS = { research_code: { goal: 'Explain quicksort and write a Python implementation', steps: [ { delay: 0, fn: () => { TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); TOOLS.register_agent({ id: 'researcher', label: 'Researcher', role: 'researcher', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 8192 }); TOOLS.register_agent({ id: 'coder', label: 'Coder', role: 'coder', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); }}, { delay: 800, fn: () => { TOOLS.set_goal({ goal: SCENARIOS.research_code.goal }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 280, completion_tokens: 95, model: 'claude-sonnet-4-20250514', latency_ms: 620, stop_reason: 'end_turn', messages: [ { role: 'system', content: 'You are an orchestrator agent. Break the user goal into subtasks and delegate to specialist agents: Researcher (theory/research), Coder (implementation), Critic (validation). Always plan before routing.' }, { role: 'user', content: 'Explain quicksort and write a Python implementation' }, ], response: "I'll break this into 3 sequential tasks:\n1. **Researcher** — explain quicksort: theory, O(n log n) complexity, partition schemes (Lomuto/Hoare)\n2. **Coder** — write a clean Python implementation with type hints, docstrings, and edge-case handling\n3. **Critic** — review code quality, correctness, and style\n\nRouting to Researcher first.", }); TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planning tasks…' }); }}, { delay: 900, fn: () => { TOOLS.set_plan({ tasks: [{ agent: 'researcher', task: 'Explain quicksort', depends_on: [] }, { agent: 'coder', task: 'Write Python implementation', depends_on: [0] }, { agent: 'critic', task: 'Validate code quality', depends_on: [1] }] }); TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'explain', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'researcher', status: 'running' }); TOOLS.set_memory({ key: 'goal', value: SCENARIOS.research_code.goal }); }}, // Researcher — embed query, web search, generate { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'researcher', text: 'quicksort algorithm explanation divide conquer', model: 'text-embedding-3-small', dims: 1536, latency_ms: 48 }); }}, { delay: 300, fn: () => { TOOLS.log_retrieval({ agent: 'researcher', query: 'quicksort algorithm complexity analysis', latency_ms: 92, results: [ { text: 'Quicksort uses divide-and-conquer: pick a pivot, partition into <, =, > subarrays.', score: 0.94 }, { text: 'Average-case O(n log n); worst-case O(n²) with bad pivot selection.', score: 0.91 }, { text: 'Lomuto vs Hoare partition schemes differ in swap count and cache behaviour.', score: 0.87 }, { text: 'Introsort (used in STL) falls back to heapsort to avoid O(n²) worst case.', score: 0.82 }, ], }); }}, { delay: 500, fn: () => { TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'web_search', input: 'quicksort algorithm detailed explanation', output: '6 results — Wikipedia, CS Visualizer, CLRS excerpt', latency_ms: 340 }); }}, { delay: 900, fn: () => { TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 1840, completion_tokens: 620, model: 'claude-haiku-4-5-20251001', latency_ms: 1320, stop_reason: 'end_turn', messages: [ { role: 'system', content: 'You are a researcher agent. Synthesise accurate technical information from retrieved documents. Be precise, cite complexity bounds, note tradeoffs.' }, { role: 'user', content: 'Task from orchestrator: Explain the quicksort algorithm in detail — theory, complexity, partition schemes.' }, { role: 'assistant', content: '[embedding query and retrieving relevant documents…]' }, { role: 'tool', content: 'Retrieved 4 chunks:\n• Quicksort uses divide-and-conquer: pick a pivot, partition into <, =, > subarrays. (score 0.94)\n• Average O(n log n); worst O(n²) with bad pivot selection. (score 0.91)\n• Lomuto vs Hoare partition differ in swap count and cache behaviour. (score 0.87)\n• Introsort falls back to heapsort to avoid O(n²) worst case. (score 0.82)' }, ], response: "**Quicksort** is a divide-and-conquer sorting algorithm:\n\n**Core strategy**: Choose a pivot element, partition the array into ≤ pivot and > pivot halves, then recursively sort each half in-place.\n\n**Complexity**:\n- Average: O(n log n) — balanced splits with good pivot choice\n- Worst: O(n²) — degenerate pivot on already-sorted input\n- Space: O(log n) stack depth average\n\n**Partition schemes**:\n- *Lomuto*: simpler code, last element as pivot, O(n) comparisons\n- *Hoare*: ~3× fewer swaps, two converging pointers\n\n**Practical optimisations**:\n- Median-of-3 pivot selection to avoid worst case\n- Switch to insertion sort for subarrays smaller than ~10 elements\n- Introsort (Python's Timsort variant) adds heapsort fallback for guaranteed O(n log n)", }); TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Quicksort: divide-and-conquer. Pivot splits into <, =, > partitions. Avg O(n log n), worst O(n²) with sorted input.' }); TOOLS.set_memory({ key: 'research', value: 'Quicksort: O(n log n) avg, O(n²) worst. Lomuto/Hoare partition.' }); TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'done', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'researcher', status: 'done' }); }}, { delay: 500, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'implement', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); }}, // Coder — retrieve code examples, execute sandbox, generate { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'coder', text: 'Python quicksort implementation with type hints', model: 'text-embedding-3-small', dims: 1536, latency_ms: 51 }); }}, { delay: 300, fn: () => { TOOLS.log_retrieval({ agent: 'coder', query: 'Python quicksort recursive + in-place implementation', latency_ms: 88, results: [ { text: 'def quicksort(arr): return [] if not arr else quicksort([x for x in arr[1:] if x<=arr[0]])+[arr[0]]+...', score: 0.96 }, { text: 'def partition(arr, lo, hi): pivot=arr[hi]; i=lo-1; ...', score: 0.93 }, { text: 'Lomuto partition: O(n) extra calls, simpler code. Hoare: fewer swaps.', score: 0.88 }, ], }); }}, { delay: 800, fn: () => { TOOLS.log_generation({ agent: 'coder', prompt_tokens: 3200, completion_tokens: 980, model: 'claude-sonnet-4-20250514', latency_ms: 2100, stop_reason: 'end_turn', messages: [ { role: 'system', content: 'You are a coder agent. Write production-quality Python code with type hints, docstrings, and edge-case handling. Use retrieved snippets as reference, not copy-paste.' }, { role: 'user', content: 'Task from orchestrator: Implement quicksort in Python. Context from researcher:\n- Quicksort: divide-and-conquer, pick pivot, partition into ≤ and > halves\n- Avg O(n log n), worst O(n²)\n- Lomuto partition is simpler; Hoare has fewer swaps' }, { role: 'assistant', content: '[retrieving code examples and planning implementation…]' }, { role: 'tool', content: 'Retrieved snippets:\n• def quicksort(arr): return [] if not arr else quicksort([x for x in arr[1:] if x<=arr[0]])+[arr[0]]+... (functional, not in-place)\n• def partition(arr, lo, hi): pivot=arr[hi]; i=lo-1; ... (Lomuto, in-place)' }, ], response: "```python\nfrom typing import TypeVar\n\nT = TypeVar('T')\n\n\ndef quicksort(arr: list[T]) -> list[T]:\n \"\"\"Return a new sorted list using quicksort (functional style).\n\n Average O(n log n), worst-case O(n²). Not in-place.\n \"\"\"\n if len(arr) <= 1:\n return arr\n pivot = arr[len(arr) // 2] # median-of-1 (simple)\n left = [x for x in arr if x < pivot]\n mid = [x for x in arr if x == pivot]\n right = [x for x in arr if x > pivot]\n return quicksort(left) + mid + quicksort(right)\n\n\ndef quicksort_inplace(arr: list[T], lo: int = 0, hi: int | None = None) -> None:\n \"\"\"Sort arr in-place using Lomuto partition scheme.\"\"\"\n if hi is None:\n hi = len(arr) - 1\n if lo < hi:\n p = _partition(arr, lo, hi)\n quicksort_inplace(arr, lo, p - 1)\n quicksort_inplace(arr, p + 1, hi)\n\n\ndef _partition(arr: list[T], lo: int, hi: int) -> int:\n pivot, i = arr[hi], lo - 1\n for j in range(lo, hi):\n if arr[j] <= pivot:\n i += 1\n arr[i], arr[j] = arr[j], arr[i]\n arr[i + 1], arr[hi] = arr[hi], arr[i + 1]\n return i + 1\n```\n\nBoth variants pass edge cases: empty list, single element, all-equal, reverse-sorted.", }); }}, { delay: 600, fn: () => { TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([3,1,4,1,5,9,2,6]) — smoke test', output: '[1, 1, 2, 3, 4, 5, 6, 9] ✓ (12ms)', latency_ms: 112 }); TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([]) — edge case empty list', output: '[] ✓', latency_ms: 8 }); TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([1]) — single element', output: '[1] ✓', latency_ms: 6 }); }}, { delay: 900, fn: () => { TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'quicksort() + quicksort_inplace() — full docstrings, Lomuto partition, all edge cases pass.' }); TOOLS.set_memory({ key: 'code', value: 'def quicksort(arr: list) -> list: ...\ndef quicksort_inplace(arr, lo, hi): ...' }); TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'ready', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'done' }); }}, { delay: 500, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'validate', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); }}, // Critic — embed code, lint, generate review { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'critic', text: 'def quicksort(arr: list) -> list: ...', model: 'text-embedding-3-small', dims: 1536, latency_ms: 44 }); }}, { delay: 300, fn: () => { TOOLS.log_tool_call({ agent: 'critic', tool_name: 'lint_check', input: 'quicksort.py', output: 'pylint 9.8/10 — 0 errors, 1 convention (missing module docstring)', latency_ms: 180 }); TOOLS.log_tool_call({ agent: 'critic', tool_name: 'type_check', input: 'mypy quicksort.py --strict', output: 'Success: no issues found in 1 source file', latency_ms: 95 }); }}, { delay: 700, fn: () => { TOOLS.log_generation({ agent: 'critic', prompt_tokens: 2100, completion_tokens: 480, model: 'claude-haiku-4-5-20251001', latency_ms: 980, stop_reason: 'end_turn', messages: [ { role: 'system', content: 'You are a critic agent. Review code for correctness, style, type safety, and edge-case coverage. Output a score /10 with justification.' }, { role: 'user', content: 'Review this Python quicksort implementation:\n\ndef quicksort(arr: list[T]) -> list[T]: ...\ndef quicksort_inplace(arr, lo, hi): ...\n\nTool results: pylint 9.8/10, mypy strict: no issues.' }, ], response: "**PASS — 9/10**\n\n✓ Type hints on public API (TypeVar T for generics)\n✓ Docstrings explain complexity and behaviour\n✓ Both functional and in-place variants provided\n✓ Edge cases: empty list, single element return correctly\n✓ mypy strict passes — no type errors\n✓ pylint 9.8/10\n\n**Minor issues**:\n- Missing module-level docstring (-0.5)\n- `quicksort_inplace` docstring doesn't document `lo`/`hi` params (-0.5)\n- Pivot selection is not median-of-3 — can hit O(n²) on nearly-sorted input (acceptable for demo)\n\nRecommendation: **approve for merge**. Add module docstring before production use.", }); TOOLS.log_event({ agent: 'critic', event_type: 'pass', message: 'PASS 9/10 — clean API, type-safe, edge cases covered. Minor: missing module docstring.' }); TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'pass 9/10', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'done' }); }}, { delay: 400, fn: () => { TOOLS.set_memory({ key: 'output', value: 'quicksort.py — approved 9/10' }); TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Run complete — 18 steps' }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); TOOLS.finish_run({ status: 'done' }); }}, ], }, critic_retry: { goal: 'Write an RFC-5321 compliant email regex validator', steps: [ { delay: 0, fn: () => { TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); TOOLS.register_agent({ id: 'coder', label: 'Coder', role: 'coder', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); }}, { delay: 700, fn: () => { TOOLS.set_goal({ goal: SCENARIOS.critic_retry.goal }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 240, completion_tokens: 80, model: 'claude-sonnet-4-20250514', latency_ms: 580 }); TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planning…' }); }}, { delay: 800, fn: () => { TOOLS.set_plan({ tasks: [{ agent: 'coder', task: 'Write RFC-5321 email regex', depends_on: [] }, { agent: 'critic', task: 'Validate regex correctness', depends_on: [0] }] }); TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'write', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); }}, // Coder v1 — minimal attempt { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'coder', text: 'RFC-5321 email address validation regex Python', model: 'text-embedding-3-small', dims: 1536, latency_ms: 49 }); }}, { delay: 300, fn: () => { TOOLS.log_retrieval({ agent: 'coder', query: 'email regex RFC 5321 compliant Python', latency_ms: 84, results: [ { text: 'Simple: r"[^@]+@[^@]+\\.[^@]+" — catches most but misses edge cases.', score: 0.89 }, { text: 'RFC-5321 allows quoted strings, IP literals, special chars in local part.', score: 0.85 }, ], }); }}, { delay: 900, fn: () => { TOOLS.log_generation({ agent: 'coder', prompt_tokens: 920, completion_tokens: 240, model: 'claude-sonnet-4-20250514', latency_ms: 1800, stop_reason: 'end_turn' }); TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'test_email("user@example.com")', output: 'True ✓', latency_ms: 14 }); TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'Draft v1: r"[^@]+" — covers basic cases.' }); TOOLS.set_memory({ key: 'code', value: 'r"[^@]+"' }); TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'v1', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'active' }); }}, // Critic v1 review — fail { delay: 500, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'review v1', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); }}, { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'critic', text: 'r"[^@]+" email regex RFC-5321 compliance', model: 'text-embedding-3-small', dims: 1536, latency_ms: 46 }); TOOLS.log_tool_call({ agent: 'critic', tool_name: 'regex_test_suite', input: 'RFC-5321 test vectors (120 cases)', output: '67/120 pass — missing TLDs, quoted strings, IP literals, consecutive dot check', latency_ms: 220 }); }}, { delay: 700, fn: () => { TOOLS.log_generation({ agent: 'critic', prompt_tokens: 1400, completion_tokens: 360, model: 'claude-haiku-4-5-20251001', latency_ms: 980, stop_reason: 'end_turn' }); TOOLS.log_event({ agent: 'critic', event_type: 'fail', message: 'FAIL 4/10 — 67/120 test vectors pass. Missing: TLDs, quoted strings, IP literals, consecutive-dot rule.' }); TOOLS.set_memory({ key: 'critique', value: 'fail 4/10 — missing TLDs, quoted strings, IP literals' }); TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'fail 4/10', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'active' }); state.metrics.retries++; broadcast('metrics', state.metrics); }}, // Orchestrator retries coder { delay: 500, fn: () => { TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 480, completion_tokens: 120, model: 'claude-sonnet-4-20250514', latency_ms: 640 }); TOOLS.log_event({ agent: 'orchestrator', event_type: 'retry', message: 'Critic FAIL — retrying Coder with full critique attached' }); TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'retry', arrow_type: 'retry' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); }}, // Coder v2 — thorough attempt { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'coder', text: 'RFC-5321 quoted strings IP literal TLD validation', model: 'text-embedding-3-small', dims: 1536, latency_ms: 52 }); TOOLS.log_retrieval({ agent: 'coder', query: 'RFC 5321 email local-part quoted string IP literal syntax', latency_ms: 96, results: [ { text: 'Local part: atom or quoted-string. Quoted allows spaces, special chars within double quotes.', score: 0.95 }, { text: 'Domain: hostname or IP literal [n.n.n.n]. TLD must be 2+ alpha chars.', score: 0.93 }, { text: 'No consecutive dots in local or domain part. No leading/trailing dot.', score: 0.91 }, ], }); }}, { delay: 1200, fn: () => { TOOLS.log_generation({ agent: 'coder', prompt_tokens: 2800, completion_tokens: 780, model: 'claude-sonnet-4-20250514', latency_ms: 2600, stop_reason: 'end_turn' }); }}, { delay: 600, fn: () => { TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'RFC-5321 test suite — 120 vectors', output: '118/120 pass (2 obscure IPv6 edge cases)', latency_ms: 340 }); TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'Draft v2: RFC-5321 compliant — TLD check, quoted strings, IP literals, consecutive-dot guard.' }); TOOLS.set_memory({ key: 'code', value: 'RFC5321_RE = re.compile(r\'...\') # 118/120 RFC vectors pass' }); TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'v2', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'coder', status: 'done' }); }}, // Critic v2 review — pass { delay: 500, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'review v2', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); }}, { delay: 400, fn: () => { TOOLS.log_tool_call({ agent: 'critic', tool_name: 'regex_test_suite', input: 'RFC-5321 test vectors (120 cases)', output: '118/120 pass — 2 obscure IPv6 literals; acceptable for prod use', latency_ms: 215 }); }}, { delay: 700, fn: () => { TOOLS.log_generation({ agent: 'critic', prompt_tokens: 1600, completion_tokens: 320, model: 'claude-haiku-4-5-20251001', latency_ms: 860, stop_reason: 'end_turn' }); TOOLS.log_event({ agent: 'critic', event_type: 'pass', message: 'PASS 9/10 — 118/120 RFC vectors pass, production-ready.' }); TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'pass 9/10', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'critic', status: 'done' }); }}, { delay: 400, fn: () => { TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Complete after 1 retry — 1 retry, 20 steps' }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); TOOLS.finish_run({ status: 'done' }); }}, ], }, memory_overflow: { goal: 'Summarise 3 ML papers and synthesise into a report', steps: [ { delay: 0, fn: () => { TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); TOOLS.register_agent({ id: 'researcher', label: 'Researcher', role: 'researcher', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 8192 }); TOOLS.register_agent({ id: 'synthesiser', label: 'Synthesiser', role: 'synthesiser', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); }}, { delay: 700, fn: () => { TOOLS.set_goal({ goal: SCENARIOS.memory_overflow.goal }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 260, completion_tokens: 88, model: 'claude-sonnet-4-20250514', latency_ms: 600 }); TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planning 3-paper synthesis…' }); }}, { delay: 900, fn: () => { TOOLS.set_plan({ tasks: [{ agent: 'researcher', task: 'Summarise paper A — scaling laws', depends_on: [] }, { agent: 'researcher', task: 'Summarise paper B — MoE routing', depends_on: [] }, { agent: 'researcher', task: 'Summarise paper C — RLHF hacking', depends_on: [] }, { agent: 'synthesiser', task: 'Synthesise into report', depends_on: [0,1,2] }] }); TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper A', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'researcher', status: 'running' }); }}, // Paper A { delay: 400, fn: () => { TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'scaling_laws_2020.pdf', output: '18,400 tokens extracted — 42 pages', latency_ms: 480 }); TOOLS.log_embedding({ agent: 'researcher', text: 'neural scaling laws loss compute data parameters', model: 'text-embedding-3-small', dims: 1536, latency_ms: 55 }); }}, { delay: 600, fn: () => { TOOLS.log_retrieval({ agent: 'researcher', query: 'key findings scaling laws compute-optimal training', latency_ms: 104, results: [ { text: 'Loss scales as power law with N (params), D (data), C (compute): L ∝ N^0.076.', score: 0.97 }, { text: 'Compute-optimal: scale params and data proportionally. Chinchilla law.', score: 0.94 }, { text: 'Irreducible loss ≈ 1.69 nats; emergent capabilities at scale thresholds.', score: 0.88 }, ], }); TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 2400, completion_tokens: 520, model: 'claude-haiku-4-5-20251001', latency_ms: 1600, stop_reason: 'end_turn' }); TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper A: Scaling laws — loss ∝ N^0.076. Compute-optimal: equal param/data scaling.' }); TOOLS.set_memory({ key: 'paper_a', value: 'Scaling laws: loss ∝ N^0.076, Chinchilla-optimal' }); TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'A done', arrow_type: 'result' }); }}, // Paper B { delay: 400, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper B', arrow_type: 'msg' }); TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'moe_routing_2023.pdf', output: '22,100 tokens extracted — 51 pages', latency_ms: 520 }); TOOLS.log_embedding({ agent: 'researcher', text: 'mixture of experts routing sparse transformer efficiency', model: 'text-embedding-3-small', dims: 1536, latency_ms: 53 }); }}, { delay: 600, fn: () => { TOOLS.log_retrieval({ agent: 'researcher', query: 'MoE routing top-k expert selection load balancing', latency_ms: 98, results: [ { text: 'Top-2 routing: each token sent to 2 of N experts. 60% active-param reduction vs dense.', score: 0.96 }, { text: 'Load balancing loss prevents expert collapse. Jitter noise aids exploration.', score: 0.92 }, { text: 'Switch Transformer: top-1 routing, simpler but prone to collapse without aux loss.', score: 0.87 }, ], }); TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 2800, completion_tokens: 490, model: 'claude-haiku-4-5-20251001', latency_ms: 1500, stop_reason: 'end_turn' }); TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper B: MoE top-2 routing, 60% active-param reduction. Load-balance aux loss prevents collapse.' }); TOOLS.set_memory({ key: 'paper_b', value: 'MoE: top-2 routing, 60% reduction, aux load-balance loss' }); TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'B done', arrow_type: 'result' }); }}, // Paper C — triggers memory pressure { delay: 400, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper C', arrow_type: 'msg' }); TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'rlhf_reward_hacking_2024.pdf', output: '31,200 tokens extracted — 68 pages', latency_ms: 710 }); TOOLS.log_embedding({ agent: 'researcher', text: 'RLHF reward hacking overoptimisation KL penalty', model: 'text-embedding-3-small', dims: 1536, latency_ms: 58 }); }}, { delay: 600, fn: () => { TOOLS.log_retrieval({ agent: 'researcher', query: 'reward hacking frequency mitigation strategies RLHF', latency_ms: 112, results: [ { text: 'Reward hacking observed in 34% of runs beyond 3000 RL steps. KL alone insufficient.', score: 0.95 }, { text: 'Constitutional AI + process reward models reduce hacking to <8%.', score: 0.91 }, { text: 'Ensemble reward models provide more robust signal than single RM.', score: 0.88 }, ], }); TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 3200, completion_tokens: 560, model: 'claude-haiku-4-5-20251001', latency_ms: 1800, stop_reason: 'end_turn' }); TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper C: RLHF reward hacking in 34% of runs. KL penalty alone insufficient; ensemble RMs help.' }); TOOLS.set_memory({ key: 'paper_c', value: 'RLHF: reward hacking 34%, use ensemble RMs + CAI' }); TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'C done', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'researcher', status: 'done' }); }}, // Synthesiser — context overflow { delay: 600, fn: () => { TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'synthesiser', label: 'synthesise', arrow_type: 'msg' }); TOOLS.set_agent_state({ agent_id: 'synthesiser', status: 'running' }); }}, { delay: 400, fn: () => { TOOLS.log_embedding({ agent: 'synthesiser', text: 'scaling laws MoE routing RLHF reward hacking synthesis', model: 'text-embedding-3-small', dims: 1536, latency_ms: 62 }); TOOLS.log_tool_call({ agent: 'synthesiser', tool_name: 'context_count', input: 'papers A+B+C combined tokens', output: '7,840 / 8,192 tokens used (95.7%) — paper C will be truncated', latency_ms: 12 }); TOOLS.log_event({ agent: 'synthesiser', event_type: 'warn', message: 'WARNING: context at 95.7% — paper C (RLHF) will be truncated to fit budget.' }); }}, { delay: 1200, fn: () => { TOOLS.log_generation({ agent: 'synthesiser', prompt_tokens: 7840, completion_tokens: 980, model: 'claude-sonnet-4-20250514', latency_ms: 3200, stop_reason: 'max_tokens' }); TOOLS.log_event({ agent: 'synthesiser', event_type: 'reply', message: 'Report done (partial): scaling laws + MoE full coverage; RLHF section truncated — recommend re-running with chunked context.' }); TOOLS.set_memory({ key: 'output', value: 'Report: scaling (full) + MoE (full) + RLHF (truncated)' }); TOOLS.trace_step({ from_agent: 'synthesiser', to_agent: 'orchestrator', label: 'report', arrow_type: 'result' }); TOOLS.set_agent_state({ agent_id: 'synthesiser', status: 'done' }); }}, { delay: 400, fn: () => { TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Complete — context overflow on paper C. Recommend chunked summarisation for large doc sets.' }); TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); TOOLS.finish_run({ status: 'done' }); }}, ], }, }; function runScenario(name) { const s = SCENARIOS[name]; if (!s) return false; const clients = state.clients; state = fresh(); state.clients = clients; broadcast('reset', {}); let cum = 0; s.steps.forEach(step => { cum += step.delay; setTimeout(() => { try { step.fn(); } catch (e) { console.error(e); } }, cum); }); return true; } // ── Dashboard HTML ───────────────────────────────────────────────────────────── const HTML = fs.readFileSync(path.join(__dirname, 'dashboard.html'), 'utf8'); // ── HTTP helpers ────────────────────────────────────────────────────────────── const CORS = { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS', 'Access-Control-Allow-Headers': 'Content-Type', }; function body(req, cb) { let d = ''; req.on('data', c => d += c); req.on('end', () => cb(d)); } function json(res, data, status = 200) { res.writeHead(status, { ...CORS, 'Content-Type': 'application/json' }); res.end(JSON.stringify(data)); } // ── HTTP server ──────────────────────────────────────────────────────────────── const server = http.createServer((req, res) => { if (req.method === 'OPTIONS') { res.writeHead(204, CORS); res.end(); return; } // Dashboard UI if (req.method === 'GET' && (req.url === '/' || req.url === '/index.html')) { res.writeHead(200, { 'Content-Type': 'text/html' }); res.end(HTML); return; } // SSE stream if (req.method === 'GET' && req.url === '/events') { res.writeHead(200, { ...CORS, 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' }); res.write(`data: ${JSON.stringify({ type: 'init', payload: { state: snapshot() }, ts: Date.now() })}\n\n`); state.clients.push(res); req.on('close', () => { state.clients = state.clients.filter(c => c !== res); }); return; } // Current state snapshot if (req.method === 'GET' && req.url === '/state') { json(res, snapshot()); return; } // Tool call if (req.method === 'POST' && req.url === '/tool') { body(req, data => { try { const { tool, args } = JSON.parse(data); const fn = TOOLS[tool]; json(res, fn ? fn(args || {}) : { error: `Unknown tool: ${tool}` }); } catch (e) { json(res, { error: e.message }, 400); } }); return; } // Run a demo scenario if (req.method === 'POST' && req.url === '/emulate') { body(req, data => { const { scenario } = JSON.parse(data || '{}'); const ok = runScenario(scenario || 'research_code'); json(res, { ok, scenario }, ok ? 200 : 400); }); return; } // Reset state if (req.method === 'POST' && req.url === '/reset') { const clients = state.clients; state = fresh(); state.clients = clients; broadcast('reset', {}); json(res, { ok: true }); return; } json(res, { error: 'Not found' }, 404); }); server.listen(PORT, () => { console.log(`\n agent-visibility\n`); console.log(` Dashboard → http://localhost:${PORT}`); console.log(` Tool POST → http://localhost:${PORT}/tool`); console.log(` Ctrl+C to stop\n`); });