Spaces:
No application file
No application file
| ; | |
| const http = require('http'); | |
| const fs = require('fs'); | |
| const path = require('path'); | |
/**
 * Turn the raw VISIBILITY_PORT environment value into a usable TCP port.
 *
 * @param {string | undefined} raw - Environment value, if any.
 * @returns {number} The parsed port (0-65535), or 4242 when `raw` is missing,
 *   not numeric, or outside the valid port range.
 */
function resolvePort(raw) {
  const DEFAULT_PORT = 4242;
  // Explicit radix so the value is always read as decimal.
  const parsed = Number.parseInt(raw || String(DEFAULT_PORT), 10);
  const usable = Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535;
  // A NaN or out-of-range port would only fail later at listen time; fall back now.
  return usable ? parsed : DEFAULT_PORT;
}

const PORT = resolvePort(process.env.VISIBILITY_PORT);
// ── State ────────────────────────────────────────────────────────────
/**
 * Build a brand-new, empty server state object.
 *
 * Every call returns independent containers, so mutating one state never
 * leaks into another.
 *
 * @returns {object} State with empty collections, zeroed metrics and an
 *   'idle' status.
 */
function fresh() {
  const blank = {
    agents: {},
    registry: {},
    memory: {},
    events: [],
    arrows: [],
    plan: [],
    internals: [],
    metrics: { steps: 0, tokens: 0, retries: 0 },
    goal: '',
    runId: null,
    status: 'idle',
    startedAt: null,
    clients: [],
  };
  return blank;
}

// Single mutable state object shared by the functions in this file.
let state = fresh();
// ── SSE broadcast ────────────────────────────────────────────────────
/**
 * Send one server-sent-event frame to every connected client.
 *
 * The frame is `data: <json>\n\n`, where the JSON carries `type`, `payload`
 * and a `ts` timestamp taken at send time.
 *
 * Delivery is best-effort: a client whose `write` throws never aborts the
 * broadcast. Such clients are removed from `state.clients` afterwards so
 * later broadcasts stop retrying a dead connection.
 *
 * @param {string} type - Event name.
 * @param {*} payload - JSON-serialisable event body.
 */
function broadcast(type, payload) {
  const msg = `data: ${JSON.stringify({ type, payload, ts: Date.now() })}\n\n`;
  const dead = new Set();
  for (const client of state.clients) {
    try {
      client.write(msg);
    } catch (_) {
      // Do not crash the caller over one broken stream; just forget it.
      dead.add(client);
    }
  }
  if (dead.size > 0) {
    state.clients = state.clients.filter((client) => !dead.has(client));
  }
}
// ── Role colours ─────────────────────────────────────────────────────
// Default display colour per agent role; roles without an entry here fall
// back to a neutral grey at the lookup sites.
const COLORS = {
  orchestrator: '#8b7cf8',
  researcher: '#2dd4b0',
  coder: '#60a5fa',
  critic: '#f59e0b',
  synthesiser: '#60a5fa',
  worker: '#2dd4b0',
};
// ── Helpers ──────────────────────────────────────────────────────────
/**
 * Guarantee that `state.agents[id]` exists.
 *
 * A missing agent is created from its `state.registry` entry, or from
 * defaults when it was never registered. An existing agent is left untouched.
 *
 * @param {string} id - Agent identifier.
 */
function ensureAgent(id) {
  if (state.agents[id]) return;

  const entry = state.registry[id] || {};
  const role = entry.role || 'worker';
  // Look the colour up with the resolved role so an implicit worker gets the
  // worker colour, matching what register_agent does. The own-property check
  // keeps inherited members such as "constructor" from being used as a colour.
  const roleColor = Object.hasOwn(COLORS, role) ? COLORS[role] : undefined;

  state.agents[id] = {
    id,
    label: entry.label || id,
    role,
    model: entry.model || '',
    reports_to: entry.reports_to || null,
    token_budget: entry.token_budget || 8192,
    color: entry.color || roleColor || '#6b7280',
    status: 'idle',
    tokens: 0,
    calls: 0,
  };
}
/**
 * Produce a copy of `state.agents` restricted to the fields that are sent to
 * clients.
 *
 * @returns {Object<string, object>} Map of agent key to a new object holding
 *   only id, label, role, model, reports_to, token_budget, color, status,
 *   tokens and calls.
 */
function safeAgents() {
  const result = {};
  Object.keys(state.agents).forEach((key) => {
    const {
      id, label, role, model, reports_to, token_budget, color, status, tokens, calls,
    } = state.agents[key];
    result[key] = {
      id, label, role, model, reports_to, token_budget, color, status, tokens, calls,
    };
  });
  return result;
}
/**
 * Assemble a view of the current state.
 *
 * Long lists are truncated to their first entries (80 events, 20 arrows,
 * 60 internals); the names of the available scenarios are included.
 *
 * @returns {object} Snapshot object.
 */
function snapshot() {
  const {
    registry, runId, goal, status, startedAt,
    memory, events, arrows, plan, metrics, internals,
  } = state;

  return {
    registry,
    runId,
    goal,
    status,
    startedAt,
    agents: safeAgents(),
    memory,
    events: events.slice(0, 80),
    arrows: arrows.slice(0, 20),
    plan,
    metrics,
    internals: internals.slice(0, 60),
    scenarios: Object.keys(SCENARIOS),
  };
}
// ── Tools ────────────────────────────────────────────────────────────
/**
 * Prepend `item` to `list`, dropping the oldest (last) entry once the list
 * grows beyond `cap`.
 */
function unshiftCapped(list, item, cap) {
  list.unshift(item);
  if (list.length > cap) list.pop();
}

/** Store an observability record in `state.internals` (max 200) and broadcast it. */
function recordInternal(item) {
  unshiftCapped(state.internals, item, 200);
  broadcast('internal', item);
}

/** Add `tokens` to the agent's counters and the global metric; no-op for 0. */
function creditTokens(agent, tokens) {
  if (!tokens) return;
  state.agents[agent].tokens += tokens;
  state.agents[agent].calls += 1;
  state.metrics.tokens += tokens;
}

/**
 * Render a value as display text, truncated to `limit` characters.
 * Objects are JSON-encoded so they do not collapse to "[object Object]";
 * anything that cannot be encoded falls back to String().
 */
function previewText(value, limit) {
  let encoded;
  if (value !== null && typeof value === 'object') {
    try {
      encoded = JSON.stringify(value);
    } catch (_) {
      // Circular structure, BigInt, etc.: fall through to String(value).
      encoded = undefined;
    }
  }
  return String(encoded ?? value).slice(0, limit);
}

/**
 * Tool handlers. Each takes a single options object, updates `state`,
 * broadcasts the change to connected clients and returns `{ ok: true }`.
 */
const TOOLS = {
  /**
   * Register (or re-register) an agent. Re-registering resets the agent's
   * status, token and call counters.
   * `label` falls back to `id` when omitted; `color` falls back to the role
   * colour, then to grey.
   */
  register_agent({ id, label, role = 'worker', model = '', reports_to = null, token_budget = 8192, color = null }) {
    const name = label || id;
    const c = color || COLORS[role] || '#6b7280';
    state.registry[id] = { id, label: name, role, model, reports_to, token_budget, color: c };
    state.agents[id] = { ...state.registry[id], status: 'idle', tokens: 0, calls: 0 };
    broadcast('registry', state.registry);
    broadcast('agents', safeAgents());
    // NOTE(review): the "β" below looks like a mis-decoded punctuation mark
    // (probably a dash); left untouched because the original character is unknown.
    broadcast('event', { agent: id, event_type: 'registered',
      message: `${name} registered β role:${role}, model:${model || 'unset'}`,
      tokens: 0, latency_ms: 0, ts: Date.now() });
    return { ok: true };
  },

  /**
   * Append an event to the timeline (max 200 kept), credit any tokens to the
   * agent and bump the step counter.
   */
  log_event({ agent, event_type, message, tokens = 0, latency_ms = 0, metadata = {} }) {
    ensureAgent(agent);
    const item = { agent, event_type, message, tokens, latency_ms, metadata, ts: Date.now() };
    unshiftCapped(state.events, item, 200);
    creditTokens(agent, tokens);
    state.metrics.steps++;
    broadcast('event', item);
    broadcast('metrics', state.metrics);
    broadcast('agents', safeAgents());
    return { ok: true };
  },

  /** Write a shared-memory entry; stored and broadcast records share one timestamp. */
  set_memory({ key, value, op = 'write' }) {
    const ts = Date.now();
    state.memory[key] = { value, op, ts };
    broadcast('memory', { key, value, op, ts });
    return { ok: true };
  },

  /** Set an agent's status, creating the agent if it is unknown. */
  set_agent_state({ agent_id, status }) {
    ensureAgent(agent_id);
    state.agents[agent_id].status = status;
    broadcast('agents', safeAgents());
    return { ok: true };
  },

  /** Record a message arrow between two agents (max 50 kept). */
  trace_step({ from_agent, to_agent, label = '', arrow_type = 'msg' }) {
    ensureAgent(from_agent);
    ensureAgent(to_agent);
    const arrow = { from: from_agent, to: to_agent, label, arrow_type, ts: Date.now() };
    unshiftCapped(state.arrows, arrow, 50);
    broadcast('arrow', arrow);
    return { ok: true };
  },

  /** Replace the plan; a missing `tasks` becomes an empty plan. */
  set_plan({ tasks = [] } = {}) {
    state.plan = tasks;
    broadcast('plan', tasks);
    return { ok: true };
  },

  /** Start a run: store the goal, pick a run id (defaults to the current time) and mark it running. */
  set_goal({ goal, run_id }) {
    state.goal = goal;
    state.runId = run_id || String(Date.now());
    state.status = 'running';
    state.startedAt = Date.now();
    broadcast('goal', { goal, runId: state.runId });
    broadcast('status', 'running');
    return { ok: true };
  },

  /** End the run with the given status; callable with no argument (defaults to 'done'). */
  finish_run({ status = 'done' } = {}) {
    state.status = status;
    broadcast('status', status);
    return { ok: true };
  },

  // ── Internal observability tools ──────────────────────────────────

  /** Record an embedding call; the text is truncated to 90 characters. */
  log_embedding({ agent, text, model = 'text-embedding-3-small', dims = 1536, latency_ms = 0 }) {
    ensureAgent(agent);
    recordInternal({
      kind: 'embedding', agent, text: String(text).slice(0, 90), model, dims, latency_ms, ts: Date.now(),
    });
    return { ok: true };
  },

  /**
   * Record a retrieval call. Keeps at most 6 results (text cut to 70 chars);
   * a non-array `results` or null entries are tolerated.
   */
  log_retrieval({ agent, query, results = [], latency_ms = 0 }) {
    ensureAgent(agent);
    const hits = Array.isArray(results) ? results : [];
    recordInternal({
      kind: 'retrieval', agent,
      query: String(query).slice(0, 90),
      results: hits.slice(0, 6).map((r) => ({ text: String(r?.text || '').slice(0, 70), score: r?.score ?? 0 })),
      latency_ms, ts: Date.now(),
    });
    return { ok: true };
  },

  /** Record a tool invocation; input and output are rendered as text and cut to 4000 chars. */
  log_tool_call({ agent, tool_name, input = '', output = '', latency_ms = 0, error = null }) {
    ensureAgent(agent);
    recordInternal({
      kind: 'tool_call', agent, tool_name,
      input: previewText(input, 4000),
      output: previewText(output, 4000),
      latency_ms, error, ts: Date.now(),
    });
    return { ok: true };
  },

  /**
   * Record one LLM generation and credit prompt + completion tokens to the
   * agent. Keeps at most 30 messages (content cut to 2000 chars); response
   * and thinking are cut to 4000 / 3000 chars.
   */
  log_generation({ agent, prompt_tokens = 0, completion_tokens = 0, model = '', latency_ms = 0, stop_reason = 'stop', messages = [], response = null, thinking = null }) {
    ensureAgent(agent);
    const total = prompt_tokens + completion_tokens;
    const turns = Array.isArray(messages) ? messages : [];
    const item = {
      kind: 'generation', agent, prompt_tokens, completion_tokens, total, model, latency_ms, stop_reason,
      messages: turns.slice(0, 30).map((m) => ({
        role: String(m?.role || 'user'),
        content: String(m?.content || '').slice(0, 2000),
      })),
      response: response ? String(response).slice(0, 4000) : null,
      thinking: thinking ? String(thinking).slice(0, 3000) : null,
      ts: Date.now(),
    };
    creditTokens(agent, total);
    recordInternal(item);
    broadcast('agents', safeAgents());
    broadcast('metrics', state.metrics);
    return { ok: true };
  },
};

// alias: log_llm_turn → log_generation (richer name exposed in MCP)
TOOLS.log_llm_turn = TOOLS.log_generation;
// ── Demo scenarios ───────────────────────────────────────────────────
| const SCENARIOS = { | |
| research_code: { | |
| goal: 'Explain quicksort and write a Python implementation', | |
| steps: [ | |
| { delay: 0, fn: () => { | |
| TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); | |
| TOOLS.register_agent({ id: 'researcher', label: 'Researcher', role: 'researcher', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 8192 }); | |
| TOOLS.register_agent({ id: 'coder', label: 'Coder', role: 'coder', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); | |
| TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); | |
| }}, | |
| { delay: 800, fn: () => { | |
| TOOLS.set_goal({ goal: SCENARIOS.research_code.goal }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); | |
| TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 280, completion_tokens: 95, model: 'claude-sonnet-4-20250514', latency_ms: 620, stop_reason: 'end_turn', | |
| messages: [ | |
| { role: 'system', content: 'You are an orchestrator agent. Break the user goal into subtasks and delegate to specialist agents: Researcher (theory/research), Coder (implementation), Critic (validation). Always plan before routing.' }, | |
| { role: 'user', content: 'Explain quicksort and write a Python implementation' }, | |
| ], | |
| response: "I'll break this into 3 sequential tasks:\n1. **Researcher** β explain quicksort: theory, O(n log n) complexity, partition schemes (Lomuto/Hoare)\n2. **Coder** β write a clean Python implementation with type hints, docstrings, and edge-case handling\n3. **Critic** β review code quality, correctness, and style\n\nRouting to Researcher first.", | |
| }); | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planning tasksβ¦' }); | |
| }}, | |
| { delay: 900, fn: () => { | |
| TOOLS.set_plan({ tasks: [{ agent: 'researcher', task: 'Explain quicksort', depends_on: [] }, { agent: 'coder', task: 'Write Python implementation', depends_on: [0] }, { agent: 'critic', task: 'Validate code quality', depends_on: [1] }] }); | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'explain', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'researcher', status: 'running' }); | |
| TOOLS.set_memory({ key: 'goal', value: SCENARIOS.research_code.goal }); | |
| }}, | |
| // Researcher β embed query, web search, generate | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'researcher', text: 'quicksort algorithm explanation divide conquer', model: 'text-embedding-3-small', dims: 1536, latency_ms: 48 }); | |
| }}, | |
| { delay: 300, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'researcher', query: 'quicksort algorithm complexity analysis', latency_ms: 92, | |
| results: [ | |
| { text: 'Quicksort uses divide-and-conquer: pick a pivot, partition into <, =, > subarrays.', score: 0.94 }, | |
| { text: 'Average-case O(n log n); worst-case O(nΒ²) with bad pivot selection.', score: 0.91 }, | |
| { text: 'Lomuto vs Hoare partition schemes differ in swap count and cache behaviour.', score: 0.87 }, | |
| { text: 'Introsort (used in STL) falls back to heapsort to avoid O(nΒ²) worst case.', score: 0.82 }, | |
| ], | |
| }); | |
| }}, | |
| { delay: 500, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'web_search', input: 'quicksort algorithm detailed explanation', output: '6 results β Wikipedia, CS Visualizer, CLRS excerpt', latency_ms: 340 }); | |
| }}, | |
| { delay: 900, fn: () => { | |
| TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 1840, completion_tokens: 620, model: 'claude-haiku-4-5-20251001', latency_ms: 1320, stop_reason: 'end_turn', | |
| messages: [ | |
| { role: 'system', content: 'You are a researcher agent. Synthesise accurate technical information from retrieved documents. Be precise, cite complexity bounds, note tradeoffs.' }, | |
| { role: 'user', content: 'Task from orchestrator: Explain the quicksort algorithm in detail β theory, complexity, partition schemes.' }, | |
| { role: 'assistant', content: '[embedding query and retrieving relevant documentsβ¦]' }, | |
| { role: 'tool', content: 'Retrieved 4 chunks:\nβ’ Quicksort uses divide-and-conquer: pick a pivot, partition into <, =, > subarrays. (score 0.94)\nβ’ Average O(n log n); worst O(nΒ²) with bad pivot selection. (score 0.91)\nβ’ Lomuto vs Hoare partition differ in swap count and cache behaviour. (score 0.87)\nβ’ Introsort falls back to heapsort to avoid O(nΒ²) worst case. (score 0.82)' }, | |
| ], | |
| response: "**Quicksort** is a divide-and-conquer sorting algorithm:\n\n**Core strategy**: Choose a pivot element, partition the array into β€ pivot and > pivot halves, then recursively sort each half in-place.\n\n**Complexity**:\n- Average: O(n log n) β balanced splits with good pivot choice\n- Worst: O(nΒ²) β degenerate pivot on already-sorted input\n- Space: O(log n) stack depth average\n\n**Partition schemes**:\n- *Lomuto*: simpler code, last element as pivot, O(n) comparisons\n- *Hoare*: ~3Γ fewer swaps, two converging pointers\n\n**Practical optimisations**:\n- Median-of-3 pivot selection to avoid worst case\n- Switch to insertion sort for subarrays smaller than ~10 elements\n- Introsort (Python's Timsort variant) adds heapsort fallback for guaranteed O(n log n)", | |
| }); | |
| TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Quicksort: divide-and-conquer. Pivot splits into <, =, > partitions. Avg O(n log n), worst O(nΒ²) with sorted input.' }); | |
| TOOLS.set_memory({ key: 'research', value: 'Quicksort: O(n log n) avg, O(nΒ²) worst. Lomuto/Hoare partition.' }); | |
| TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'done', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'researcher', status: 'done' }); | |
| }}, | |
| { delay: 500, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'implement', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); | |
| }}, | |
| // Coder β retrieve code examples, execute sandbox, generate | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'coder', text: 'Python quicksort implementation with type hints', model: 'text-embedding-3-small', dims: 1536, latency_ms: 51 }); | |
| }}, | |
| { delay: 300, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'coder', query: 'Python quicksort recursive + in-place implementation', latency_ms: 88, | |
| results: [ | |
| { text: 'def quicksort(arr): return [] if not arr else quicksort([x for x in arr[1:] if x<=arr[0]])+[arr[0]]+...', score: 0.96 }, | |
| { text: 'def partition(arr, lo, hi): pivot=arr[hi]; i=lo-1; ...', score: 0.93 }, | |
| { text: 'Lomuto partition: O(n) extra calls, simpler code. Hoare: fewer swaps.', score: 0.88 }, | |
| ], | |
| }); | |
| }}, | |
| { delay: 800, fn: () => { | |
| TOOLS.log_generation({ agent: 'coder', prompt_tokens: 3200, completion_tokens: 980, model: 'claude-sonnet-4-20250514', latency_ms: 2100, stop_reason: 'end_turn', | |
| messages: [ | |
| { role: 'system', content: 'You are a coder agent. Write production-quality Python code with type hints, docstrings, and edge-case handling. Use retrieved snippets as reference, not copy-paste.' }, | |
| { role: 'user', content: 'Task from orchestrator: Implement quicksort in Python. Context from researcher:\n- Quicksort: divide-and-conquer, pick pivot, partition into β€ and > halves\n- Avg O(n log n), worst O(nΒ²)\n- Lomuto partition is simpler; Hoare has fewer swaps' }, | |
| { role: 'assistant', content: '[retrieving code examples and planning implementationβ¦]' }, | |
| { role: 'tool', content: 'Retrieved snippets:\nβ’ def quicksort(arr): return [] if not arr else quicksort([x for x in arr[1:] if x<=arr[0]])+[arr[0]]+... (functional, not in-place)\nβ’ def partition(arr, lo, hi): pivot=arr[hi]; i=lo-1; ... (Lomuto, in-place)' }, | |
| ], | |
| response: "```python\nfrom typing import TypeVar\n\nT = TypeVar('T')\n\n\ndef quicksort(arr: list[T]) -> list[T]:\n \"\"\"Return a new sorted list using quicksort (functional style).\n\n Average O(n log n), worst-case O(nΒ²). Not in-place.\n \"\"\"\n if len(arr) <= 1:\n return arr\n pivot = arr[len(arr) // 2] # median-of-1 (simple)\n left = [x for x in arr if x < pivot]\n mid = [x for x in arr if x == pivot]\n right = [x for x in arr if x > pivot]\n return quicksort(left) + mid + quicksort(right)\n\n\ndef quicksort_inplace(arr: list[T], lo: int = 0, hi: int | None = None) -> None:\n \"\"\"Sort arr in-place using Lomuto partition scheme.\"\"\"\n if hi is None:\n hi = len(arr) - 1\n if lo < hi:\n p = _partition(arr, lo, hi)\n quicksort_inplace(arr, lo, p - 1)\n quicksort_inplace(arr, p + 1, hi)\n\n\ndef _partition(arr: list[T], lo: int, hi: int) -> int:\n pivot, i = arr[hi], lo - 1\n for j in range(lo, hi):\n if arr[j] <= pivot:\n i += 1\n arr[i], arr[j] = arr[j], arr[i]\n arr[i + 1], arr[hi] = arr[hi], arr[i + 1]\n return i + 1\n```\n\nBoth variants pass edge cases: empty list, single element, all-equal, reverse-sorted.", | |
| }); | |
| }}, | |
| { delay: 600, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([3,1,4,1,5,9,2,6]) β smoke test', output: '[1, 1, 2, 3, 4, 5, 6, 9] β (12ms)', latency_ms: 112 }); | |
| TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([]) β edge case empty list', output: '[] β', latency_ms: 8 }); | |
| TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'quicksort([1]) β single element', output: '[1] β', latency_ms: 6 }); | |
| }}, | |
| { delay: 900, fn: () => { | |
| TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'quicksort() + quicksort_inplace() β full docstrings, Lomuto partition, all edge cases pass.' }); | |
| TOOLS.set_memory({ key: 'code', value: 'def quicksort(arr: list) -> list: ...\ndef quicksort_inplace(arr, lo, hi): ...' }); | |
| TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'ready', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'done' }); | |
| }}, | |
| { delay: 500, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'validate', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); | |
| }}, | |
| // Critic β embed code, lint, generate review | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'critic', text: 'def quicksort(arr: list) -> list: ...', model: 'text-embedding-3-small', dims: 1536, latency_ms: 44 }); | |
| }}, | |
| { delay: 300, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'critic', tool_name: 'lint_check', input: 'quicksort.py', output: 'pylint 9.8/10 β 0 errors, 1 convention (missing module docstring)', latency_ms: 180 }); | |
| TOOLS.log_tool_call({ agent: 'critic', tool_name: 'type_check', input: 'mypy quicksort.py --strict', output: 'Success: no issues found in 1 source file', latency_ms: 95 }); | |
| }}, | |
| { delay: 700, fn: () => { | |
| TOOLS.log_generation({ agent: 'critic', prompt_tokens: 2100, completion_tokens: 480, model: 'claude-haiku-4-5-20251001', latency_ms: 980, stop_reason: 'end_turn', | |
| messages: [ | |
| { role: 'system', content: 'You are a critic agent. Review code for correctness, style, type safety, and edge-case coverage. Output a score /10 with justification.' }, | |
| { role: 'user', content: 'Review this Python quicksort implementation:\n\ndef quicksort(arr: list[T]) -> list[T]: ...\ndef quicksort_inplace(arr, lo, hi): ...\n\nTool results: pylint 9.8/10, mypy strict: no issues.' }, | |
| ], | |
| response: "**PASS β 9/10**\n\nβ Type hints on public API (TypeVar T for generics)\nβ Docstrings explain complexity and behaviour\nβ Both functional and in-place variants provided\nβ Edge cases: empty list, single element return correctly\nβ mypy strict passes β no type errors\nβ pylint 9.8/10\n\n**Minor issues**:\n- Missing module-level docstring (-0.5)\n- `quicksort_inplace` docstring doesn't document `lo`/`hi` params (-0.5)\n- Pivot selection is not median-of-3 β can hit O(nΒ²) on nearly-sorted input (acceptable for demo)\n\nRecommendation: **approve for merge**. Add module docstring before production use.", | |
| }); | |
| TOOLS.log_event({ agent: 'critic', event_type: 'pass', message: 'PASS 9/10 β clean API, type-safe, edge cases covered. Minor: missing module docstring.' }); | |
| TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'pass 9/10', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'done' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.set_memory({ key: 'output', value: 'quicksort.py β approved 9/10' }); | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Run complete β 18 steps' }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); | |
| TOOLS.finish_run({ status: 'done' }); | |
| }}, | |
| ], | |
| }, | |
| critic_retry: { | |
| goal: 'Write an RFC-5321 compliant email regex validator', | |
| steps: [ | |
| { delay: 0, fn: () => { | |
| TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); | |
| TOOLS.register_agent({ id: 'coder', label: 'Coder', role: 'coder', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); | |
| TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); | |
| }}, | |
| { delay: 700, fn: () => { | |
| TOOLS.set_goal({ goal: SCENARIOS.critic_retry.goal }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); | |
| TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 240, completion_tokens: 80, model: 'claude-sonnet-4-20250514', latency_ms: 580 }); | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planningβ¦' }); | |
| }}, | |
| { delay: 800, fn: () => { | |
| TOOLS.set_plan({ tasks: [{ agent: 'coder', task: 'Write RFC-5321 email regex', depends_on: [] }, { agent: 'critic', task: 'Validate regex correctness', depends_on: [0] }] }); | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'write', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); | |
| }}, | |
| // Coder v1 β minimal attempt | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'coder', text: 'RFC-5321 email address validation regex Python', model: 'text-embedding-3-small', dims: 1536, latency_ms: 49 }); | |
| }}, | |
| { delay: 300, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'coder', query: 'email regex RFC 5321 compliant Python', latency_ms: 84, | |
| results: [ | |
| { text: 'Simple: r"[^@]+@[^@]+\\.[^@]+" β catches most but misses edge cases.', score: 0.89 }, | |
| { text: 'RFC-5321 allows quoted strings, IP literals, special chars in local part.', score: 0.85 }, | |
| ], | |
| }); | |
| }}, | |
| { delay: 900, fn: () => { | |
| TOOLS.log_generation({ agent: 'coder', prompt_tokens: 920, completion_tokens: 240, model: 'claude-sonnet-4-20250514', latency_ms: 1800, stop_reason: 'end_turn' }); | |
| TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'test_email("user@example.com")', output: 'True β', latency_ms: 14 }); | |
| TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'Draft v1: r"[^@]+" β covers basic cases.' }); | |
| TOOLS.set_memory({ key: 'code', value: 'r"[^@]+"' }); | |
| TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'v1', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'active' }); | |
| }}, | |
| // Critic v1 review β fail | |
| { delay: 500, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'review v1', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'critic', text: 'r"[^@]+" email regex RFC-5321 compliance', model: 'text-embedding-3-small', dims: 1536, latency_ms: 46 }); | |
| TOOLS.log_tool_call({ agent: 'critic', tool_name: 'regex_test_suite', input: 'RFC-5321 test vectors (120 cases)', output: '67/120 pass β missing TLDs, quoted strings, IP literals, consecutive dot check', latency_ms: 220 }); | |
| }}, | |
| { delay: 700, fn: () => { | |
| TOOLS.log_generation({ agent: 'critic', prompt_tokens: 1400, completion_tokens: 360, model: 'claude-haiku-4-5-20251001', latency_ms: 980, stop_reason: 'end_turn' }); | |
| TOOLS.log_event({ agent: 'critic', event_type: 'fail', message: 'FAIL 4/10 β 67/120 test vectors pass. Missing: TLDs, quoted strings, IP literals, consecutive-dot rule.' }); | |
| TOOLS.set_memory({ key: 'critique', value: 'fail 4/10 β missing TLDs, quoted strings, IP literals' }); | |
| TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'fail 4/10', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'active' }); | |
| state.metrics.retries++; broadcast('metrics', state.metrics); | |
| }}, | |
| // Orchestrator retries coder | |
| { delay: 500, fn: () => { | |
| TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 480, completion_tokens: 120, model: 'claude-sonnet-4-20250514', latency_ms: 640 }); | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'retry', message: 'Critic FAIL β retrying Coder with full critique attached' }); | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'coder', label: 'retry', arrow_type: 'retry' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'running' }); | |
| }}, | |
| // Coder v2 β thorough attempt | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'coder', text: 'RFC-5321 quoted strings IP literal TLD validation', model: 'text-embedding-3-small', dims: 1536, latency_ms: 52 }); | |
| TOOLS.log_retrieval({ agent: 'coder', query: 'RFC 5321 email local-part quoted string IP literal syntax', latency_ms: 96, | |
| results: [ | |
| { text: 'Local part: atom or quoted-string. Quoted allows spaces, special chars within double quotes.', score: 0.95 }, | |
| { text: 'Domain: hostname or IP literal [n.n.n.n]. TLD must be 2+ alpha chars.', score: 0.93 }, | |
| { text: 'No consecutive dots in local or domain part. No leading/trailing dot.', score: 0.91 }, | |
| ], | |
| }); | |
| }}, | |
| { delay: 1200, fn: () => { | |
| TOOLS.log_generation({ agent: 'coder', prompt_tokens: 2800, completion_tokens: 780, model: 'claude-sonnet-4-20250514', latency_ms: 2600, stop_reason: 'end_turn' }); | |
| }}, | |
| { delay: 600, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'coder', tool_name: 'code_execute', input: 'RFC-5321 test suite β 120 vectors', output: '118/120 pass (2 obscure IPv6 edge cases)', latency_ms: 340 }); | |
| TOOLS.log_event({ agent: 'coder', event_type: 'reply', message: 'Draft v2: RFC-5321 compliant β TLD check, quoted strings, IP literals, consecutive-dot guard.' }); | |
| TOOLS.set_memory({ key: 'code', value: 'RFC5321_RE = re.compile(r\'...\') # 118/120 RFC vectors pass' }); | |
| TOOLS.trace_step({ from_agent: 'coder', to_agent: 'orchestrator', label: 'v2', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'coder', status: 'done' }); | |
| }}, | |
| // Critic v2 review β pass | |
| { delay: 500, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'critic', label: 'review v2', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'running' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'critic', tool_name: 'regex_test_suite', input: 'RFC-5321 test vectors (120 cases)', output: '118/120 pass β 2 obscure IPv6 literals; acceptable for prod use', latency_ms: 215 }); | |
| }}, | |
| { delay: 700, fn: () => { | |
| TOOLS.log_generation({ agent: 'critic', prompt_tokens: 1600, completion_tokens: 320, model: 'claude-haiku-4-5-20251001', latency_ms: 860, stop_reason: 'end_turn' }); | |
| TOOLS.log_event({ agent: 'critic', event_type: 'pass', message: 'PASS 9/10 β 118/120 RFC vectors pass, production-ready.' }); | |
| TOOLS.trace_step({ from_agent: 'critic', to_agent: 'orchestrator', label: 'pass 9/10', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'critic', status: 'done' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Complete after 1 retry β 1 retry, 20 steps' }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); | |
| TOOLS.finish_run({ status: 'done' }); | |
| }}, | |
| ], | |
| }, | |
| memory_overflow: { | |
| goal: 'Summarise 3 ML papers and synthesise into a report', | |
| steps: [ | |
| { delay: 0, fn: () => { | |
| TOOLS.register_agent({ id: 'orchestrator', label: 'Orchestrator', role: 'orchestrator', model: 'claude-sonnet-4-20250514', token_budget: 16384 }); | |
| TOOLS.register_agent({ id: 'researcher', label: 'Researcher', role: 'researcher', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 8192 }); | |
| TOOLS.register_agent({ id: 'synthesiser', label: 'Synthesiser', role: 'synthesiser', model: 'claude-sonnet-4-20250514', reports_to: 'orchestrator', token_budget: 8192 }); | |
| TOOLS.register_agent({ id: 'critic', label: 'Critic', role: 'critic', model: 'claude-haiku-4-5-20251001', reports_to: 'orchestrator', token_budget: 4096 }); | |
| }}, | |
| { delay: 700, fn: () => { | |
| TOOLS.set_goal({ goal: SCENARIOS.memory_overflow.goal }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'running' }); | |
| TOOLS.log_generation({ agent: 'orchestrator', prompt_tokens: 260, completion_tokens: 88, model: 'claude-sonnet-4-20250514', latency_ms: 600 }); | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'start', message: 'Planning 3-paper synthesisβ¦' }); | |
| }}, | |
| { delay: 900, fn: () => { | |
| TOOLS.set_plan({ tasks: [{ agent: 'researcher', task: 'Summarise paper A β scaling laws', depends_on: [] }, { agent: 'researcher', task: 'Summarise paper B β MoE routing', depends_on: [] }, { agent: 'researcher', task: 'Summarise paper C β RLHF hacking', depends_on: [] }, { agent: 'synthesiser', task: 'Synthesise into report', depends_on: [0,1,2] }] }); | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper A', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'researcher', status: 'running' }); | |
| }}, | |
| // Paper A | |
| { delay: 400, fn: () => { | |
| TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'scaling_laws_2020.pdf', output: '18,400 tokens extracted β 42 pages', latency_ms: 480 }); | |
| TOOLS.log_embedding({ agent: 'researcher', text: 'neural scaling laws loss compute data parameters', model: 'text-embedding-3-small', dims: 1536, latency_ms: 55 }); | |
| }}, | |
| { delay: 600, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'researcher', query: 'key findings scaling laws compute-optimal training', latency_ms: 104, | |
| results: [ | |
| { text: 'Loss scales as power law with N (params), D (data), C (compute): L β N^0.076.', score: 0.97 }, | |
| { text: 'Compute-optimal: scale params and data proportionally. Chinchilla law.', score: 0.94 }, | |
| { text: 'Irreducible loss β 1.69 nats; emergent capabilities at scale thresholds.', score: 0.88 }, | |
| ], | |
| }); | |
| TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 2400, completion_tokens: 520, model: 'claude-haiku-4-5-20251001', latency_ms: 1600, stop_reason: 'end_turn' }); | |
| TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper A: Scaling laws β loss β N^0.076. Compute-optimal: equal param/data scaling.' }); | |
| TOOLS.set_memory({ key: 'paper_a', value: 'Scaling laws: loss β N^0.076, Chinchilla-optimal' }); | |
| TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'A done', arrow_type: 'result' }); | |
| }}, | |
| // Paper B | |
| { delay: 400, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper B', arrow_type: 'msg' }); | |
| TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'moe_routing_2023.pdf', output: '22,100 tokens extracted β 51 pages', latency_ms: 520 }); | |
| TOOLS.log_embedding({ agent: 'researcher', text: 'mixture of experts routing sparse transformer efficiency', model: 'text-embedding-3-small', dims: 1536, latency_ms: 53 }); | |
| }}, | |
| { delay: 600, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'researcher', query: 'MoE routing top-k expert selection load balancing', latency_ms: 98, | |
| results: [ | |
| { text: 'Top-2 routing: each token sent to 2 of N experts. 60% active-param reduction vs dense.', score: 0.96 }, | |
| { text: 'Load balancing loss prevents expert collapse. Jitter noise aids exploration.', score: 0.92 }, | |
| { text: 'Switch Transformer: top-1 routing, simpler but prone to collapse without aux loss.', score: 0.87 }, | |
| ], | |
| }); | |
| TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 2800, completion_tokens: 490, model: 'claude-haiku-4-5-20251001', latency_ms: 1500, stop_reason: 'end_turn' }); | |
| TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper B: MoE top-2 routing, 60% active-param reduction. Load-balance aux loss prevents collapse.' }); | |
| TOOLS.set_memory({ key: 'paper_b', value: 'MoE: top-2 routing, 60% reduction, aux load-balance loss' }); | |
| TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'B done', arrow_type: 'result' }); | |
| }}, | |
| // Paper C β triggers memory pressure | |
| { delay: 400, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'researcher', label: 'paper C', arrow_type: 'msg' }); | |
| TOOLS.log_tool_call({ agent: 'researcher', tool_name: 'pdf_extract', input: 'rlhf_reward_hacking_2024.pdf', output: '31,200 tokens extracted β 68 pages', latency_ms: 710 }); | |
| TOOLS.log_embedding({ agent: 'researcher', text: 'RLHF reward hacking overoptimisation KL penalty', model: 'text-embedding-3-small', dims: 1536, latency_ms: 58 }); | |
| }}, | |
| { delay: 600, fn: () => { | |
| TOOLS.log_retrieval({ agent: 'researcher', query: 'reward hacking frequency mitigation strategies RLHF', latency_ms: 112, | |
| results: [ | |
| { text: 'Reward hacking observed in 34% of runs beyond 3000 RL steps. KL alone insufficient.', score: 0.95 }, | |
| { text: 'Constitutional AI + process reward models reduce hacking to <8%.', score: 0.91 }, | |
| { text: 'Ensemble reward models provide more robust signal than single RM.', score: 0.88 }, | |
| ], | |
| }); | |
| TOOLS.log_generation({ agent: 'researcher', prompt_tokens: 3200, completion_tokens: 560, model: 'claude-haiku-4-5-20251001', latency_ms: 1800, stop_reason: 'end_turn' }); | |
| TOOLS.log_event({ agent: 'researcher', event_type: 'reply', message: 'Paper C: RLHF reward hacking in 34% of runs. KL penalty alone insufficient; ensemble RMs help.' }); | |
| TOOLS.set_memory({ key: 'paper_c', value: 'RLHF: reward hacking 34%, use ensemble RMs + CAI' }); | |
| TOOLS.trace_step({ from_agent: 'researcher', to_agent: 'orchestrator', label: 'C done', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'researcher', status: 'done' }); | |
| }}, | |
| // Synthesiser β context overflow | |
| { delay: 600, fn: () => { | |
| TOOLS.trace_step({ from_agent: 'orchestrator', to_agent: 'synthesiser', label: 'synthesise', arrow_type: 'msg' }); | |
| TOOLS.set_agent_state({ agent_id: 'synthesiser', status: 'running' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.log_embedding({ agent: 'synthesiser', text: 'scaling laws MoE routing RLHF reward hacking synthesis', model: 'text-embedding-3-small', dims: 1536, latency_ms: 62 }); | |
| TOOLS.log_tool_call({ agent: 'synthesiser', tool_name: 'context_count', input: 'papers A+B+C combined tokens', output: '7,840 / 8,192 tokens used (95.7%) β paper C will be truncated', latency_ms: 12 }); | |
| TOOLS.log_event({ agent: 'synthesiser', event_type: 'warn', message: 'WARNING: context at 95.7% β paper C (RLHF) will be truncated to fit budget.' }); | |
| }}, | |
| { delay: 1200, fn: () => { | |
| TOOLS.log_generation({ agent: 'synthesiser', prompt_tokens: 7840, completion_tokens: 980, model: 'claude-sonnet-4-20250514', latency_ms: 3200, stop_reason: 'max_tokens' }); | |
| TOOLS.log_event({ agent: 'synthesiser', event_type: 'reply', message: 'Report done (partial): scaling laws + MoE full coverage; RLHF section truncated β recommend re-running with chunked context.' }); | |
| TOOLS.set_memory({ key: 'output', value: 'Report: scaling (full) + MoE (full) + RLHF (truncated)' }); | |
| TOOLS.trace_step({ from_agent: 'synthesiser', to_agent: 'orchestrator', label: 'report', arrow_type: 'result' }); | |
| TOOLS.set_agent_state({ agent_id: 'synthesiser', status: 'done' }); | |
| }}, | |
| { delay: 400, fn: () => { | |
| TOOLS.log_event({ agent: 'orchestrator', event_type: 'done', message: 'Complete β context overflow on paper C. Recommend chunked summarisation for large doc sets.' }); | |
| TOOLS.set_agent_state({ agent_id: 'orchestrator', status: 'done' }); | |
| TOOLS.finish_run({ status: 'done' }); | |
| }}, | |
| ], | |
| }, | |
| }; | |
// Timer handles for the steps of the most recently started scenario.
let scenarioTimers = [];

/**
 * Reset the dashboard state and replay one of the canned demo scenarios.
 *
 * Each step carries a `delay` (milliseconds after the previous step) and a
 * `fn` to invoke; steps are scheduled at their cumulative offset from now.
 * Connected SSE clients survive the reset and are sent a `reset` message.
 *
 * @param {string} name - Key of the scenario in SCENARIOS.
 * @returns {boolean} true when the scenario was scheduled, false when `name`
 *   is not a known scenario.
 */
function runScenario(name) {
  // Own-property lookup only: names such as "constructor" or "__proto__"
  // must not resolve to members inherited from Object.prototype.
  const scenario = Object.prototype.hasOwnProperty.call(SCENARIOS, name)
    ? SCENARIOS[name]
    : undefined;
  if (!scenario || !Array.isArray(scenario.steps)) return false;

  // Cancel steps still queued by an earlier run so they cannot write into
  // the state object created below.
  scenarioTimers.forEach(handle => clearTimeout(handle));

  const clients = state.clients;
  state = fresh();
  state.clients = clients;
  broadcast('reset', {});

  let elapsed = 0;
  scenarioTimers = scenario.steps.map(step => {
    elapsed += step.delay;
    return setTimeout(() => {
      // A failing step is logged and must not stop the remaining steps.
      try { step.fn(); } catch (e) { console.error(e); }
    }, elapsed);
  });
  return true;
}
// ── Dashboard HTML ────────────────────────────────────────────────────────────
// Read once, synchronously, when the module loads; the file must sit next to
// this script, and later edits to it are only picked up after a restart.
const HTML = fs.readFileSync(path.join(__dirname, 'dashboard.html'), { encoding: 'utf8' });

// ── HTTP helpers ──────────────────────────────────────────────────────────────
// CORS headers: any origin may call the API with GET/POST/OPTIONS and send a
// Content-Type request header.
const CORS = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Content-Type',
};
/**
 * Collect the whole request body and pass it to `cb` as a UTF-8 string.
 *
 * Chunks are buffered as bytes and decoded once at the end, so a multi-byte
 * character that straddles two chunks is decoded correctly.
 *
 * @param {import('http').IncomingMessage} req - Readable request stream.
 * @param {(text: string) => void} cb - Called once, on 'end', with the body
 *   text ('' when the request carried no body).
 */
function body(req, cb) {
  const chunks = [];
  req.on('data', chunk => {
    // Chunks arrive as strings when an encoding was set on the stream.
    chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk);
  });
  req.on('end', () => cb(Buffer.concat(chunks).toString('utf8')));
}
/**
 * Finish a response with `data` serialised as JSON.
 *
 * @param {import('http').ServerResponse} res - Response to write to.
 * @param {*} data - Value passed to JSON.stringify.
 * @param {number} [status=200] - HTTP status code.
 */
function json(res, data, status = 200) {
  // CORS headers first, then the content type on top of them.
  const headers = Object.assign({}, CORS, { 'Content-Type': 'application/json' });
  res.writeHead(status, headers);
  const payload = JSON.stringify(data);
  res.end(payload);
}
// ── HTTP server ───────────────────────────────────────────────────────────────
/**
 * Request router for the dashboard page and its JSON API.
 *
 *   OPTIONS *              204 with CORS headers (preflight)
 *   GET  / , /index.html   dashboard HTML
 *   GET  /events           Server-Sent Events stream, opened with an `init` message
 *   GET  /state            JSON from snapshot()
 *   POST /tool             body `{ tool, args }`; calls TOOLS[tool](args)
 *   POST /emulate          body `{ scenario }`; calls runScenario()
 *   POST /reset            fresh state, connected SSE clients kept
 *   anything else          404 `{ error: 'Not found' }`
 *
 * POST handlers run inside the request's 'end' callback, where an exception
 * would be uncaught, so each one converts failures into a 400 JSON reply.
 */
const server = http.createServer((req, res) => {
  const { method, url } = req;

  // CORS preflight
  if (method === 'OPTIONS') {
    res.writeHead(204, CORS);
    res.end();
    return;
  }

  // Dashboard UI
  if (method === 'GET' && (url === '/' || url === '/index.html')) {
    // String bodies are written as UTF-8, so say so instead of leaving the
    // browser to guess the charset.
    res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' });
    res.end(HTML);
    return;
  }

  // SSE stream
  if (method === 'GET' && url === '/events') {
    res.writeHead(200, {
      ...CORS,
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    });
    const hello = { type: 'init', payload: { state: snapshot() }, ts: Date.now() };
    res.write(`data: ${JSON.stringify(hello)}\n\n`);
    state.clients.push(res);
    // Drop the response from the client list once the connection goes away.
    req.on('close', () => { state.clients = state.clients.filter(c => c !== res); });
    return;
  }

  // Current state snapshot
  if (method === 'GET' && url === '/state') {
    json(res, snapshot());
    return;
  }

  // Tool call
  if (method === 'POST' && url === '/tool') {
    body(req, data => {
      let reply;
      let status = 200;
      try {
        const { tool, args } = JSON.parse(data);
        // Own properties only, so a name like "constructor" cannot reach
        // Object.prototype members. Assumes tools are defined directly on TOOLS.
        const fn = Object.prototype.hasOwnProperty.call(TOOLS, tool) ? TOOLS[tool] : undefined;
        // An unknown tool is still answered with 200, as before.
        reply = typeof fn === 'function' ? fn(args || {}) : { error: `Unknown tool: ${tool}` };
      } catch (e) {
        reply = { error: e.message };
        status = 400;
      }
      json(res, reply, status);
    });
    return;
  }

  // Run a demo scenario
  if (method === 'POST' && url === '/emulate') {
    body(req, data => {
      let reply;
      let status;
      try {
        // An empty body or a literal `null` both mean "use the default scenario".
        const { scenario } = JSON.parse(data || '{}') || {};
        const ok = runScenario(scenario || 'research_code');
        reply = { ok, scenario };
        status = ok ? 200 : 400;
      } catch (e) {
        // Malformed JSON (or a scenario that fails to start) is a client
        // error, not a reason to bring the process down.
        reply = { ok: false, error: e.message };
        status = 400;
      }
      json(res, reply, status);
    });
    return;
  }

  // Reset state
  if (method === 'POST' && url === '/reset') {
    const clients = state.clients;
    state = fresh();
    state.clients = clients;
    broadcast('reset', {});
    json(res, { ok: true });
    return;
  }

  json(res, { error: 'Not found' }, 404);
});
// Start accepting connections on PORT and print where the dashboard and the
// tool endpoint can be reached.
server.listen(PORT, () => {
  console.log(`\n agent-visibility\n`);
  // The separator glyph was mis-decoded in the banner; restored to an arrow.
  console.log(` Dashboard → http://localhost:${PORT}`);
  console.log(` Tool POST → http://localhost:${PORT}/tool`);
  console.log(` Ctrl+C to stop\n`);
});