// LiveKit voice client: connects the microphone to a room, plays the remote
// agent audio, draws a live frequency visualisation of the local microphone
// and renders per-turn latency metrics received on the "metrics" data topic.

const { Room, RoomEvent, Track, createLocalAudioTrack } = LivekitClient;

// --- DOM handles -----------------------------------------------------------
const statusEl = document.getElementById("status");
const statusDot = document.getElementById("status-dot");
const connectBtn = document.getElementById("connect");
const disconnectBtn = document.getElementById("disconnect");
const muteBtn = document.getElementById("mute");
const canvas = document.getElementById("wave");
const remoteAudio = document.getElementById("remote-audio");
const ctx = canvas.getContext("2d");
const pipelineStageRowEl = document.getElementById("pipeline-stage-row");
const handoffCardEl = document.getElementById("live-handoff-card");
const voiceGenerationStepEl = document.getElementById("live-voice-generation-step");

// --- Constants -------------------------------------------------------------
const AUDIO_DIAGNOSTICS = false;
const CONNECTION_STATES = Object.freeze({
  IDLE: "idle",
  CONNECTING: "connecting",
  CONNECTED: "connected",
  DISCONNECTING: "disconnecting",
});
// Metric cards that are always part of the pipeline row. The "handoff" card
// is handled separately because it is only shown when a handoff is reported.
const LIVE_METRIC_IDS = [
  "eou",
  "llm-ttft",
  "voice-generation",
  "total",
];
// Shared decoder for incoming metrics payloads.
const metricsDecoder = new TextDecoder("utf-8");

// --- Mutable session state -------------------------------------------------
let room = null;
let localTrack = null;
let remoteAudioTrack = null;
let analyser = null;
let audioContext = null;
let animationId = null;
let muted = false;
let resizeObserver = null;
let currentSessionId = null;
let currentRoomName = null;
// Incremented by every connect/disconnect request; async continuations compare
// their captured value against it to detect that they have been superseded.
let activeConnectionSeq = 0;
let connectionState = CONNECTION_STATES.IDLE;

// --- Mutable metrics state -------------------------------------------------
let averages = createEmptyAverages();
let activeLiveSpeechId = null;
let liveTurnValues = createEmptyLiveTurnValues();

/**
 * Builds a fresh container for the per-metric sample lists used for averages.
 * @returns {{eouDelay: number[], llmTtft: number[], llmToTtsHandoff: number[], voiceGeneration: number[], totalLatency: number[]}}
 */
function createEmptyAverages() {
  return {
    eouDelay: [],
    llmTtft: [],
    llmToTtsHandoff: [],
    voiceGeneration: [],
    totalLatency: [],
  };
}

/**
 * Builds a fresh record of the values collected for the turn currently shown
 * in the live cards. `null` means "not reported yet".
 * @returns {{eouDelay: ?number, llmTtft: ?number, llmToTtsHandoff: ?number, ttsTtfb: ?number, totalLatency: ?number}}
 */
function createEmptyLiveTurnValues() {
  return {
    eouDelay: null,
    llmTtft: null,
    llmToTtsHandoff: null,
    ttsTtfb: null,
    totalLatency: null,
  };
}

/**
 * Sizes the canvas once and keeps it in sync with its container.
 */
function initCanvasSizing() {
  resizeCanvas();
  if (!canvas.parentElement) return;
  // Watch for container size changes
  resizeObserver = new ResizeObserver(() => {
    resizeCanvas();
  });
  resizeObserver.observe(canvas.parentElement);
}

// Initialize canvas sizing on load. If the document has already finished
// parsing, DOMContentLoaded will never fire again, so run immediately.
if (document.readyState === "loading") {
  window.addEventListener("DOMContentLoaded", initCanvasSizing);
} else {
  initCanvasSizing();
}

/**
 * Updates the status text and the status indicator dot.
 * @param {string} text - Message shown in the status element.
 * @param {string} state - "connected" or "connecting" add the matching CSS
 *   class to the dot; any other value leaves only the base class.
 */
function setStatus(text, state) {
  statusEl.textContent = text;
  statusDot.className = "status-dot";
  if (state === "connected") {
    statusDot.classList.add("connected");
  } else if (state === "connecting") {
    statusDot.classList.add("connecting");
  }
}

/**
 * Records the connection state and enables/disables the control buttons:
 * connect is available only when idle, disconnect and mute only when connected.
 * @param {string} nextState - One of the CONNECTION_STATES values.
 */
function setConnectionState(nextState) {
  connectionState = nextState;
  connectBtn.disabled = connectionState !== CONNECTION_STATES.IDLE;
  disconnectBtn.disabled = connectionState !== CONNECTION_STATES.CONNECTED;
  muteBtn.disabled = connectionState !== CONNECTION_STATES.CONNECTED;
}

/** Restores the mute button to its unmuted label. */
function resetMuteButton() {
  muteBtn.innerHTML = ` Mute`;
}

/** Stops remote playback and drops any media source held by the audio element. */
function clearRemoteAudio() {
  remoteAudio.pause();
  remoteAudio.srcObject = null;
  remoteAudio.removeAttribute("src");
  remoteAudio.load();
}

/**
 * Reads the settings of the MediaStreamTrack behind a track object.
 * @param {object} track - Object exposing `mediaStreamTrack`.
 * @returns {object} The settings, or `{}` when unavailable or when reading fails.
 */
function getMediaTrackSettings(track) {
  const mediaTrack = track && track.mediaStreamTrack;
  if (!mediaTrack || typeof mediaTrack.getSettings !== "function") {
    return {};
  }
  try {
    return mediaTrack.getSettings() || {};
  } catch (_error) {
    // Diagnostics only: a failing getSettings() must not break audio setup.
    return {};
  }
}

/**
 * Logs an audio diagnostics event when AUDIO_DIAGNOSTICS is enabled.
 * @param {string} eventName - Event identifier.
 * @param {object} [details] - Extra fields merged into the logged payload.
 */
function logAudioDiagnostics(eventName, details = {}) {
  if (!AUDIO_DIAGNOSTICS) return;
  console.info("[audio-diagnostics]", eventName, {
    timestamp: new Date().toISOString(),
    sessionId: currentSessionId,
    roomName: currentRoomName,
    ...details,
  });
}

/**
 * Detaches a remote audio track from the audio element and clears playback.
 * Non-audio or missing tracks are ignored.
 * @param {object} track - Remote track to detach.
 * @param {string} reason - Reason recorded in the diagnostics log.
 */
function detachRemoteAudioTrack(track, reason) {
  if (!track || track.kind !== Track.Kind.Audio) return;
  try {
    track.detach(remoteAudio);
  } catch (error) {
    console.warn("Failed to detach remote audio track:", error);
  }
  if (remoteAudioTrack === track) {
    remoteAudioTrack = null;
  }
  clearRemoteAudio();
  logAudioDiagnostics("remote_track_detached", {
    reason,
    trackSid: track.sid || null,
  });
}

/**
 * Attaches a remote audio track to the audio element and starts playback,
 * replacing any previously attached track.
 * @param {object} track - Remote track to attach; non-audio tracks are ignored.
 * @param {object} participant - Participant owning the track (diagnostics only).
 */
function attachRemoteAudioTrack(track, participant) {
  if (!track || track.kind !== Track.Kind.Audio) return;
  if (remoteAudioTrack && remoteAudioTrack !== track) {
    detachRemoteAudioTrack(remoteAudioTrack, "replaced_by_new_track");
  }
  remoteAudioTrack = track;
  track.attach(remoteAudio);

  const trackSettings = getMediaTrackSettings(track);
  logAudioDiagnostics("remote_track_subscribed", {
    participantIdentity: participant && participant.identity ? participant.identity : null,
    trackSid: track.sid || null,
    trackSampleRate: trackSettings.sampleRate ?? null,
    trackSettings,
    remotePlaybackRate: remoteAudio.playbackRate,
    remoteDefaultPlaybackRate: remoteAudio.defaultPlaybackRate,
  });

  // play() may reject (e.g. when playback is blocked); that is reported but
  // not treated as a connection failure.
  const playPromise = remoteAudio.play();
  if (playPromise && typeof playPromise.catch === "function") {
    playPromise.catch((error) => {
      console.warn("Remote audio playback did not auto-start:", error);
    });
  }
}

/** Erases the whole visualisation canvas. */
function clearWave() {
  ctx.clearRect(0, 0, canvas.width, canvas.height);
}

/**
 * Draws one frame of the frequency-bar visualisation and schedules the next.
 * Stops (and clears the canvas) as soon as no analyser is available.
 */
function drawWave() {
  if (!analyser) {
    clearWave();
    return;
  }

  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  analyser.getByteFrequencyData(dataArray);

  const w = canvas.width;
  const h = canvas.height;
  ctx.clearRect(0, 0, w, h);

  const barCount = 64;
  // At least 1 so that bars sample distinct bins even with few frequency bins.
  const step = Math.max(1, Math.floor(bufferLength / barCount));
  const gap = 3;
  const barWidth = (w - gap * (barCount - 1)) / barCount;
  const centerY = h / 2;
  const maxBarHeight = h * 0.85;

  for (let i = 0; i < barCount; i++) {
    const raw = dataArray[i * step] || 0;
    const normalized = raw / 255;
    // Squaring de-emphasises low-level bins.
    const eased = normalized * normalized;
    const barHeight = Math.max(3, eased * maxBarHeight);
    const halfHeight = barHeight / 2;
    const x = i * (barWidth + gap);
    const y = centerY - halfHeight;

    const intensity = 0.25 + normalized * 0.75;
    const r = Math.round(108 * intensity);
    const g = Math.round(143 * intensity);
    const b = Math.round(255 * intensity);

    ctx.beginPath();
    const radius = Math.min(barWidth / 2, 3);
    roundRect(ctx, x, y, barWidth, barHeight, radius);
    ctx.fillStyle = `rgba(${r}, ${g}, ${b}, ${0.4 + normalized * 0.6})`;
    ctx.fill();

    // Second fill with a shadow adds a glow to the louder bars.
    if (normalized > 0.3) {
      ctx.shadowColor = `rgba(108, 143, 255, ${normalized * 0.4})`;
      ctx.shadowBlur = 8;
      ctx.fill();
      ctx.shadowColor = "transparent";
      ctx.shadowBlur = 0;
    }
  }

  animationId = window.requestAnimationFrame(drawWave);
}

/**
 * Appends a rounded-rectangle sub-path to the context's current path.
 * The radius is clamped so it never exceeds half the width or height.
 * @param {CanvasRenderingContext2D} context - Target 2D context.
 * @param {number} x - Left edge.
 * @param {number} y - Top edge.
 * @param {number} w - Width.
 * @param {number} h - Height.
 * @param {number} r - Requested corner radius.
 */
function roundRect(context, x, y, w, h, r) {
  if (w < 2 * r) r = w / 2;
  if (h < 2 * r) r = h / 2;
  context.moveTo(x + r, y);
  context.arcTo(x + w, y, x + w, y + h, r);
  context.arcTo(x + w, y + h, x, y + h, r);
  context.arcTo(x, y + h, x, y, r);
  context.arcTo(x, y, x + w, y, r);
  context.closePath();
}

/**
 * Fits the canvas to its container while keeping a 900:200 aspect ratio,
 * capped at 900px wide and never smaller than 400x150.
 */
function resizeCanvas() {
  const container = canvas.parentElement;
  if (!container) return;
  const containerWidth = container.clientWidth;
  const containerHeight = container.clientHeight;

  // Maintain aspect ratio, but scale to fit container
  const maxWidth = 900;
  const aspectRatio = 900 / 200; // Original aspect ratio

  let canvasWidth = Math.min(containerWidth, maxWidth);
  let canvasHeight = Math.round(canvasWidth / aspectRatio);

  // If height exceeds container, scale down
  if (canvasHeight > containerHeight - 40) { // 40px for padding
    canvasHeight = containerHeight - 40;
    canvasWidth = Math.round(canvasHeight * aspectRatio);
  }

  // Ensure minimum size
  canvasWidth = Math.max(canvasWidth, 400);
  canvasHeight = Math.max(canvasHeight, 150);

  // Update canvas dimensions. Assigning width/height resets the bitmap even
  // when the value is unchanged, so only write when the size really differs.
  if (canvas.width !== canvasWidth) canvas.width = canvasWidth;
  if (canvas.height !== canvasHeight) canvas.height = canvasHeight;
  canvas.style.width = `${canvasWidth}px`;
  canvas.style.height = `${canvasHeight}px`;
}

/**
 * Routes the given local track into a Web Audio analyser and starts drawing.
 * @param {object} track - Local audio track exposing `mediaStreamTrack`;
 *   a falsy value is a no-op.
 */
function setupAnalyser(track) {
  if (!track) return;

  // Release any previous AudioContext / animation loop so repeated calls do
  // not leak contexts or run several draw loops at once.
  cleanupWave();

  audioContext = new AudioContext();
  analyser = audioContext.createAnalyser();
  analyser.fftSize = 512;
  analyser.smoothingTimeConstant = 0.8;

  const stream = new MediaStream([track.mediaStreamTrack]);
  const source = audioContext.createMediaStreamSource(stream);
  source.connect(analyser);

  const localTrackSettings = getMediaTrackSettings(track);
  logAudioDiagnostics("local_analyser_ready", {
    localTrackSampleRate: localTrackSettings.sampleRate ?? null,
    localTrackSettings,
    audioContextSampleRate: audioContext.sampleRate,
  });

  drawWave();
}

/**
 * Requests session bootstrap data (token, room name, session id) from the
 * configured endpoint.
 * @returns {Promise<object>} Parsed JSON body of the bootstrap response.
 * @throws {Error} When configuration is missing or the response is not OK;
 *   the message is taken from the response body's `message` when present.
 */
async function fetchSessionBootstrap() {
  if (!SESSION_BOOTSTRAP_URL || !LIVEKIT_URL) {
    throw new Error("Missing LiveKit configuration");
  }

  // Build the cache-busting URL with the URL API so an endpoint that already
  // carries a query string or fragment stays well-formed.
  const bootstrapUrl = new URL(SESSION_BOOTSTRAP_URL, document.baseURI);
  bootstrapUrl.searchParams.set("t", String(Date.now()));

  const response = await fetch(bootstrapUrl, {
    method: "GET",
    cache: "no-store",
  });

  if (!response.ok) {
    let message = `bootstrap request failed (${response.status})`;
    try {
      const body = await response.json();
      if (typeof body.message === "string" && body.message) {
        message = body.message;
      }
    } catch (_ignored) {
      // Keep default message when response is not JSON.
    }
    throw new Error(message);
  }

  return response.json();
}

/**
 * Decodes one payload from the "metrics" data topic and feeds it to the live
 * cards and/or the running averages. Malformed payloads are logged and ignored.
 * @param {Uint8Array} data - Raw UTF-8 encoded JSON payload.
 */
function handleMetricsMessage(data) {
  const jsonStr = metricsDecoder.decode(data);
  try {
    const metricsData = JSON.parse(jsonStr);
    if (metricsData.type === "metrics_live_update") {
      if (metricsData.diagnostic === true) return;
      handleLiveTurnBoundary(metricsData);
      updateLiveMetrics(metricsData);
    } else if (metricsData.type === "conversation_turn") {
      if (metricsData.role === "agent") {
        updateLiveMetrics(metricsData);
      }
      renderTurn(metricsData);
    }
  } catch (error) {
    console.error("Failed to parse metrics:", error);
  }
}

/**
 * Bootstraps a session, joins the room, publishes the microphone and starts
 * the visualisation. Does nothing unless the connection state is idle.
 *
 * Every continuation after an `await` re-checks that this attempt is still
 * the active one (same room object and same sequence number) before touching
 * shared state.
 *
 * @returns {Promise<void>}
 * @throws {Error} Re-throws the failure of the active attempt after resetting
 *   state to idle. A failure of an attempt that has already been superseded is
 *   logged and swallowed so it cannot clobber the newer attempt's state.
 */
async function connectToRoom() {
  if (connectionState !== CONNECTION_STATES.IDLE) return;

  const connectionSeq = ++activeConnectionSeq;
  setConnectionState(CONNECTION_STATES.CONNECTING);
  setStatus("Preparing session...", "connecting");

  let nextRoom = null;
  // Only meaningful once nextRoom has been created and stored in `room`.
  const isActiveAttempt = () =>
    room === nextRoom && connectionSeq === activeConnectionSeq;

  try {
    const bootstrap = await fetchSessionBootstrap();
    if (connectionSeq !== activeConnectionSeq) return;

    currentSessionId = bootstrap.session_id || crypto.randomUUID();
    currentRoomName = bootstrap.room_name || null;

    if (!bootstrap.token) {
      throw new Error("Session bootstrap did not return a token");
    }

    setStatus(
      `Connecting to ${currentRoomName || "room"}...`,
      "connecting"
    );

    nextRoom = new Room();
    room = nextRoom;
    resetMetrics();
    if (remoteAudioTrack) {
      detachRemoteAudioTrack(remoteAudioTrack, "before_new_room_connect");
    } else {
      clearRemoteAudio();
    }

    nextRoom.on(RoomEvent.TrackSubscribed, (track, publication, participant) => {
      if (!isActiveAttempt()) return;
      if (track.kind === Track.Kind.Audio) {
        attachRemoteAudioTrack(track, participant);
        setStatus("Agent streaming", "connected");
      }
    });

    nextRoom.on(RoomEvent.TrackUnsubscribed, (track, publication, participant) => {
      if (!isActiveAttempt()) return;
      if (track.kind !== Track.Kind.Audio) return;
      detachRemoteAudioTrack(track, "track_unsubscribed");
      logAudioDiagnostics("remote_track_unsubscribed", {
        participantIdentity: participant && participant.identity ? participant.identity : null,
        trackSid: track.sid || null,
      });
    });

    nextRoom.on(RoomEvent.Disconnected, () => {
      // Manual disconnects bump activeConnectionSeq first, so this handler
      // only performs cleanup for disconnects that were not user-initiated.
      if (!isActiveAttempt()) return;
      room = null;
      if (localTrack) {
        localTrack.stop();
      }
      localTrack = null;
      currentSessionId = null;
      currentRoomName = null;
      muted = false;
      resetMuteButton();
      if (remoteAudioTrack) {
        detachRemoteAudioTrack(remoteAudioTrack, "room_disconnected");
      } else {
        clearRemoteAudio();
      }
      cleanupWave();
      resetMetrics();
      setConnectionState(CONNECTION_STATES.IDLE);
      setStatus("Disconnected", "");
    });

    nextRoom.on(RoomEvent.DataReceived, (data, participant, kind, topic) => {
      if (!isActiveAttempt()) return;
      if (topic === "metrics") {
        handleMetricsMessage(data);
      }
    });

    await nextRoom.connect(LIVEKIT_URL, bootstrap.token);
    if (!isActiveAttempt()) return;

    // Keep the new track local until this attempt is confirmed to be current,
    // so a superseded attempt never overwrites the shared `localTrack`.
    const createdTrack = await createLocalAudioTrack();
    if (!isActiveAttempt()) {
      createdTrack.stop();
      return;
    }
    localTrack = createdTrack;

    await nextRoom.localParticipant.publishTrack(localTrack);
    // The room may have dropped while publishing; the Disconnected handler has
    // then already reset everything, so do not force the UI into "connected".
    if (!isActiveAttempt()) return;

    setupAnalyser(localTrack);

    muted = false;
    resetMuteButton();
    setConnectionState(CONNECTION_STATES.CONNECTED);
    setStatus(`Mic streaming (${currentRoomName || "connected"})`, "connected");
  } catch (error) {
    if (connectionSeq !== activeConnectionSeq) {
      // A newer connect/disconnect owns the shared state now: release only
      // what this attempt created and leave everything else untouched.
      console.warn("Superseded connection attempt failed:", error);
      if (nextRoom) {
        try {
          await nextRoom.disconnect();
        } catch (disconnectError) {
          console.warn("Failed to disconnect after connect error:", disconnectError);
        }
      }
      return;
    }

    if (localTrack) {
      localTrack.stop();
      localTrack = null;
    }
    // `nextRoom` is still null when the bootstrap step failed; without this
    // check `room === nextRoom` would be `null === null` and disconnect()
    // would be invoked on null.
    if (nextRoom && room === nextRoom) {
      try {
        await nextRoom.disconnect();
      } catch (disconnectError) {
        console.warn("Failed to disconnect after connect error:", disconnectError);
      }
      room = null;
    }
    currentSessionId = null;
    currentRoomName = null;
    muted = false;
    resetMuteButton();
    if (remoteAudioTrack) {
      detachRemoteAudioTrack(remoteAudioTrack, "connect_error");
    } else {
      clearRemoteAudio();
    }
    cleanupWave();
    resetMetrics();
    setConnectionState(CONNECTION_STATES.IDLE);
    throw error;
  }
}

/**
 * Stops the draw loop, closes the AudioContext and clears the canvas.
 * Safe to call when nothing is running.
 */
function cleanupWave() {
  if (animationId) {
    window.cancelAnimationFrame(animationId);
    animationId = null;
  }
  if (audioContext) {
    const closeResult = audioContext.close();
    if (closeResult && typeof closeResult.catch === "function") {
      // Best effort: a failing close() leaves nothing further to clean up.
      closeResult.catch(() => {});
    }
    audioContext = null;
  }
  analyser = null;
  clearWave();
}

/**
 * Leaves the current room on user request and resets the UI to idle.
 * Does nothing unless a room exists and the state is connected.
 * @returns {Promise<void>} Rejects if `room.disconnect()` fails; state is
 *   reset in either case.
 */
async function disconnectRoom() {
  if (!room || connectionState !== CONNECTION_STATES.CONNECTED) return;

  const disconnectingRoom = room;
  // Bumping the sequence makes the room's own event handlers ignore the
  // events caused by this manual disconnect.
  const disconnectSeq = ++activeConnectionSeq;
  setConnectionState(CONNECTION_STATES.DISCONNECTING);
  setStatus("Disconnecting...", "connecting");

  try {
    if (localTrack) {
      try {
        await disconnectingRoom.localParticipant.unpublishTrack(localTrack);
      } catch (error) {
        console.warn("Failed to unpublish local track during disconnect:", error);
      }
      localTrack.stop();
      localTrack = null;
    }
    if (remoteAudioTrack) {
      detachRemoteAudioTrack(remoteAudioTrack, "manual_disconnect");
    } else {
      clearRemoteAudio();
    }

    await disconnectingRoom.disconnect();
  } finally {
    if (room === disconnectingRoom) {
      room = null;
    }
    if (disconnectSeq === activeConnectionSeq) {
      currentSessionId = null;
      currentRoomName = null;
      muted = false;
      resetMuteButton();
      cleanupWave();
      resetMetrics();
      setConnectionState(CONNECTION_STATES.IDLE);
      setStatus("Disconnected", "");
    }
  }
}

/** Clears all live metric cards, the current turn values and the averages. */
function resetMetrics() {
  activeLiveSpeechId = null;
  liveTurnValues = createEmptyLiveTurnValues();
  averages = createEmptyAverages();
  clearAllLiveMetrics();
  setHandoffCardVisible(false);
  updateLiveMetricAverages();
}

/**
 * Starts a new live turn when an "eou" stage update arrives for a speech id
 * that differs from the active one. An "eou" update without a speech id
 * clears the cards instead of putting them into the loading state.
 * @param {object} metricsData - Parsed live update (`stage`, `speech_id`).
 */
function handleLiveTurnBoundary(metricsData) {
  if (metricsData.stage !== "eou") return;

  const speechId = metricsData.speech_id;
  if (!speechId) {
    clearAllLiveMetrics();
    setHandoffCardVisible(false);
    activeLiveSpeechId = null;
    liveTurnValues = createEmptyLiveTurnValues();
    return;
  }

  if (speechId === activeLiveSpeechId) return;

  activeLiveSpeechId = speechId;
  liveTurnValues = createEmptyLiveTurnValues();
  setHandoffCardVisible(false);
  setAllLiveMetricsLoading();
}

/**
 * Toggles the microphone and updates the button label and status text.
 * The `muted` flag is only committed once the room has accepted the change,
 * and the UI is left alone if the room changed while waiting.
 * @returns {Promise<void>} Rejects if the microphone state cannot be changed.
 */
async function toggleMute() {
  if (!room) return;

  const targetRoom = room;
  const nextMuted = !muted;
  await targetRoom.localParticipant.setMicrophoneEnabled(!nextMuted);

  // A disconnect during the await already reset the mute UI; do not undo it.
  if (room !== targetRoom) return;

  muted = nextMuted;
  if (muted) {
    muteBtn.innerHTML = ` Unmute`;
    setStatus("Mic muted", "connected");
  } else {
    muteBtn.innerHTML = ` Mute`;
    setStatus("Mic streaming", "connected");
  }
}

// --- Initial UI state and event wiring -------------------------------------
resetMuteButton();
setConnectionState(CONNECTION_STATES.IDLE);
setHandoffCardVisible(false);

connectBtn.addEventListener("click", () => {
  connectToRoom().catch((error) => {
    setStatus(`Failed: ${error.message}`, "");
    setConnectionState(CONNECTION_STATES.IDLE);
  });
});

disconnectBtn.addEventListener("click", () => {
  disconnectRoom().catch((error) => {
    setStatus(`Error: ${error.message}`, "");
  });
});

muteBtn.addEventListener("click", () => {
  toggleMute().catch((error) => {
    setStatus(`Error: ${error.message}`, "");
  });
});

/**
 * Severity class for "lower is better" values.
 * @param {number} value
 * @param {number} warningThreshold
 * @param {number} criticalThreshold
 * @returns {string} "critical", "warning" or "" when below both thresholds.
 */
function getLatencyClass(value, warningThreshold, criticalThreshold) {
  if (value >= criticalThreshold) return "critical";
  if (value >= warningThreshold) return "warning";
  return "";
}

/**
 * Severity class for "higher is better" values (inverted thresholds).
 * @param {number} value
 * @param {number} warningThreshold
 * @param {number} criticalThreshold
 * @returns {string} "critical", "warning" or "" when above both thresholds.
 */
function getTpsClass(value, warningThreshold, criticalThreshold) {
  if (value <= criticalThreshold) return "critical";
  if (value <= warningThreshold) return "warning";
  return "";
}

/**
 * Base CSS class list for a metric's value label.
 * @param {string} metricId
 * @returns {string}
 */
function getLiveMetricValueBaseClass(metricId) {
  return metricId === "total"
    ? "metric-card-value pipeline-total-value"
    : "metric-card-value";
}

/**
 * Renders a value into the `live-<metricId>` label and `live-<metricId>-bar`.
 * Nothing happens when the value is null/undefined/NaN or when either element
 * is missing from the page.
 * @param {string} metricId - Suffix of the element ids.
 * @param {number} value - Value to display.
 * @param {number} maxValue - Value that corresponds to a 100% wide bar.
 * @param {number} warningThreshold
 * @param {number} criticalThreshold
 * @param {{inverted?: boolean, suffix?: string, decimals?: number}} [options]
 *   `inverted` treats low values as bad; `suffix` defaults to "s";
 *   `decimals` defaults to 2 (0 rounds and inserts a space before the suffix).
 */
function setLiveMetric(metricId, value, maxValue, warningThreshold, criticalThreshold, options) {
  const bar = document.getElementById(`live-${metricId}-bar`);
  const label = document.getElementById(`live-${metricId}`);
  if (value === undefined || value === null || Number.isNaN(value)) return;
  if (!bar || !label) return;

  const percent = Math.min((value / maxValue) * 100, 100);
  const invertedThresholds = options && options.inverted;
  const cls = invertedThresholds
    ? getTpsClass(value, warningThreshold, criticalThreshold)
    : getLatencyClass(value, warningThreshold, criticalThreshold);

  const suffix = (options && options.suffix) || "s";
  const decimals = (options && options.decimals !== undefined) ? options.decimals : 2;
  label.textContent = decimals > 0
    ? `${value.toFixed(decimals)}${suffix}`
    : `${Math.round(value)} ${suffix}`;
  label.className = getLiveMetricValueBaseClass(metricId) + (cls ? ` ${cls}` : "");

  bar.style.width = `${percent}%`;
  bar.className = "metric-card-fill" + (cls ? ` ${cls}` : "");
}

/**
 * Shows the running average under a metric card, or clears it for `null`.
 * @param {string} metricId
 * @param {?number} value
 */
function setLiveMetricAverage(metricId, value) {
  const averageLabel = document.getElementById(`live-${metricId}-avg`);
  if (!averageLabel) return;
  averageLabel.textContent = value !== null ? `avg ${value.toFixed(2)}s` : "";
}

/**
 * Puts a metric card into its "waiting for value" state.
 * Missing elements are skipped.
 * @param {string} metricId
 */
function setLiveMetricLoading(metricId) {
  const label = document.getElementById(`live-${metricId}`);
  const bar = document.getElementById(`live-${metricId}-bar`);
  if (label) {
    label.textContent = "coming...";
    label.className = `${getLiveMetricValueBaseClass(metricId)} loading`;
  }
  if (bar) {
    bar.style.width = "0%";
    bar.className = "metric-card-fill";
  }
}

/**
 * Resets a metric card (value, bar and average) to its empty state.
 * Missing elements are skipped so an absent card cannot abort a reset.
 * @param {string} metricId
 */
function clearLiveMetric(metricId) {
  const label = document.getElementById(`live-${metricId}`);
  const bar = document.getElementById(`live-${metricId}-bar`);
  const averageLabel = document.getElementById(`live-${metricId}-avg`);
  if (label) {
    label.textContent = "--";
    label.className = getLiveMetricValueBaseClass(metricId);
  }
  if (bar) {
    bar.style.width = "0%";
    bar.className = "metric-card-fill";
  }
  if (averageLabel) averageLabel.textContent = "";
}

/** Resets every metric card, including the optional handoff card. */
function clearAllLiveMetrics() {
  LIVE_METRIC_IDS.forEach((id) => clearLiveMetric(id));
  clearLiveMetric("handoff");
}

/** Puts the always-visible metric cards into loading state and refreshes averages. */
function setAllLiveMetricsLoading() {
  LIVE_METRIC_IDS.forEach((id) => setLiveMetricLoading(id));
  updateLiveMetricAverages();
}

/**
 * Shows or hides the handoff card and renumbers the voice-generation step
 * ("4" when the handoff card is visible, otherwise "3").
 * @param {boolean} visible
 */
function setHandoffCardVisible(visible) {
  if (handoffCardEl) {
    handoffCardEl.hidden = !visible;
  }
  if (pipelineStageRowEl) {
    pipelineStageRowEl.classList.toggle("handoff-visible", visible);
  }
  if (voiceGenerationStepEl) {
    voiceGenerationStepEl.textContent = visible ? "4" : "3";
  }
}

/**
 * @param {*} value
 * @returns {boolean} True only for finite values of type number.
 */
function isFiniteNumber(value) {
  return typeof value === "number" && Number.isFinite(value);
}

/**
 * Arithmetic mean of a list.
 * @param {number[]} values
 * @returns {?number} The mean, or `null` for an empty list.
 */
function avg(values) {
  if (!values.length) return null;
  return values.reduce((sum, value) => sum + value, 0) / values.length;
}

/** Re-renders the average label of every metric card from `averages`. */
function updateLiveMetricAverages() {
  setLiveMetricAverage("eou", avg(averages.eouDelay));
  setLiveMetricAverage("llm-ttft", avg(averages.llmTtft));
  setLiveMetricAverage("handoff", avg(averages.llmToTtsHandoff));
  setLiveMetricAverage("voice-generation", avg(averages.voiceGeneration));
  setLiveMetricAverage("total", avg(averages.totalLatency));
}

/**
 * Decides whether a reported end-of-utterance delay should replace the one
 * currently shown.
 * @param {object} turn - Update carrying `stage` and/or `role`.
 * @param {*} nextValue - Newly reported delay.
 * @param {?number} currentValue - Delay already stored for the live turn.
 * @returns {boolean}
 */
function shouldApplyUserLatency(turn, nextValue, currentValue) {
  if (!isFiniteNumber(nextValue)) return false;

  const stage = turn.stage;
  const isUserStage = stage === "eou" || stage === "stt" || turn.role === "user";
  if (isUserStage) return true;

  if (nextValue > 0) return true;
  // A non-positive value from a non-user stage must not overwrite a value
  // that has already been recorded.
  if (isFiniteNumber(currentValue)) {
    return false;
  }

  // Instruction-only startup turn has no EOU boundary and can legitimately be zero.
  return activeLiveSpeechId === null;
}

/**
 * Applies the values carried by an update to the live metric cards and, once
 * all stages are known, to the total card.
 * @param {object} turn - Update with optional `metrics` and `latencies`.
 */
function updateLiveMetrics(turn) {
  const metrics = turn.metrics || {};
  const latencies = turn.latencies || {};

  const eouDelay = latencies.eou_delay ?? latencies.vad_detection_delay;
  if (shouldApplyUserLatency(turn, eouDelay, liveTurnValues.eouDelay)) {
    liveTurnValues.eouDelay = eouDelay;
    setLiveMetric("eou", eouDelay, 4.0, 0.8, 1.2);
  }

  const llmTtft = metrics.llm?.ttft;
  if (isFiniteNumber(llmTtft)) {
    liveTurnValues.llmTtft = llmTtft;
    setLiveMetric("llm-ttft", llmTtft, 4.0, 0.5, 1.0);
  }

  const llmToTtsHandoff = latencies.llm_to_tts_handoff_latency;
  if (isFiniteNumber(llmToTtsHandoff) && llmToTtsHandoff > 0) {
    liveTurnValues.llmToTtsHandoff = llmToTtsHandoff;
    setHandoffCardVisible(true);
    setLiveMetric("handoff", llmToTtsHandoff, 4.0, 0.35, 0.8);
  } else if (llmToTtsHandoff === 0) {
    // An explicit zero means there is no handoff stage to show for this turn.
    liveTurnValues.llmToTtsHandoff = 0;
    setHandoffCardVisible(false);
    clearLiveMetric("handoff");
  }

  const ttsTtfb = metrics.tts?.ttfb;
  if (isFiniteNumber(ttsTtfb)) {
    liveTurnValues.ttsTtfb = ttsTtfb;
    setLiveMetric("voice-generation", ttsTtfb, 4.0, 0.6, 1.2);
  }

  const totalLatency = latencies.total_latency;
  if (isFiniteNumber(totalLatency)) {
    liveTurnValues.totalLatency = totalLatency;
  }

  const hasAllStages = (
    isFiniteNumber(liveTurnValues.eouDelay)
    && isFiniteNumber(liveTurnValues.llmTtft)
    && isFiniteNumber(liveTurnValues.ttsTtfb)
  );
  if (hasAllStages) {
    const handoff = isFiniteNumber(liveTurnValues.llmToTtsHandoff)
      ? Math.max(liveTurnValues.llmToTtsHandoff, 0)
      : 0;
    const computedTotal = liveTurnValues.eouDelay + liveTurnValues.llmTtft + handoff + liveTurnValues.ttsTtfb;
    // Prefer the reported total; fall back to the sum of the stages.
    const totalValue = isFiniteNumber(liveTurnValues.totalLatency)
      ? liveTurnValues.totalLatency
      : computedTotal;
    setLiveMetric("total", totalValue, 8.0, 1.5, 3.0);
  }
}

/**
 * Adds the positive, finite values of a completed turn to the running
 * averages and refreshes the average labels.
 * @param {object} turn - Turn with optional `metrics` and `latencies`.
 */
function renderTurn(turn) {
  const latencies = turn.latencies || {};
  const metrics = turn.metrics || {};

  const eouDelay = latencies.eou_delay ?? latencies.vad_detection_delay;
  if (isFiniteNumber(eouDelay) && eouDelay > 0) averages.eouDelay.push(eouDelay);

  const llmTtft = metrics.llm?.ttft;
  if (isFiniteNumber(llmTtft) && llmTtft > 0) averages.llmTtft.push(llmTtft);

  const llmToTtsHandoff = latencies.llm_to_tts_handoff_latency;
  if (isFiniteNumber(llmToTtsHandoff) && llmToTtsHandoff > 0) {
    averages.llmToTtsHandoff.push(llmToTtsHandoff);
  }

  const ttsTtfb = metrics.tts?.ttfb;
  if (isFiniteNumber(ttsTtfb) && ttsTtfb > 0) {
    averages.voiceGeneration.push(ttsTtfb);
  }

  const totalLatency = latencies.total_latency;
  if (isFiniteNumber(totalLatency) && totalLatency > 0) {
    averages.totalLatency.push(totalLatency);
  }

  updateLiveMetricAverages();
}