/** * Minimal Conversation - embedded app entry point. * * Mounted by the dispatcher when the URL has `?embedded=1`, i.e. we're * inside the host's iframe. The host has already: * - signed the user in (HF OAuth), * - let them pick a robot, * - established the WebRTC session, * - run the wake-up trajectory. * * `connectToHost()` resolves with a live SDK handle past all that. * From there this module owns: * - kicking off the OpenAI Realtime session (ephemeral key minted * from the visitor's HF token, see `ephemeral-key.ts`), * - routing audio (robot mic ↔ OpenAI ↔ robot speakers), * - the conversation FSM (listening / user-speaking / processing / * ai-speaking) and its visual orb, * - tool calls (head poses + body-language move catalog), * - the in-app side controls (mute, end conversation), * - the settings modal (only the model `instructions` is user- * editable; model + voice are locked to the server defaults). * * Sign-in, picking, top-bar avatar, end-session button are NOT this * app's concern - they live in `@pollen-robotics/reachy-mini-sdk/host`. */ import "./style.css"; import { connectToHost, type ConnectedHandle } from "@pollen-robotics/reachy-mini-sdk/host/embed"; import type { ReachyMiniInstance } from "@pollen-robotics/reachy-mini-sdk"; import { OpenaiRealtimeClient, type RealtimeTool } from "./openai-realtime"; import { EphemeralKeyError, invalidateEphemeralKey, mintEphemeralKey, } from "./ephemeral-key"; import { HeadWobbler } from "./head-wobbler"; import { AntennasOscillator } from "./antennas"; import { applyAudioStartupConfig } from "./audio-startup-config"; import { MovePlayer, MOVE_CATALOG, MOVE_IDS, type MoveId, } from "./move-player"; // ─── Settings & defaults ──────────────────────────────────────────────── // Locked server-side defaults. 
// Model + voice used to be user-editable, but the ephemeral-key flow
// (see `ephemeral-key.ts`) provisions a session tied to one specific
// model: the Pollen mint endpoint (`/api/openai/ephemeral` on
// `pollen-robotics-reachy-mini`) currently mints for `gpt-realtime-2`.
// Sending a different model name in the SDP handshake's session config
// makes OpenAI reject the call with
// `400 invalid_model "Model X does not match the realtime token model."`,
// so we MUST pass back the exact model string the mint endpoint
// defaulted to. If the upstream default ever changes, bump this constant
// in sync; until then, keep these two values aligned with
// `reachy_mini_mobile_app/.../settings.ts:DEFAULT_MODEL`.
const DEFAULT_MODEL = "gpt-realtime-2";
const DEFAULT_VOICE = "cedar";

// System prompt used until the visitor saves their own. It is assembled
// from four logical lines (persona, `move_head` tool, `play_move` tool,
// usage rule) joined by newlines.
const DEFAULT_INSTRUCTIONS = [
  "You are Reachy Mini, a small friendly robot companion. " +
    "Keep replies short, warm, and spoken. Avoid long monologues. " +
    "You control a small robot body. Two tools are available:",
  " - `move_head`: point the head in a named direction (up, down, left, " +
    "right, tilt_left, tilt_right, center). Instant, use for subtle gestures " +
    "that accompany a sentence.",
  " - `play_move`: trigger a short pre-recorded choreography (1-4s). The " +
    "catalog mixes `dance` entries (rhythmic, playful) and `emotion` entries " +
    "(reactive body language). Pick a dance when the moment calls for " +
    "theatricality (hi, joke, groove) and an emotion when reacting to " +
    "something the user just said (surprise, curiosity, praise, bad news).",
  "Use tools sparingly, never more than once per reply.",
].join("\n");

// `instructions` is the only persisted setting now that the API key
// comes from the HF-token ephemeral mint and model/voice are locked
// server-side. The legacy `reachyMini.openai.{apiKey,model,voice}`
// localStorage entries are intentionally left in place for any
// returning visitor - they're simply ignored on read.
const STORAGE_KEYS = {
  instructions: "reachyMini.openai.instructions",
} as const;

/** User-editable settings persisted across visits. */
interface Settings {
  instructions: string;
}

/**
 * Read the persisted settings, falling back to `DEFAULT_INSTRUCTIONS`
 * when nothing is stored.
 *
 * This runs at module load, so a throwing `localStorage` (storage
 * blocked for the frame, disabled by policy, ...) must not abort the
 * whole app: the failure is logged and defaults are used instead.
 */
function loadSettings(): Settings {
  let stored: string | null = null;
  try {
    stored = localStorage.getItem(STORAGE_KEYS.instructions);
  } catch (err) {
    console.warn("[settings] localStorage read failed, using defaults:", err);
  }
  return { instructions: stored ?? DEFAULT_INSTRUCTIONS };
}

/**
 * Persist the settings. Best effort: a write failure (quota exceeded,
 * storage blocked) is logged and the caller keeps its in-memory copy
 * for the rest of the session.
 */
function saveSettings(s: Settings): void {
  try {
    localStorage.setItem(STORAGE_KEYS.instructions, s.instructions);
  } catch (err) {
    console.warn(
      "[settings] localStorage write failed, keeping in-memory value:",
      err,
    );
  }
}

// ─── Robot tools exposed to the OpenAI model ────────────────────────────

// Named head poses as roll / pitch / yaw triples, passed as-is to
// `setHeadRpyDeg()` by the tool handler.
const HEAD_POSES = {
  center: { roll: 0, pitch: 0, yaw: 0 },
  up: { roll: 0, pitch: -18, yaw: 0 },
  down: { roll: 0, pitch: 18, yaw: 0 },
  left: { roll: 0, pitch: 0, yaw: 25 },
  right: { roll: 0, pitch: 0, yaw: -25 },
  tilt_left: { roll: -15, pitch: 0, yaw: 0 },
  tilt_right: { roll: 15, pitch: 0, yaw: 0 },
} as const;

type HeadPoseName = keyof typeof HEAD_POSES;

// Tool schemas advertised to the model. The `enum` lists are derived
// from `HEAD_POSES` / `MOVE_IDS` so the schema cannot drift from what
// the tool handler accepts.
const ROBOT_TOOLS: RealtimeTool[] = [
  {
    name: "move_head",
    description:
      "Point the robot's head in a named direction. Use this to accompany " +
      "your speech with a tiny, legible gesture.",
    parameters: {
      type: "object",
      properties: {
        direction: {
          type: "string",
          enum: Object.keys(HEAD_POSES),
          description: "Named head pose to assume.",
        },
      },
      required: ["direction"],
    },
  },
  {
    name: "play_move",
    description:
      "Trigger a short pre-recorded body-language move from the Reachy " +
      "dances + emotions library. Catalog (id | kind | when to pick it):\n" +
      MOVE_CATALOG.map(
        (m) => ` - ${m.id} | ${m.kind} | ${m.description}`,
      ).join("\n"),
    parameters: {
      type: "object",
      properties: {
        name: {
          type: "string",
          enum: [...MOVE_IDS],
          description: "Catalog id to play.",
        },
      },
      required: ["name"],
    },
  },
];

// ─── App state machine ──────────────────────────────────────────────────
//
// The host owns all pre-session state (sign-in / picker / connecting).
// This app's FSM starts at `idle` once the host handed us a live SDK,
// and only transitions between conversation-level states.
type AppState =
  | "idle"
  | "starting"
  | "listening"
  | "user-speaking"
  | "processing"
  | "ai-speaking"
  | "error";

/** What the central circle shows for a given state. */
interface StateView {
  caption: string;
  disabled: boolean;
}

const STATE_VIEWS: Record<AppState, StateView> = {
  idle: { caption: "Tap to start", disabled: false },
  starting: { caption: "Starting", disabled: true },
  listening: { caption: "", disabled: false },
  "user-speaking": { caption: "", disabled: false },
  processing: { caption: "", disabled: false },
  "ai-speaking": { caption: "", disabled: false },
  error: { caption: "Tap to retry", disabled: false },
};

// CSS class applied to the central circle for each state.
const STATE_CLASS: Record<AppState, string> = {
  idle: "state-authenticated",
  starting: "state-starting",
  listening: "state-listening",
  "user-speaking": "state-user-speaking",
  processing: "state-processing",
  "ai-speaking": "state-ai-speaking",
  error: "state-error",
};

// States in which a conversation is running (or being set up) and the
// side controls (mute / stop / restart) are available.
const LIVE_STATES: ReadonlySet<AppState> = new Set<AppState>([
  "listening",
  "user-speaking",
  "processing",
  "ai-speaking",
  "starting",
]);

/** Map our FSM to the embed protocol's coarse `AppPhase`.
 *
 * We deliberately collapse every intra-session state to `"live"` -
 * including `"starting"`. The host treats `"connecting"` as "the
 * embedded app is not interactive, reveal my ConnectingView overlay
 * on top", which made sense during the initial wake-up handshake
 * (owned by `connectToHost()` and surfaced from outside this FSM)
 * but is wrong for the post-boot transitions. Every time we re-enter
 * `"starting"` (user tapped the orb to begin a conversation, or the
 * silent reconnect kicked in) the app is already mounted and has its
 * own visual feedback - the `.state-starting .ind-spinner` CSS
 * showing a spinner inside the central circle. Reporting
 * `"connecting"` would re-paint the host's full-bleed overlay over
 * our spinner, which both flashes the user back to an
 * "establishing-session" screen they already cleared, and breaks
 * the silent-reconnect contract for ICE blips.
 */
function mapAppStateToHostPhase(
  state: AppState,
): "boot" | "connecting" | "live" | "leaving" | "error" {
  if (state === "error") return "error";
  return "live";
}

// ─── DOM refs ───────────────────────────────────────────────────────────

/** `querySelector` that throws instead of returning `null`. */
const $ = <T extends Element>(selector: string): T => {
  const el = document.querySelector<T>(selector);
  if (!el) throw new Error(`Missing element: ${selector}`);
  return el;
};

// NOTE(review): element types below are inferred from how each ref is
// used in this file (`.disabled`, `.showModal()`, `.value`, ...) -
// confirm against the markup.
const appRoot = $<HTMLElement>("#app");
const circleBtn = $<HTMLButtonElement>("#main-circle");
const circleCaption = $<HTMLElement>("#circle-caption");
const toolToast = $<HTMLElement>("#tool-toast");
const toolToastText = toolToast.querySelector<HTMLElement>(
  ".tool-toast-text",
)!;
const micBtn = $<HTMLButtonElement>("#mic-btn");
const stopBtn = $<HTMLButtonElement>("#stop-btn");
const settingsBtn = $<HTMLButtonElement>("#settings-btn");
const settingsModal = $<HTMLDialogElement>("#settings-modal");
const inputInstructions = $<HTMLTextAreaElement>("#openai-instructions");
const restartBtn = $<HTMLButtonElement>("#restart-conversation");
const restartHint = $<HTMLElement>("#restart-hint");
const settingsForm = settingsModal.querySelector("form")!;

// ─── Runtime state ──────────────────────────────────────────────────────

/** Minimal view of a wake-lock sentinel; the optional members let us
 * notice when the browser released the lock on its own. */
interface WakeLockHandle {
  release(): Promise<void>;
  readonly released?: boolean;
  addEventListener?(type: "release", listener: () => void): void;
}

let currentState: AppState = "idle";
let settings: Settings = loadSettings();
let robot: ReachyMiniInstance | null = null;
let hostHandle: ConnectedHandle | null = null;
let openai: OpenaiRealtimeClient | null = null;
let openaiSink: HTMLAudioElement | null = null;
let wobbler: HeadWobbler | null = null;
let antennas: AntennasOscillator | null = null;
let micLevel: MicLevelMonitor | null = null;
let aiLevel: AiLevelMonitor | null = null;
let movePlayer: MovePlayer | null = null;
let movePlaying = false;
let toolPoseRestoreTimer: number | null = null;
let openaiReconnecting = false;
let openaiReconnectAttempts = 0;
let wakeLock: WakeLockHandle | null = null;
let wakeLockUnavailable = false;
let micMuted = false;
// Bumped by every `teardownConversation()`. Async flows capture it
// before awaiting and bail out if it changed, so a stop / restart that
// lands mid-await cannot resurrect a session.
let sessionEpoch = 0;

// ─── UI rendering ───────────────────────────────────────────────────────

/**
 * Move the FSM to `next`, report the coarse phase to the host and
 * refresh every piece of UI that depends on the state.
 */
function setState(next: AppState): void {
  currentState = next;
  hostHandle?.setAppState({ phase: mapAppStateToHostPhase(next) });

  const view = STATE_VIEWS[next];
  circleBtn.disabled = view.disabled;
  circleBtn.className = `circle ${STATE_CLASS[next]}`;
  // The error state gets its own caption too; otherwise a stale
  // "Starting" / "Reconnecting" would stay on screen after a failure.
  setCaption(view.caption, next === "error" ? "error" : "");

  const live = LIVE_STATES.has(next);
  document.querySelector(".orb-wrap")?.classList.toggle("live", live);
  micBtn.setAttribute("aria-hidden", live ? "false" : "true");
  stopBtn.setAttribute("aria-hidden", live ? "false" : "true");
  micBtn.tabIndex = live ? 0 : -1;
  stopBtn.tabIndex = live ? 0 : -1;
  updateRestartAvailability();
}

/** The restart button only makes sense while a conversation is live. */
function updateRestartAvailability(): void {
  const live = LIVE_STATES.has(currentState);
  restartBtn.disabled = !live;
  restartHint.hidden = live;
}

/**
 * Set the caption under the circle. `kind` adds a modifier class; an
 * empty caption additionally gets the `empty` class.
 */
function setCaption(text: string, kind: "" | "error" | "muted" = ""): void {
  const trimmed = text.trim();
  circleCaption.textContent = trimmed;
  circleCaption.className = ["circle-caption", kind, trimmed ? "" : "empty"]
    .filter(Boolean)
    .join(" ");
}

/**
 * Mirror the mute flag into module state and every attribute of the
 * mic button (class, `aria-label`, `title`). Does not talk to the
 * robot; callers do that themselves where appropriate.
 */
function syncMicMuteUi(muted: boolean): void {
  micMuted = muted;
  const label = muted ? "Unmute" : "Mute";
  micBtn.classList.toggle("muted", muted);
  micBtn.setAttribute("aria-label", label);
  micBtn.title = label;
}

// ─── Settings modal ─────────────────────────────────────────────────────

function openSettings(): void {
  inputInstructions.value = settings.instructions;
  updateRestartAvailability();
  settingsModal.showModal();
}

/** Read the form, fall back to the default prompt when blank, persist. */
function commitSettingsFromForm(): void {
  settings = {
    instructions: inputInstructions.value.trim() || DEFAULT_INSTRUCTIONS,
  };
  saveSettings(settings);
}

settingsBtn.addEventListener("click", () => openSettings());

settingsForm.addEventListener("submit", (event) => {
  const submitter = (event as SubmitEvent)
    .submitter as HTMLButtonElement | null;
  // Only the "save" button commits; any other submitter just closes.
  if (submitter?.value !== "save") return;
  commitSettingsFromForm();
});

restartBtn.addEventListener("click", async () => {
  if (!LIVE_STATES.has(currentState)) return;
  commitSettingsFromForm();
  settingsModal.close();
  try {
    await teardownConversation();
    await startConversation();
  } catch (err) {
    await onFatalError(err);
  }
});

// ─── Click handler for the central circle ──────────────────────────────

circleBtn.addEventListener("click", async () => {
  try {
    if (currentState === "idle") {
      await startConversation();
      return;
    }
    if (currentState === "error") {
      // First tap after an error only clears it; the next tap starts.
      circleCaption.removeAttribute("title");
      setState("idle");
      return;
    }
  } catch (err) {
    await onFatalError(err);
  }
});

// ─── Side controls (mic mute + stop) ────────────────────────────────────

micBtn.addEventListener("click", () => {
  if (!robot) return;
  const muted = !micMuted;
  robot.setMicMuted(muted);
  syncMicMuteUi(muted);
});

stopBtn.addEventListener("click", async () => {
  try {
    await teardownConversation();
    syncMicMuteUi(false);
    setState("idle");
  } catch (err) {
    await onFatalError(err);
  }
});

// ─── High-level flow steps ──────────────────────────────────────────────

/**
 * Start a conversation: grab the robot mic track, start the local
 * animations / monitors, then open the OpenAI Realtime session.
 * Failures end in the `error` state via `onFatalError()`.
 */
async function startConversation(): Promise<void> {
  if (!robot) return;
  const activeRobot = robot;
  setState("starting");

  const robotMicTrack = getRobotMicTrack(activeRobot);
  if (!robotMicTrack) {
    await onFatalError(
      new Error("Could not find the robot's microphone track"),
    );
    return;
  }

  startMicLevelMonitor(robotMicTrack);
  startAntennas();
  void acquireWakeLock();
  openaiReconnectAttempts = 0;

  const epoch = sessionEpoch;
  try {
    await connectOpenai(robotMicTrack);
  } catch (err) {
    // A changed epoch means the session was torn down (stop / restart)
    // while we were connecting: that is not an error to surface.
    if (epoch === sessionEpoch) await onFatalError(err);
    return;
  }
  if (epoch !== sessionEpoch) return;

  // Make sure the robot's outbound audio path is open so OpenAI's voice
  // reaches the speakers. Keep the mute button in agreement with that.
  activeRobot.setMicMuted(false);
  syncMicMuteUi(false);
}

/**
 * Mint an ephemeral key, build the Realtime client, wire its events
 * to the FSM and connect it.
 *
 * @throws Error with a short, user-facing message when the key mint
 *   fails; anything `client.connect()` throws is propagated as-is.
 */
async function connectOpenai(robotMicTrack: MediaStreamTrack): Promise<void> {
  const epoch = sessionEpoch;

  // Mint an OpenAI Realtime ephemeral key from the visitor's HF token
  // (seeded into `sessionStorage.hf_token` by the host shell). The
  // returned `ek_…` value is used as the Bearer for the SDP handshake.
  // Failures bubble back to `startConversation()` as a fatal error
  // with a user-friendly caption.
  let ephemeralKey: string;
  try {
    ephemeralKey = await mintEphemeralKey();
  } catch (err) {
    if (err instanceof EphemeralKeyError) {
      throw new Error(captionForEphemeralError(err));
    }
    throw err;
  }
  // Torn down while minting: do not open a session nobody asked for.
  if (epoch !== sessionEpoch) return;

  const client = new OpenaiRealtimeClient({
    apiKey: ephemeralKey,
    model: DEFAULT_MODEL,
    voice: DEFAULT_VOICE,
    instructions: settings.instructions,
    inputTrack: robotMicTrack,
    tools: ROBOT_TOOLS,
  });

  // Events from a client that is no longer the current one (replaced by
  // a reconnect, or dropped by teardown) must not drive the UI.
  const isCurrent = (): boolean => openai === client;

  client.on("outputTrack", ({ track }) => {
    if (!isCurrent()) return;
    routeOpenaiToRobot(track);
    startWobbler(track);
    startAiLevelMonitor(track);
  });

  client.on("status", ({ status }) => {
    if (!isCurrent()) return;
    switch (status) {
      case "connected":
        if (currentState === "ai-speaking" && aiLevel) {
          // Only fall back to listening once the assistant's audio
          // has actually gone quiet.
          aiLevel.waitForSilence(900, () => {
            if (currentState === "ai-speaking") {
              setState("listening");
              antennas?.resume();
            }
          });
        } else {
          setState("listening");
          antennas?.resume();
        }
        openaiReconnectAttempts = 0;
        break;
      case "user-speaking":
        aiLevel?.cancelSilenceWait();
        setState("user-speaking");
        wobbler?.reset();
        antennas?.freeze();
        break;
      case "processing":
        setState("processing");
        antennas?.resume();
        break;
      case "ai-speaking":
        aiLevel?.cancelSilenceWait();
        setState("ai-speaking");
        antennas?.resume();
        break;
      case "error":
        if (openaiReconnecting) return;
        void tryReconnectOpenai(
          robotMicTrack,
          new Error("OpenAI connection lost"),
        );
        break;
      default:
        break;
    }
  });

  client.on("toolCall", (call) => handleToolCall(call));
  client.on("error", ({ error }) => console.error("[openai]", error));

  openai = client;
  await client.connect();
}

/**
 * One silent reconnect attempt after the OpenAI connection dropped.
 * A second consecutive failure, or a failure of the attempt itself,
 * becomes a fatal error.
 */
async function tryReconnectOpenai(
  robotMicTrack: MediaStreamTrack,
  cause: Error,
): Promise<void> {
  if (openaiReconnecting) return;
  if (openaiReconnectAttempts >= 1) {
    await onFatalError(cause);
    return;
  }

  const epoch = sessionEpoch;
  openaiReconnecting = true;
  openaiReconnectAttempts += 1;
  console.warn("[openai] connection lost, attempting silent reconnect…", cause);
  setState("starting");
  setCaption("Reconnecting", "muted");
  stopWobbler();
  antennas?.freeze();

  try {
    await openai?.close();
  } catch (err) {
    console.warn("[openai] close during reconnect failed:", err);
  }
  // Conversation was stopped while closing: nothing left to reconnect.
  if (epoch !== sessionEpoch) return;
  openai = null;

  // Drop the cached ephemeral key before the reconnect attempt. The
  // mint-cache is sized for back-to-back handshakes in normal
  // conditions, but a real disconnect implies the key may have been
  // server-revoked (clock skew, master key rotation, rate-limit). The
  // extra ~200 ms round-trip to re-mint is cheap compared to the
  // alternative: an OpenAI 401 that fails the silent reconnect and
  // forces the user back to the central circle.
  invalidateEphemeralKey();

  await new Promise<void>((resolve) => setTimeout(resolve, 500));
  if (epoch !== sessionEpoch) return;

  try {
    await connectOpenai(robotMicTrack);
  } catch (err) {
    if (epoch !== sessionEpoch) return;
    openaiReconnecting = false;
    await onFatalError(err instanceof Error ? err : new Error(String(err)));
    return;
  }
  if (epoch === sessionEpoch) openaiReconnecting = false;
}

/**
 * User-facing caption for an `EphemeralKeyError`. Kept terse because
 * the central circle's caption only has room for a single short line;
 * the full error message goes to the `circleCaption.title` tooltip
 * via `onFatalError()`.
 */
function captionForEphemeralError(err: EphemeralKeyError): string {
  if (err.reason === "hf_token_missing") {
    return "Sign in to Hugging Face to start a conversation";
  }
  return `Could not reach the OpenAI key service (HTTP ${err.status ?? "?"})`;
}

/** First audio track received on the robot peer connection, if any. */
function getRobotMicTrack(
  robotInstance: ReachyMiniInstance,
): MediaStreamTrack | null {
  const pc = robotInstance._pc;
  if (!pc) return null;
  for (const receiver of pc.getReceivers()) {
    if (receiver.track && receiver.track.kind === "audio") {
      return receiver.track;
    }
  }
  return null;
}

/**
 * Send the assistant's audio track to the robot by swapping it onto
 * the peer connection's audio sender, and attach it to a muted
 * `<audio>` element in the page.
 */
function routeOpenaiToRobot(track: MediaStreamTrack): void {
  if (!robot) return;
  const pc = robot._pc;
  if (!pc) return;

  const audioTransceiver = pc
    .getTransceivers()
    .find(
      (t) =>
        t.receiver.track?.kind === "audio" || t.sender.track?.kind === "audio",
    );

  if (audioTransceiver) {
    const direction = audioTransceiver.direction;
    if (direction !== "sendrecv" && direction !== "sendonly") {
      try {
        audioTransceiver.direction = "sendrecv";
      } catch (err) {
        console.warn("[main] could not bump transceiver direction:", err);
      }
    }
    audioTransceiver.sender.replaceTrack(track).catch((err) => {
      console.error("[main] replaceTrack failed", err);
    });
  } else {
    console.warn(
      "[main] no audio transceiver on the robot peer — bidirectional audio unavailable",
    );
  }

  if (!openaiSink) {
    openaiSink = document.createElement("audio");
    openaiSink.autoplay = true;
    openaiSink.muted = true;
    document.body.appendChild(openaiSink);
  }
  openaiSink.srcObject = new MediaStream([track]);
}

// ─── Head motion agent ──────────────────────────────────────────────────

/** (Re)start the wobbler that derives head offsets from `assistantTrack`. */
function startWobbler(assistantTrack: MediaStreamTrack): void {
  if (!robot) return;
  wobbler?.stop();
  wobbler = new HeadWobbler({
    track: assistantTrack,
    onOffsets: ({ roll, pitch, yaw }) => {
      // A tool-requested pose or a choreography owns the head for now.
      if (toolPoseRestoreTimer !== null) return;
      if (movePlaying) return;
      const ok = robot?.setHeadRpyDeg(roll, pitch, yaw) ?? false;
      recordSend(ok, "wobbler");
    },
  });
  wobbler.start();
}

/** Stop the wobbler and send the head back to (0, 0, 0). */
function stopWobbler(): void {
  wobbler?.stop();
  wobbler = null;
  robot?.setHeadRpyDeg(0, 0, 0);
}

// ─── Antennas oscillator ────────────────────────────────────────────────

function startAntennas(): void {
  if (!robot) return;
  antennas?.stop();
  antennas = new AntennasOscillator({
    onAntennas: (right, left) => {
      if (movePlaying) return;
      const ok = robot?.setAntennasDeg(right, left) ?? false;
      recordSend(ok, "antennas");
    },
  });
  antennas.start();
}

/** Stop the oscillator and send the antennas back to (0, 0). */
function stopAntennas(): void {
  antennas?.stop();
  antennas = null;
  robot?.setAntennasDeg(0, 0);
}

// ─── Tool-call handler ─────────────────────────────────────────────────

let toolToastTimer: number | null = null;

/** Show `text` in the toast for `durationMs`, replacing any current one. */
function showToolToast(text: string, durationMs = 2800): void {
  if (toolToastTimer !== null) {
    clearTimeout(toolToastTimer);
    toolToastTimer = null;
  }
  toolToastText.textContent = text;
  toolToast.classList.add("visible");
  toolToast.setAttribute("aria-hidden", "false");
  toolToastTimer = window.setTimeout(() => {
    toolToast.classList.remove("visible");
    toolToast.setAttribute("aria-hidden", "true");
    toolToastTimer = null;
  }, durationMs);
}

/** Human-readable toast text for a tool call. */
function describeToolCall(name: string, args: Record<string, unknown>): string {
  switch (name) {
    case "move_head": {
      const direction = String(args.direction ?? "").toLowerCase();
      const labels: Record<string, string> = {
        up: "Looking up",
        down: "Looking down",
        left: "Looking left",
        right: "Looking right",
        center: "Looking forward",
        neutral: "Looking forward",
      };
      return labels[direction] ?? `Moving head: ${direction || "?"}`;
    }
    case "play_move": {
      const move = String(args.name ?? "");
      return move ? `Playing ${move}` : "Playing move";
    }
    default:
      return `Tool: ${name}`;
  }
}

/**
 * Execute a tool call from the model and send the outcome back.
 * Unknown tools or arguments are answered with `ok: false` and the
 * list of valid values rather than ignored.
 */
async function handleToolCall({
  callId,
  name,
  arguments: args,
}: {
  callId: string;
  name: string;
  arguments: Record<string, unknown>;
}): Promise<void> {
  // Capture the client: the module-level `openai` can be nulled or
  // replaced while a move is playing.
  const client = openai;
  if (!robot || !client) return;
  showToolToast(describeToolCall(name, args));

  let result: { ok: boolean; message: string };
  switch (name) {
    case "move_head": {
      const direction = String(args.direction ?? "");
      if (direction in HEAD_POSES) {
        applyToolHeadPose(HEAD_POSES[direction as HeadPoseName]);
        result = { ok: true, message: `head moved to ${direction}` };
      } else {
        result = {
          ok: false,
          message: `unknown direction '${direction}'. Valid: ${Object.keys(HEAD_POSES).join(", ")}`,
        };
      }
      break;
    }
    case "play_move": {
      const moveName = String(args.name ?? "");
      if ((MOVE_IDS as readonly string[]).includes(moveName)) {
        try {
          await playMove(moveName as MoveId);
          result = { ok: true, message: `played move '${moveName}'` };
        } catch (err) {
          result = {
            ok: false,
            message: `failed to play '${moveName}': ${err instanceof Error ? err.message : String(err)}`,
          };
        }
      } else {
        result = {
          ok: false,
          message: `unknown move '${moveName}'. Valid: ${MOVE_IDS.join(", ")}`,
        };
      }
      break;
    }
    default:
      result = { ok: false, message: `unknown tool '${name}'` };
  }

  // The session that issued this call is gone: nobody to answer.
  if (openai !== client) return;
  client.sendToolResponse(callId, result);
}

/**
 * Play a catalog move. `movePlaying` is raised for the duration so the
 * wobbler and antenna callbacks stay quiet; the antennas are sent back
 * to (0, 0) afterwards.
 */
async function playMove(name: MoveId): Promise<void> {
  if (!robot) return;
  const activeRobot = robot;
  movePlayer ??= new MovePlayer(activeRobot);
  movePlaying = true;
  try {
    await movePlayer.play(name);
  } finally {
    movePlaying = false;
    activeRobot.setAntennasDeg(0, 0);
  }
}

/**
 * Apply a tool-requested head pose. While the 1200 ms timer is armed
 * the wobbler callback skips its updates, so the pose stays visible.
 */
function applyToolHeadPose(pose: {
  roll: number;
  pitch: number;
  yaw: number;
}): void {
  if (!robot) return;
  robot.setHeadRpyDeg(pose.roll, pose.pitch, pose.yaw);
  if (toolPoseRestoreTimer !== null) clearTimeout(toolPoseRestoreTimer);
  toolPoseRestoreTimer = window.setTimeout(() => {
    toolPoseRestoreTimer = null;
  }, 1200);
}

// ─── Mic-level monitor (circle audio-reactivity) ────────────────────────

/**
 * Analyses an audio track every animation frame and publishes the
 * result as CSS custom properties on the root element:
 * `--audio-level` (smoothed RMS level) and `--bar0`..`--bar4`
 * (five frequency bands).
 */
class MicLevelMonitor {
  private ctx: AudioContext | null = null;
  private analyser: AnalyserNode | null = null;
  private source: MediaStreamAudioSourceNode | null = null;
  private raf = 0;
  private timeBuf: Float32Array<ArrayBuffer> | null = null;
  private freqBuf: Uint8Array<ArrayBuffer> | null = null;
  private level = 0;
  private bands = [0, 0, 0, 0, 0];

  // FFT bin boundaries of the five bands: band b covers
  // [BAND_EDGES[b], BAND_EDGES[b + 1]).
  private static readonly BAND_EDGES = [4, 8, 16, 32, 64, 128];
  private static readonly LOG1P_10 = Math.log1p(10);

  /** Log-compress a value so that 0 maps to 0 and 1 maps to 1. */
  private static compress(v: number): number {
    return Math.log1p(v * 10) / MicLevelMonitor.LOG1P_10;
  }

  /** Start (or restart) monitoring `track`. */
  start(track: MediaStreamTrack): void {
    this.stop();
    const ctx = new AudioContext();
    const src = ctx.createMediaStreamSource(new MediaStream([track]));
    const analyser = ctx.createAnalyser();
    analyser.fftSize = 1024;
    analyser.smoothingTimeConstant = 0.75;
    src.connect(analyser);

    this.ctx = ctx;
    this.source = src;
    this.analyser = analyser;
    this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
    this.freqBuf = new Uint8Array(
      new ArrayBuffer(analyser.frequencyBinCount),
    );

    const rootStyle = document.documentElement.style;
    const tick = (): void => {
      const an = this.analyser;
      const tbuf = this.timeBuf;
      const fbuf = this.freqBuf;
      // `stop()` nulled the nodes: let the loop die.
      if (!an || !tbuf || !fbuf) return;

      an.getFloatTimeDomainData(tbuf);
      let sum = 0;
      for (let i = 0; i < tbuf.length; i++) sum += tbuf[i]! * tbuf[i]!;
      const rms = Math.sqrt(sum / tbuf.length);
      const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
      // Fast attack, slow release.
      const levelAttack = boosted > this.level ? 0.55 : 0.12;
      this.level += (boosted - this.level) * levelAttack;
      rootStyle.setProperty("--audio-level", this.level.toFixed(3));

      an.getByteFrequencyData(fbuf);
      const edges = MicLevelMonitor.BAND_EDGES;
      for (let b = 0; b < 5; b++) {
        const lo = edges[b]!;
        const hi = edges[b + 1]!;
        let bandSum = 0;
        for (let j = lo; j < hi; j++) bandSum += fbuf[j]!;
        const raw = MicLevelMonitor.compress(bandSum / (hi - lo) / 255);
        const bandAttack = raw > this.bands[b]! ? 0.35 : 0.12;
        this.bands[b]! += (raw - this.bands[b]!) * bandAttack;
        rootStyle.setProperty(
          `--bar${b}`,
          Math.min(1, this.bands[b]!).toFixed(3),
        );
      }
      this.raf = requestAnimationFrame(tick);
    };
    this.raf = requestAnimationFrame(tick);
  }

  /** Stop monitoring, release the audio graph and zero the CSS vars. */
  stop(): void {
    cancelAnimationFrame(this.raf);
    this.raf = 0;
    try {
      this.source?.disconnect();
      this.analyser?.disconnect();
      // `close()` is async: its rejection is not caught by try/catch.
      void this.ctx?.close().catch(() => {
        /* already closed */
      });
    } catch {
      /* swallow */
    }
    this.ctx = null;
    this.source = null;
    this.analyser = null;
    this.timeBuf = null;
    this.freqBuf = null;
    this.level = 0;
    this.bands = [0, 0, 0, 0, 0];
    const rootStyle = document.documentElement.style;
    rootStyle.setProperty("--audio-level", "0");
    for (let b = 0; b < 5; b++) rootStyle.setProperty(`--bar${b}`, "0");
  }

  /** Resume the audio context if it is currently suspended. */
  resumeAudio(): void {
    const ctx = this.ctx;
    if (!ctx || ctx.state !== "suspended") return;
    ctx.resume().catch((err) => {
      console.warn("[mic-level] audioCtx resume failed:", err);
    });
  }
}

function startMicLevelMonitor(track: MediaStreamTrack): void {
  micLevel ??= new MicLevelMonitor();
  micLevel.start(track);
}

function stopMicLevelMonitor(): void {
  micLevel?.stop();
}

/**
 * Same idea as `MicLevelMonitor` for the assistant's audio: publishes
 * `--ai-audio-level`, and can additionally call back once the track
 * has stayed below `SILENCE_THRESHOLD` for a requested duration.
 */
class AiLevelMonitor {
  private ctx: AudioContext | null = null;
  private analyser: AnalyserNode | null = null;
  private source: MediaStreamAudioSourceNode | null = null;
  private raf = 0;
  private timeBuf: Float32Array<ArrayBuffer> | null = null;
  private level = 0;
  private lastActiveTs = 0;
  private silenceWait: {
    quietMs: number;
    cb: () => void;
    maxWaitTimer: number | null;
  } | null = null;

  // RMS at or below this counts as silence.
  private static readonly SILENCE_THRESHOLD = 0.006;

  /** Start (or restart) monitoring `track`. */
  start(track: MediaStreamTrack): void {
    this.stop();
    const ctx = new AudioContext();
    const src = ctx.createMediaStreamSource(new MediaStream([track]));
    const analyser = ctx.createAnalyser();
    analyser.fftSize = 1024;
    analyser.smoothingTimeConstant = 0.75;
    src.connect(analyser);

    this.ctx = ctx;
    this.source = src;
    this.analyser = analyser;
    this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
    this.lastActiveTs = performance.now();

    const rootStyle = document.documentElement.style;
    const tick = (): void => {
      const an = this.analyser;
      const buf = this.timeBuf;
      if (!an || !buf) return;

      an.getFloatTimeDomainData(buf);
      let sum = 0;
      for (let i = 0; i < buf.length; i++) sum += buf[i]! * buf[i]!;
      const rms = Math.sqrt(sum / buf.length);
      const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
      const levelAttack = boosted > this.level ? 0.55 : 0.12;
      this.level += (boosted - this.level) * levelAttack;
      rootStyle.setProperty("--ai-audio-level", this.level.toFixed(3));

      const now = performance.now();
      if (rms > AiLevelMonitor.SILENCE_THRESHOLD) {
        this.lastActiveTs = now;
      } else if (this.silenceWait) {
        const quietFor = now - this.lastActiveTs;
        if (quietFor >= this.silenceWait.quietMs) {
          const { cb, maxWaitTimer } = this.silenceWait;
          // Clear before calling so the callback may arm a new wait.
          this.silenceWait = null;
          if (maxWaitTimer !== null) clearTimeout(maxWaitTimer);
          try {
            cb();
          } catch (err) {
            console.warn("[ai-level] silence callback threw:", err);
          }
        }
      }
      this.raf = requestAnimationFrame(tick);
    };
    this.raf = requestAnimationFrame(tick);
  }

  /** Stop monitoring, drop any pending silence wait, zero the CSS var. */
  stop(): void {
    cancelAnimationFrame(this.raf);
    this.raf = 0;
    this.cancelSilenceWait();
    try {
      this.source?.disconnect();
      this.analyser?.disconnect();
      // `close()` is async: its rejection is not caught by try/catch.
      void this.ctx?.close().catch(() => {
        /* already closed */
      });
    } catch {
      /* swallow */
    }
    this.ctx = null;
    this.source = null;
    this.analyser = null;
    this.timeBuf = null;
    this.level = 0;
    document.documentElement.style.setProperty("--ai-audio-level", "0");
  }

  /** Resume the audio context if it is currently suspended. */
  resumeAudio(): void {
    const ctx = this.ctx;
    if (!ctx || ctx.state !== "suspended") return;
    ctx.resume().catch((err) => {
      console.warn("[ai-level] audioCtx resume failed:", err);
    });
  }

  /**
   * Call `cb` once the track has been quiet for `quietMs`, or after
   * `maxWaitMs` at the latest. Replaces any pending wait; `cb` runs at
   * most once.
   */
  waitForSilence(quietMs: number, cb: () => void, maxWaitMs = 8000): void {
    this.cancelSilenceWait();
    const maxWaitTimer = window.setTimeout(() => {
      if (this.silenceWait?.cb === cb) {
        this.silenceWait = null;
        try {
          cb();
        } catch (err) {
          console.warn("[ai-level] max-wait callback threw:", err);
        }
      }
    }, maxWaitMs);
    this.silenceWait = { quietMs, cb, maxWaitTimer };
  }

  /** Drop the pending silence wait without calling its callback. */
  cancelSilenceWait(): void {
    if (!this.silenceWait) return;
    if (this.silenceWait.maxWaitTimer !== null) {
      clearTimeout(this.silenceWait.maxWaitTimer);
    }
    this.silenceWait = null;
  }
}

function startAiLevelMonitor(track: MediaStreamTrack): void {
  aiLevel ??= new AiLevelMonitor();
  aiLevel.start(track);
}

function stopAiLevelMonitor(): void {
  aiLevel?.stop();
}

// ─── Background-tab resilience ──────────────────────────────────────────

/**
 * Take a screen wake lock if the browser offers one. Gives up for good
 * (`wakeLockUnavailable`) when the API is missing or permanently denied.
 */
async function acquireWakeLock(): Promise<void> {
  if (wakeLockUnavailable) return;
  const anyNav = navigator as Navigator & {
    wakeLock?: {
      request(type: "screen"): Promise<WakeLockHandle>;
    };
  };
  if (!anyNav.wakeLock) {
    wakeLockUnavailable = true;
    return;
  }
  // The browser releases the lock by itself when the tab is hidden; a
  // released sentinel must not stop us from asking for a fresh one.
  if (wakeLock && wakeLock.released !== true) return;
  wakeLock = null;

  try {
    const sentinel = await anyNav.wakeLock.request("screen");
    // The conversation ended while the request was pending.
    if (!LIVE_STATES.has(currentState)) {
      void sentinel.release().catch(() => {
        /* swallow */
      });
      return;
    }
    sentinel.addEventListener?.("release", () => {
      if (wakeLock === sentinel) wakeLock = null;
    });
    wakeLock = sentinel;
  } catch (err) {
    const name = (err as { name?: string } | null)?.name;
    if (name === "NotAllowedError" || name === "SecurityError") {
      wakeLockUnavailable = true;
    } else {
      console.warn("[main] wakeLock.request failed:", err);
    }
    wakeLock = null;
  }
}

async function releaseWakeLock(): Promise<void> {
  const held = wakeLock;
  wakeLock = null;
  try {
    await held?.release();
  } catch {
    /* swallow */
  }
}

function resumeAudioContexts(): void {
  wobbler?.resumeAudio();
  micLevel?.resumeAudio();
  aiLevel?.resumeAudio();
}

// Coming back to the foreground during a live conversation: re-take the
// wake lock, wake the audio contexts and check the robot link.
document.addEventListener("visibilitychange", () => {
  if (document.hidden) return;
  if (LIVE_STATES.has(currentState)) {
    void acquireWakeLock();
    resumeAudioContexts();
    void probeRobotLink();
  }
});

// ─── Robot data-channel health ─────────────────────────────────────────

const SEND_FAILURE_LIMIT = 40;
const SEND_FAILURE_LOG_EVERY = 20;
let consecutiveSendFailures = 0;

/**
 * Track the outcome of a robot command. `SEND_FAILURE_LIMIT` failures
 * in a row are treated as a lost data channel and raise one fatal
 * error.
 */
function recordSend(ok: boolean, where: string): void {
  if (ok) {
    consecutiveSendFailures = 0;
    return;
  }
  // Already failed: the animation loops may tick a few more times
  // before teardown stops them; do not raise the error again.
  if (currentState === "error") return;

  consecutiveSendFailures += 1;
  if (
    consecutiveSendFailures === 1 ||
    consecutiveSendFailures % SEND_FAILURE_LOG_EVERY === 0
  ) {
    console.warn(
      `[main] robot send failed (${where}), ${consecutiveSendFailures} consecutive failures`,
    );
  }
  if (consecutiveSendFailures >= SEND_FAILURE_LIMIT) {
    // Reset so a retry starts with a clean slate.
    consecutiveSendFailures = 0;
    void onFatalError(
      new Error(
        "Lost the robot data channel (no commands acknowledged). Tap the circle to reconnect.",
      ),
    );
  }
}

/** Send one harmless command to check the data channel still works. */
async function probeRobotLink(): Promise<void> {
  if (!robot) return;
  const ok = robot.setAntennasDeg(0, 0);
  if (!ok) {
    await onFatalError(
      new Error(
        "Lost the robot data channel while the tab was hidden. Tap the circle to reconnect.",
      ),
    );
  }
}

/**
 * Stop everything a conversation started: timers, the move player, the
 * OpenAI client, animations, monitors, the wake lock and the audio
 * sink. Safe to call when nothing is running.
 */
async function teardownConversation(): Promise<void> {
  // Invalidate every async flow started for the session being closed.
  sessionEpoch += 1;

  if (toolPoseRestoreTimer !== null) {
    clearTimeout(toolPoseRestoreTimer);
    toolPoseRestoreTimer = null;
  }
  movePlayer?.stop();
  movePlaying = false;
  openaiReconnecting = false;
  openaiReconnectAttempts = 0;
  consecutiveSendFailures = 0;

  // Detach the client first so its late events are ignored.
  const client = openai;
  openai = null;
  try {
    await client?.close();
  } catch {
    /* swallow */
  }

  stopWobbler();
  stopAntennas();
  stopMicLevelMonitor();
  stopAiLevelMonitor();
  void releaseWakeLock();

  if (openaiSink) {
    openaiSink.srcObject = null;
    openaiSink.remove();
    openaiSink = null;
  }
}

/**
 * Switch to the `error` state, keep the full message as the caption's
 * tooltip and tear the conversation down.
 */
async function onFatalError(err: unknown): Promise<void> {
  const message = err instanceof Error ? err.message : String(err);
  console.error("[main] error:", err);
  setState("error");
  circleCaption.title = message;
  await teardownConversation();
}

// ─── Boot ───────────────────────────────────────────────────────────────

/** Connect to the host, tune the audio board and reveal the UI. */
async function boot(): Promise<void> {
  hostHandle = await connectToHost();
  robot = hostHandle.reachy;

  // Tune the XVF3800 audio board for conversation. Mirrors the Python
  // `apply_audio_startup_config()` call right after `start_recording()`
  // / `start_playing()` in `reachy_mini_conversation_app/console.py`:
  // by the time `connectToHost()` resolves the host's bridge has
  // already negotiated WebRTC audio in both directions and the
  // daemon's audio pipeline is hot, so the batched parameter write
  // lands on a settled board.
  //
  // Fire-and-forget: a failure (older SDK without `applyAudioConfig`,
  // missing audio board on a Lite running off-robot, DataChannel
  // burp) is non-fatal - the helper logs and we keep going with the
  // daemon's default tuning. The `void` makes the lack of await
  // explicit so we don't block the UI reveal on a multi-parameter
  // verify roundtrip (~100 ms × N).
  void applyAudioStartupConfig(robot);

  // Reveal the UI now that the SDK is ready.
  appRoot.classList.remove("hidden");
  document.body.classList.remove("booting");

  hostHandle.onLeave(async () => {
    await teardownConversation();
    syncMicMuteUi(false);
  });

  setState("idle");
}

void boot().catch((err) => {
  console.error("[minimal-conversation/embed] bootstrap failed", err);
  // Tell the host the embed could not start; best effort.
  try {
    window.parent.postMessage(
      {
        source: "reachy-mini",
        type: "embed:error",
        version: 1,
        message: err instanceof Error ? err.message : String(err),
        fatal: true,
      },
      window.location.origin,
    );
  } catch {
    /* swallow */
  }
});