tfrere's picture
tfrere HF Staff
fix(embed): stop re-raising the host ConnectingView on orb tap
2c51c9e
/**
* Minimal Conversation - embedded app entry point.
*
* Mounted by the dispatcher when the URL has `?embedded=1`, i.e. we're
* inside the host's iframe. The host has already:
* - signed the user in (HF OAuth),
* - let them pick a robot,
* - established the WebRTC session,
* - run the wake-up trajectory.
*
* `connectToHost()` resolves with a live SDK handle past all that.
* From there this module owns:
* - kicking off the OpenAI Realtime session (ephemeral key minted
* from the visitor's HF token, see `ephemeral-key.ts`),
* - routing audio (robot mic ↔ OpenAI ↔ robot speakers),
* - the conversation FSM (listening / user-speaking / processing /
* ai-speaking) and its visual orb,
* - tool calls (head poses + body-language move catalog),
* - the in-app side controls (mute, end conversation),
* - the settings modal (only the model `instructions` is user-
* editable; model + voice are locked to the server defaults).
*
* Sign-in, picking, top-bar avatar, end-session button are NOT this
* app's concern - they live in `@pollen-robotics/reachy-mini-sdk/host`.
*/
import "./style.css";
import { connectToHost, type ConnectedHandle } from "@pollen-robotics/reachy-mini-sdk/host/embed";
import type { ReachyMiniInstance } from "@pollen-robotics/reachy-mini-sdk";
import { OpenaiRealtimeClient, type RealtimeTool } from "./openai-realtime";
import {
EphemeralKeyError,
invalidateEphemeralKey,
mintEphemeralKey,
} from "./ephemeral-key";
import { HeadWobbler } from "./head-wobbler";
import { AntennasOscillator } from "./antennas";
import { applyAudioStartupConfig } from "./audio-startup-config";
import {
MovePlayer,
MOVE_CATALOG,
MOVE_IDS,
type MoveId,
} from "./move-player";
// ─── Settings & defaults ────────────────────────────────────────────────
// Locked server-side defaults. Model + voice used to be user-editable
// but the ephemeral-key flow (see `ephemeral-key.ts`) provisions a
// session tied to a specific model: the Pollen mint endpoint
// (`/api/openai/ephemeral` on `pollen-robotics-reachy-mini`) currently
// mints for `gpt-realtime-2`. Sending a different model name in the
// SDP handshake's session config makes OpenAI reject the call with
// `400 invalid_model "Model X does not match the realtime token
// model."`, so we MUST pass back the exact same model string the mint
// endpoint defaulted to. If the upstream default ever changes, bump
// this constant in sync; until then, keep these two values aligned
// with `reachy_mini_mobile_app/.../settings.ts:DEFAULT_MODEL`.
const DEFAULT_MODEL = "gpt-realtime-2";
const DEFAULT_VOICE = "cedar";
const DEFAULT_INSTRUCTIONS =
"You are Reachy Mini, a small friendly robot companion. " +
"Keep replies short, warm, and spoken. Avoid long monologues. " +
"You control a small robot body. Two tools are available:\n" +
" - `move_head`: point the head in a named direction (up, down, left, " +
"right, tilt_left, tilt_right, center). Instant, use for subtle gestures " +
"that accompany a sentence.\n" +
" - `play_move`: trigger a short pre-recorded choreography (1-4s). The " +
"catalog mixes `dance` entries (rhythmic, playful) and `emotion` entries " +
"(reactive body language). Pick a dance when the moment calls for " +
"theatricality (hi, joke, groove) and an emotion when reacting to " +
"something the user just said (surprise, curiosity, praise, bad news).\n" +
"Use tools sparingly, never more than once per reply.";
// `instructions` is the only persisted setting now that the API key
// comes from the HF-token ephemeral mint and model/voice are locked
// server-side. The legacy `reachyMini.openai.{apiKey,model,voice}`
// localStorage entries are intentionally left in place for any
// returning visitor - they're simply ignored on read.
const STORAGE_KEYS = {
instructions: "reachyMini.openai.instructions",
} as const;
interface Settings {
instructions: string;
}
function loadSettings(): Settings {
return {
instructions:
localStorage.getItem(STORAGE_KEYS.instructions) ?? DEFAULT_INSTRUCTIONS,
};
}
function saveSettings(s: Settings): void {
localStorage.setItem(STORAGE_KEYS.instructions, s.instructions);
}
// ─── Robot tools exposed to the OpenAI model ────────────────────────────
const HEAD_POSES = {
center: { roll: 0, pitch: 0, yaw: 0 },
up: { roll: 0, pitch: -18, yaw: 0 },
down: { roll: 0, pitch: 18, yaw: 0 },
left: { roll: 0, pitch: 0, yaw: 25 },
right: { roll: 0, pitch: 0, yaw: -25 },
tilt_left: { roll: -15, pitch: 0, yaw: 0 },
tilt_right: { roll: 15, pitch: 0, yaw: 0 },
} as const;
type HeadPoseName = keyof typeof HEAD_POSES;
const ROBOT_TOOLS: RealtimeTool[] = [
{
name: "move_head",
description:
"Point the robot's head in a named direction. Use this to accompany " +
"your speech with a tiny, legible gesture.",
parameters: {
type: "object",
properties: {
direction: {
type: "string",
enum: Object.keys(HEAD_POSES),
description: "Named head pose to assume.",
},
},
required: ["direction"],
},
},
{
name: "play_move",
description:
"Trigger a short pre-recorded body-language move from the Reachy " +
"dances + emotions library. Catalog (id | kind | when to pick it):\n" +
MOVE_CATALOG.map(
(m) => ` - ${m.id} | ${m.kind} | ${m.description}`,
).join("\n"),
parameters: {
type: "object",
properties: {
name: {
type: "string",
enum: [...MOVE_IDS],
description: "Catalog id to play.",
},
},
required: ["name"],
},
},
];
// ─── App state machine ──────────────────────────────────────────────────
//
// The host owns all pre-session state (sign-in / picker / connecting).
// This app's FSM starts at `idle` once the host handed us a live SDK,
// and only transitions between conversation-level states.
type AppState =
| "idle"
| "starting"
| "listening"
| "user-speaking"
| "processing"
| "ai-speaking"
| "error";
interface StateView {
caption: string;
disabled: boolean;
}
const STATE_VIEWS: Record<AppState, StateView> = {
idle: { caption: "Tap to start", disabled: false },
starting: { caption: "Starting", disabled: true },
listening: { caption: "", disabled: false },
"user-speaking": { caption: "", disabled: false },
processing: { caption: "", disabled: false },
"ai-speaking": { caption: "", disabled: false },
error: { caption: "Tap to retry", disabled: false },
};
const STATE_CLASS: Record<AppState, string> = {
idle: "state-authenticated",
starting: "state-starting",
listening: "state-listening",
"user-speaking": "state-user-speaking",
processing: "state-processing",
"ai-speaking": "state-ai-speaking",
error: "state-error",
};
const LIVE_STATES: ReadonlySet<AppState> = new Set([
"listening",
"user-speaking",
"processing",
"ai-speaking",
"starting",
]);
/** Map our FSM to the embed protocol's coarse `AppPhase`.
*
* We deliberately collapse every intra-session state to `"live"` -
* including `"starting"`. The host treats `"connecting"` as "the
* embedded app is not interactive, reveal my ConnectingView overlay
* on top", which made sense during the initial wake-up handshake
* (owned by `connectToHost()` and surfaced from outside this FSM)
* but is wrong for the post-boot transitions. Every time we re-enter
* `"starting"` (user tapped the orb to begin a conversation, or the
* silent reconnect kicked in) the app is already mounted and has its
* own visual feedback - the `.state-starting .ind-spinner` CSS
* showing a spinner inside the central circle. Reporting
* `"connecting"` would re-paint the host's full-bleed overlay over
* our spinner, which both flashes the user back to an
* "establishing-session" screen they already cleared, and breaks
* the silent-reconnect contract for ICE blips.
*/
function mapAppStateToHostPhase(
state: AppState,
): "boot" | "connecting" | "live" | "leaving" | "error" {
if (state === "error") return "error";
return "live";
}
// ─── DOM refs ───────────────────────────────────────────────────────────
const $ = <T extends HTMLElement>(selector: string): T => {
const el = document.querySelector<T>(selector);
if (!el) throw new Error(`Missing element: ${selector}`);
return el;
};
const appRoot = $<HTMLElement>("#app");
const circleBtn = $<HTMLButtonElement>("#main-circle");
const circleCaption = $<HTMLParagraphElement>("#circle-caption");
const toolToast = $<HTMLElement>("#tool-toast");
const toolToastText = toolToast.querySelector<HTMLSpanElement>(
".tool-toast-text",
)!;
const micBtn = $<HTMLButtonElement>("#mic-btn");
const stopBtn = $<HTMLButtonElement>("#stop-btn");
const settingsBtn = $<HTMLButtonElement>("#settings-btn");
const settingsModal = $<HTMLDialogElement>("#settings-modal");
const inputInstructions = $<HTMLTextAreaElement>("#openai-instructions");
const restartBtn = $<HTMLButtonElement>("#restart-conversation");
const restartHint = $<HTMLElement>("#restart-hint");
const settingsForm = settingsModal.querySelector<HTMLFormElement>("form")!;
// ─── Runtime state ──────────────────────────────────────────────────────
let currentState: AppState = "idle";
let settings: Settings = loadSettings();
let robot: ReachyMiniInstance | null = null;
let hostHandle: ConnectedHandle | null = null;
let openai: OpenaiRealtimeClient | null = null;
let openaiSink: HTMLAudioElement | null = null;
let wobbler: HeadWobbler | null = null;
let antennas: AntennasOscillator | null = null;
let micLevel: MicLevelMonitor | null = null;
let aiLevel: AiLevelMonitor | null = null;
let movePlayer: MovePlayer | null = null;
let movePlaying = false;
let toolPoseRestoreTimer: number | null = null;
let openaiReconnecting = false;
let openaiReconnectAttempts = 0;
let wakeLock: { release(): Promise<void> } | null = null;
let wakeLockUnavailable = false;
let micMuted = false;
// ─── UI rendering ───────────────────────────────────────────────────────
function setState(next: AppState): void {
currentState = next;
hostHandle?.setAppState({ phase: mapAppStateToHostPhase(next) });
const view = STATE_VIEWS[next];
circleBtn.disabled = view.disabled;
circleBtn.className = `circle ${STATE_CLASS[next]}`;
if (next !== "error") setCaption(view.caption);
const live = LIVE_STATES.has(next);
document
.querySelector(".orb-wrap")
?.classList.toggle("live", live);
micBtn.setAttribute("aria-hidden", live ? "false" : "true");
stopBtn.setAttribute("aria-hidden", live ? "false" : "true");
micBtn.tabIndex = live ? 0 : -1;
stopBtn.tabIndex = live ? 0 : -1;
updateRestartAvailability();
}
function updateRestartAvailability(): void {
const live = LIVE_STATES.has(currentState);
restartBtn.disabled = !live;
restartHint.hidden = live;
}
function setCaption(text: string, kind: "" | "error" | "muted" = ""): void {
const trimmed = text.trim();
circleCaption.textContent = trimmed;
circleCaption.className = `circle-caption${kind ? ` ${kind}` : ""}${trimmed ? "" : " empty"}`;
}
// ─── Settings modal ─────────────────────────────────────────────────────
function openSettings(): void {
inputInstructions.value = settings.instructions;
updateRestartAvailability();
settingsModal.showModal();
}
settingsBtn.addEventListener("click", () => openSettings());
settingsForm.addEventListener("submit", (event) => {
const submitter = (event as SubmitEvent).submitter as HTMLButtonElement | null;
if (submitter?.value !== "save") return;
settings = {
instructions: inputInstructions.value.trim() || DEFAULT_INSTRUCTIONS,
};
saveSettings(settings);
});
restartBtn.addEventListener("click", async () => {
if (!LIVE_STATES.has(currentState)) return;
settings = {
instructions: inputInstructions.value.trim() || DEFAULT_INSTRUCTIONS,
};
saveSettings(settings);
settingsModal.close();
try {
await teardownConversation();
await startConversation();
} catch (err) {
onFatalError(err);
}
});
// ─── Click handler for the central circle ──────────────────────────────
circleBtn.addEventListener("click", async () => {
try {
if (currentState === "idle") {
await startConversation();
return;
}
if (currentState === "error") {
circleCaption.removeAttribute("title");
setState("idle");
return;
}
} catch (err) {
onFatalError(err);
}
});
// ─── Side controls (mic mute + stop) ────────────────────────────────────
micBtn.addEventListener("click", () => {
if (!robot) return;
micMuted = !micMuted;
robot.setMicMuted(micMuted);
micBtn.classList.toggle("muted", micMuted);
micBtn.setAttribute("aria-label", micMuted ? "Unmute" : "Mute");
micBtn.title = micMuted ? "Unmute" : "Mute";
});
stopBtn.addEventListener("click", async () => {
await teardownConversation();
micMuted = false;
micBtn.classList.remove("muted");
setState("idle");
});
// ─── High-level flow steps ──────────────────────────────────────────────
async function startConversation(): Promise<void> {
if (!robot) return;
setState("starting");
const robotMicTrack = getRobotMicTrack(robot);
if (!robotMicTrack) {
onFatalError(new Error("Could not find the robot's microphone track"));
return;
}
startMicLevelMonitor(robotMicTrack);
startAntennas();
void acquireWakeLock();
openaiReconnectAttempts = 0;
try {
await connectOpenai(robotMicTrack);
} catch (err) {
onFatalError(err);
return;
}
// Make sure the robot's outbound audio path is open so OpenAI's voice
// reaches the speakers.
robot.setMicMuted(false);
}
async function connectOpenai(robotMicTrack: MediaStreamTrack): Promise<void> {
// Mint an OpenAI Realtime ephemeral key from the visitor's HF token
// (seeded into `sessionStorage.hf_token` by the host shell). The
// returned `ek_…` value is used as the Bearer for the SDP handshake.
// Failures bubble back to `startConversation()` as a fatal error
// with a user-friendly caption.
let ephemeralKey: string;
try {
ephemeralKey = await mintEphemeralKey();
} catch (err) {
if (err instanceof EphemeralKeyError) {
throw new Error(captionForEphemeralError(err));
}
throw err;
}
const client = new OpenaiRealtimeClient({
apiKey: ephemeralKey,
model: DEFAULT_MODEL,
voice: DEFAULT_VOICE,
instructions: settings.instructions,
inputTrack: robotMicTrack,
tools: ROBOT_TOOLS,
});
client.on("outputTrack", ({ track }) => {
routeOpenaiToRobot(track);
startWobbler(track);
startAiLevelMonitor(track);
});
client.on("status", ({ status }) => {
switch (status) {
case "connected":
if (currentState === "ai-speaking" && aiLevel) {
aiLevel.waitForSilence(900, () => {
if (currentState === "ai-speaking") {
setState("listening");
antennas?.resume();
}
});
} else {
setState("listening");
antennas?.resume();
}
openaiReconnectAttempts = 0;
break;
case "user-speaking":
aiLevel?.cancelSilenceWait();
setState("user-speaking");
wobbler?.reset();
antennas?.freeze();
break;
case "processing":
setState("processing");
antennas?.resume();
break;
case "ai-speaking":
aiLevel?.cancelSilenceWait();
setState("ai-speaking");
antennas?.resume();
break;
case "error":
if (openaiReconnecting) return;
void tryReconnectOpenai(
robotMicTrack,
new Error("OpenAI connection lost"),
);
break;
default:
break;
}
});
client.on("toolCall", (call) => handleToolCall(call));
client.on("error", ({ error }) => console.error("[openai]", error));
openai = client;
await client.connect();
}
async function tryReconnectOpenai(
robotMicTrack: MediaStreamTrack,
cause: Error,
): Promise<void> {
if (openaiReconnecting) return;
if (openaiReconnectAttempts >= 1) {
onFatalError(cause);
return;
}
openaiReconnecting = true;
openaiReconnectAttempts += 1;
console.warn("[openai] connection lost, attempting silent reconnect…", cause);
setState("starting");
setCaption("Reconnecting", "muted");
stopWobbler();
antennas?.freeze();
try {
await openai?.close();
} catch (err) {
console.warn("[openai] close during reconnect failed:", err);
}
openai = null;
// Drop the cached ephemeral key before the reconnect attempt. The
// mint-cache is sized for back-to-back handshakes in normal
// conditions, but a real disconnect implies the key may have been
// server-revoked (clock skew, master key rotation, rate-limit). The
// extra ~200 ms round-trip to re-mint is cheap compared to the
// alternative: an OpenAI 401 that fails the silent reconnect and
// forces the user back to the central circle.
invalidateEphemeralKey();
await new Promise((resolve) => setTimeout(resolve, 500));
try {
await connectOpenai(robotMicTrack);
} catch (err) {
openaiReconnecting = false;
onFatalError(err instanceof Error ? err : new Error(String(err)));
return;
}
openaiReconnecting = false;
}
/**
* User-facing caption for an `EphemeralKeyError`. Kept terse because
* the central circle's caption only has room for a single short line;
* the full error message goes to the `circleCaption.title` tooltip
* via `onFatalError()`.
*/
function captionForEphemeralError(err: EphemeralKeyError): string {
if (err.reason === "hf_token_missing") {
return "Sign in to Hugging Face to start a conversation";
}
return `Could not reach the OpenAI key service (HTTP ${err.status ?? "?"})`;
}
function getRobotMicTrack(
robotInstance: ReachyMiniInstance,
): MediaStreamTrack | null {
const pc = robotInstance._pc;
if (!pc) return null;
for (const receiver of pc.getReceivers()) {
if (receiver.track && receiver.track.kind === "audio") {
return receiver.track;
}
}
return null;
}
function routeOpenaiToRobot(track: MediaStreamTrack): void {
if (!robot) return;
const pc = robot._pc;
if (!pc) return;
const transceivers = pc.getTransceivers();
const audioTransceiver = transceivers.find(
(t) =>
t.receiver.track?.kind === "audio" || t.sender.track?.kind === "audio",
);
const audioSender = audioTransceiver?.sender ?? null;
if (audioSender) {
if (
audioTransceiver &&
audioTransceiver.direction !== "sendrecv" &&
audioTransceiver.direction !== "sendonly"
) {
try {
audioTransceiver.direction = "sendrecv";
} catch (err) {
console.warn("[main] could not bump transceiver direction:", err);
}
}
audioSender.replaceTrack(track).catch((err) => {
console.error("[main] replaceTrack failed", err);
});
} else {
console.warn(
"[main] no audio transceiver on the robot peer β€” bidirectional audio unavailable",
);
}
if (!openaiSink) {
openaiSink = document.createElement("audio");
openaiSink.autoplay = true;
openaiSink.muted = true;
document.body.appendChild(openaiSink);
}
openaiSink.srcObject = new MediaStream([track]);
}
// ─── Head motion agent ──────────────────────────────────────────────────
function startWobbler(assistantTrack: MediaStreamTrack): void {
if (!robot) return;
wobbler?.stop();
wobbler = new HeadWobbler({
track: assistantTrack,
onOffsets: ({ roll, pitch, yaw }) => {
if (toolPoseRestoreTimer !== null) return;
if (movePlaying) return;
const ok = robot?.setHeadRpyDeg(roll, pitch, yaw) ?? false;
recordSend(ok, "wobbler");
},
});
wobbler.start();
}
function stopWobbler(): void {
wobbler?.stop();
wobbler = null;
robot?.setHeadRpyDeg(0, 0, 0);
}
// ─── Antennas oscillator ────────────────────────────────────────────────
function startAntennas(): void {
if (!robot) return;
antennas?.stop();
antennas = new AntennasOscillator({
onAntennas: (right, left) => {
if (movePlaying) return;
const ok = robot?.setAntennasDeg(right, left) ?? false;
recordSend(ok, "antennas");
},
});
antennas.start();
}
function stopAntennas(): void {
antennas?.stop();
antennas = null;
robot?.setAntennasDeg(0, 0);
}
// ─── Tool-call handler ─────────────────────────────────────────────────
let toolToastTimer: number | null = null;
function showToolToast(text: string, durationMs = 2800): void {
if (toolToastTimer !== null) {
clearTimeout(toolToastTimer);
toolToastTimer = null;
}
toolToastText.textContent = text;
toolToast.classList.add("visible");
toolToast.setAttribute("aria-hidden", "false");
toolToastTimer = window.setTimeout(() => {
toolToast.classList.remove("visible");
toolToast.setAttribute("aria-hidden", "true");
toolToastTimer = null;
}, durationMs);
}
function describeToolCall(name: string, args: Record<string, unknown>): string {
switch (name) {
case "move_head": {
const direction = String(args.direction ?? "").toLowerCase();
const labels: Record<string, string> = {
up: "Looking up",
down: "Looking down",
left: "Looking left",
right: "Looking right",
center: "Looking forward",
neutral: "Looking forward",
};
return labels[direction] ?? `Moving head: ${direction || "?"}`;
}
case "play_move": {
const move = String(args.name ?? "");
return move ? `Playing ${move}` : "Playing move";
}
default:
return `Tool: ${name}`;
}
}
async function handleToolCall({
callId,
name,
arguments: args,
}: {
callId: string;
name: string;
arguments: Record<string, unknown>;
}): Promise<void> {
if (!robot || !openai) return;
showToolToast(describeToolCall(name, args));
let result: { ok: boolean; message: string };
switch (name) {
case "move_head": {
const direction = String(args.direction ?? "");
if (direction in HEAD_POSES) {
const pose = HEAD_POSES[direction as HeadPoseName];
applyToolHeadPose(pose);
result = { ok: true, message: `head moved to ${direction}` };
} else {
result = {
ok: false,
message: `unknown direction '${direction}'. Valid: ${Object.keys(HEAD_POSES).join(", ")}`,
};
}
break;
}
case "play_move": {
const moveName = String(args.name ?? "");
if ((MOVE_IDS as readonly string[]).includes(moveName)) {
try {
await playMove(moveName as MoveId);
result = { ok: true, message: `played move '${moveName}'` };
} catch (err) {
result = {
ok: false,
message: `failed to play '${moveName}': ${err instanceof Error ? err.message : String(err)}`,
};
}
} else {
result = {
ok: false,
message: `unknown move '${moveName}'. Valid: ${MOVE_IDS.join(", ")}`,
};
}
break;
}
default:
result = { ok: false, message: `unknown tool '${name}'` };
}
openai.sendToolResponse(callId, result);
}
async function playMove(name: MoveId): Promise<void> {
if (!robot) return;
movePlayer ??= new MovePlayer(robot);
movePlaying = true;
try {
await movePlayer.play(name);
} finally {
movePlaying = false;
robot.setAntennasDeg(0, 0);
}
}
function applyToolHeadPose(pose: {
roll: number;
pitch: number;
yaw: number;
}): void {
if (!robot) return;
robot.setHeadRpyDeg(pose.roll, pose.pitch, pose.yaw);
if (toolPoseRestoreTimer !== null) clearTimeout(toolPoseRestoreTimer);
toolPoseRestoreTimer = window.setTimeout(() => {
toolPoseRestoreTimer = null;
}, 1200);
}
// ─── Mic-level monitor (circle audio-reactivity) ────────────────────────
class MicLevelMonitor {
private ctx: AudioContext | null = null;
private analyser: AnalyserNode | null = null;
private source: MediaStreamAudioSourceNode | null = null;
private raf = 0;
private timeBuf: Float32Array<ArrayBuffer> | null = null;
private freqBuf: Uint8Array<ArrayBuffer> | null = null;
private level = 0;
private bands = [0, 0, 0, 0, 0];
private static readonly BAND_EDGES = [4, 8, 16, 32, 64, 128];
private static readonly LOG1P_10 = Math.log1p(10);
private static compress(v: number): number {
return Math.log1p(v * 10) / MicLevelMonitor.LOG1P_10;
}
start(track: MediaStreamTrack): void {
this.stop();
const ctx = new AudioContext();
const src = ctx.createMediaStreamSource(new MediaStream([track]));
const analyser = ctx.createAnalyser();
analyser.fftSize = 1024;
analyser.smoothingTimeConstant = 0.75;
src.connect(analyser);
this.ctx = ctx;
this.source = src;
this.analyser = analyser;
this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
this.freqBuf = new Uint8Array(
new ArrayBuffer(analyser.frequencyBinCount),
);
const rootStyle = document.documentElement.style;
const tick = (): void => {
const an = this.analyser;
const tbuf = this.timeBuf;
const fbuf = this.freqBuf;
if (!an || !tbuf || !fbuf) return;
an.getFloatTimeDomainData(tbuf);
let sum = 0;
for (let i = 0; i < tbuf.length; i++) sum += tbuf[i]! * tbuf[i]!;
const rms = Math.sqrt(sum / tbuf.length);
const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
const levelAttack = boosted > this.level ? 0.55 : 0.12;
this.level += (boosted - this.level) * levelAttack;
rootStyle.setProperty("--audio-level", this.level.toFixed(3));
an.getByteFrequencyData(fbuf);
const edges = MicLevelMonitor.BAND_EDGES;
for (let b = 0; b < 5; b++) {
const lo = edges[b]!;
const hi = edges[b + 1]!;
let bandSum = 0;
for (let j = lo; j < hi; j++) bandSum += fbuf[j]!;
const raw = MicLevelMonitor.compress(bandSum / (hi - lo) / 255);
const bandAttack = raw > this.bands[b]! ? 0.35 : 0.12;
this.bands[b]! += (raw - this.bands[b]!) * bandAttack;
rootStyle.setProperty(
`--bar${b}`,
Math.min(1, this.bands[b]!).toFixed(3),
);
}
this.raf = requestAnimationFrame(tick);
};
this.raf = requestAnimationFrame(tick);
}
stop(): void {
cancelAnimationFrame(this.raf);
this.raf = 0;
try {
this.source?.disconnect();
this.analyser?.disconnect();
this.ctx?.close();
} catch {
/* swallow */
}
this.ctx = null;
this.source = null;
this.analyser = null;
this.timeBuf = null;
this.freqBuf = null;
this.level = 0;
this.bands = [0, 0, 0, 0, 0];
const rootStyle = document.documentElement.style;
rootStyle.setProperty("--audio-level", "0");
for (let b = 0; b < 5; b++) rootStyle.setProperty(`--bar${b}`, "0");
}
resumeAudio(): void {
const ctx = this.ctx;
if (!ctx || ctx.state !== "suspended") return;
ctx.resume().catch((err) => {
console.warn("[mic-level] audioCtx resume failed:", err);
});
}
}
function startMicLevelMonitor(track: MediaStreamTrack): void {
micLevel ??= new MicLevelMonitor();
micLevel.start(track);
}
function stopMicLevelMonitor(): void {
micLevel?.stop();
}
class AiLevelMonitor {
private ctx: AudioContext | null = null;
private analyser: AnalyserNode | null = null;
private source: MediaStreamAudioSourceNode | null = null;
private raf = 0;
private timeBuf: Float32Array<ArrayBuffer> | null = null;
private level = 0;
private lastActiveTs = 0;
private silenceWait: {
quietMs: number;
cb: () => void;
maxWaitTimer: number | null;
} | null = null;
private static readonly SILENCE_THRESHOLD = 0.006;
start(track: MediaStreamTrack): void {
this.stop();
const ctx = new AudioContext();
const src = ctx.createMediaStreamSource(new MediaStream([track]));
const analyser = ctx.createAnalyser();
analyser.fftSize = 1024;
analyser.smoothingTimeConstant = 0.75;
src.connect(analyser);
this.ctx = ctx;
this.source = src;
this.analyser = analyser;
this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
this.lastActiveTs = performance.now();
const rootStyle = document.documentElement.style;
const tick = (): void => {
const an = this.analyser;
const buf = this.timeBuf;
if (!an || !buf) return;
an.getFloatTimeDomainData(buf);
let sum = 0;
for (let i = 0; i < buf.length; i++) sum += buf[i]! * buf[i]!;
const rms = Math.sqrt(sum / buf.length);
const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
const levelAttack = boosted > this.level ? 0.55 : 0.12;
this.level += (boosted - this.level) * levelAttack;
rootStyle.setProperty("--ai-audio-level", this.level.toFixed(3));
const now = performance.now();
if (rms > AiLevelMonitor.SILENCE_THRESHOLD) {
this.lastActiveTs = now;
} else if (this.silenceWait) {
const quietFor = now - this.lastActiveTs;
if (quietFor >= this.silenceWait.quietMs) {
const { cb, maxWaitTimer } = this.silenceWait;
this.silenceWait = null;
if (maxWaitTimer !== null) clearTimeout(maxWaitTimer);
try {
cb();
} catch (err) {
console.warn("[ai-level] silence callback threw:", err);
}
}
}
this.raf = requestAnimationFrame(tick);
};
this.raf = requestAnimationFrame(tick);
}
stop(): void {
cancelAnimationFrame(this.raf);
this.raf = 0;
this.cancelSilenceWait();
try {
this.source?.disconnect();
this.analyser?.disconnect();
this.ctx?.close();
} catch {
/* swallow */
}
this.ctx = null;
this.source = null;
this.analyser = null;
this.timeBuf = null;
this.level = 0;
document.documentElement.style.setProperty("--ai-audio-level", "0");
}
resumeAudio(): void {
const ctx = this.ctx;
if (!ctx || ctx.state !== "suspended") return;
ctx.resume().catch((err) => {
console.warn("[ai-level] audioCtx resume failed:", err);
});
}
waitForSilence(quietMs: number, cb: () => void, maxWaitMs = 8000): void {
this.cancelSilenceWait();
const maxWaitTimer = window.setTimeout(() => {
if (this.silenceWait?.cb === cb) {
this.silenceWait = null;
try {
cb();
} catch (err) {
console.warn("[ai-level] max-wait callback threw:", err);
}
}
}, maxWaitMs);
this.silenceWait = { quietMs, cb, maxWaitTimer };
}
cancelSilenceWait(): void {
if (!this.silenceWait) return;
if (this.silenceWait.maxWaitTimer !== null) {
clearTimeout(this.silenceWait.maxWaitTimer);
}
this.silenceWait = null;
}
}
function startAiLevelMonitor(track: MediaStreamTrack): void {
aiLevel ??= new AiLevelMonitor();
aiLevel.start(track);
}
function stopAiLevelMonitor(): void {
aiLevel?.stop();
}
// ─── Background-tab resilience ──────────────────────────────────────────
async function acquireWakeLock(): Promise<void> {
if (wakeLockUnavailable) return;
const anyNav = navigator as Navigator & {
wakeLock?: {
request(type: "screen"): Promise<{ release(): Promise<void> }>;
};
};
if (!anyNav.wakeLock) {
wakeLockUnavailable = true;
return;
}
if (wakeLock) return;
try {
wakeLock = await anyNav.wakeLock.request("screen");
} catch (err) {
const name = (err as { name?: string } | null)?.name;
if (name === "NotAllowedError" || name === "SecurityError") {
wakeLockUnavailable = true;
} else {
console.warn("[main] wakeLock.request failed:", err);
}
wakeLock = null;
}
}
async function releaseWakeLock(): Promise<void> {
try {
await wakeLock?.release();
} catch {
/* swallow */
}
wakeLock = null;
}
function resumeAudioContexts(): void {
wobbler?.resumeAudio();
micLevel?.resumeAudio();
aiLevel?.resumeAudio();
}
document.addEventListener("visibilitychange", () => {
if (document.hidden) return;
if (LIVE_STATES.has(currentState)) {
void acquireWakeLock();
resumeAudioContexts();
void probeRobotLink();
}
});
// ─── Robot data-channel health ─────────────────────────────────────────
let consecutiveSendFailures = 0;
function recordSend(ok: boolean, where: string): void {
if (ok) {
consecutiveSendFailures = 0;
return;
}
consecutiveSendFailures += 1;
if (consecutiveSendFailures === 1 || consecutiveSendFailures % 20 === 0) {
console.warn(
`[main] robot send failed (${where}), ${consecutiveSendFailures} consecutive failures`,
);
}
if (consecutiveSendFailures >= 40) {
onFatalError(
new Error(
"Lost the robot data channel (no commands acknowledged). Tap the circle to reconnect.",
),
);
}
}
async function probeRobotLink(): Promise<void> {
if (!robot) return;
const ok = robot.setAntennasDeg(0, 0);
if (!ok) {
onFatalError(
new Error(
"Lost the robot data channel while the tab was hidden. Tap the circle to reconnect.",
),
);
}
}
async function teardownConversation(): Promise<void> {
if (toolPoseRestoreTimer !== null) {
clearTimeout(toolPoseRestoreTimer);
toolPoseRestoreTimer = null;
}
movePlayer?.stop();
movePlaying = false;
openaiReconnecting = false;
openaiReconnectAttempts = 0;
try {
await openai?.close();
} catch {
/* swallow */
}
openai = null;
stopWobbler();
stopAntennas();
stopMicLevelMonitor();
stopAiLevelMonitor();
void releaseWakeLock();
if (openaiSink) {
openaiSink.srcObject = null;
openaiSink.remove();
openaiSink = null;
}
}
async function onFatalError(err: unknown): Promise<void> {
const message = err instanceof Error ? err.message : String(err);
console.error("[main] error:", err);
setState("error");
circleCaption.title = message;
await teardownConversation();
}
// ─── Boot ───────────────────────────────────────────────────────────────
async function boot(): Promise<void> {
hostHandle = await connectToHost();
robot = hostHandle.reachy;
// Tune the XVF3800 audio board for conversation. Mirrors the Python
// `apply_audio_startup_config()` call right after `start_recording()`
// / `start_playing()` in `reachy_mini_conversation_app/console.py`:
// by the time `connectToHost()` resolves the host's bridge has
// already negotiated WebRTC audio in both directions and the
// daemon's audio pipeline is hot, so the batched parameter write
// lands on a settled board.
//
// Fire-and-forget: a failure (older SDK without `applyAudioConfig`,
// missing audio board on a Lite running off-robot, DataChannel
// burp) is non-fatal - the helper logs and we keep going with the
// daemon's default tuning. The `void` makes the lack of await
// explicit so we don't block the UI reveal on a multi-parameter
// verify roundtrip (~100 ms Γ— N).
void applyAudioStartupConfig(robot);
// Reveal the UI now that the SDK is ready.
appRoot.classList.remove("hidden");
document.body.classList.remove("booting");
hostHandle.onLeave(async () => {
await teardownConversation();
micMuted = false;
micBtn.classList.remove("muted");
});
setState("idle");
}
void boot().catch((err) => {
console.error("[minimal-conversation/embed] bootstrap failed", err);
try {
window.parent.postMessage(
{
source: "reachy-mini",
type: "embed:error",
version: 1,
message: err instanceof Error ? err.message : String(err),
fatal: true,
},
window.location.origin,
);
} catch {
/* swallow */
}
});