Spaces:

tfrere
/

minimal-conversation

Running

App Files Files Community

minimal-conversation / src /embed.ts

tfrere HF Staff

fix(embed): stop re-raising the host ConnectingView on orb tap

2c51c9e 8 days ago

raw

history blame contribute delete

37.6 kB

	/**
	* Minimal Conversation - embedded app entry point.
	*
	* Mounted by the dispatcher when the URL has `?embedded=1`, i.e. we're
	* inside the host's iframe. The host has already:
	* - signed the user in (HF OAuth),
	* - let them pick a robot,
	* - established the WebRTC session,
	* - run the wake-up trajectory.
	*
	* `connectToHost()` resolves with a live SDK handle past all that.
	* From there this module owns:
	* - kicking off the OpenAI Realtime session (ephemeral key minted
	* from the visitor's HF token, see `ephemeral-key.ts`),
	* - routing audio (robot mic ↔ OpenAI ↔ robot speakers),
	* - the conversation FSM (listening / user-speaking / processing /
	* ai-speaking) and its visual orb,
	* - tool calls (head poses + body-language move catalog),
	* - the in-app side controls (mute, end conversation),
	* - the settings modal (only the model `instructions` is user-
	* editable; model + voice are locked to the server defaults).
	*
	* Sign-in, picking, top-bar avatar, end-session button are NOT this
	* app's concern - they live in `@pollen-robotics/reachy-mini-sdk/host`.
	*/
	import "./style.css";

	import { connectToHost, type ConnectedHandle } from "@pollen-robotics/reachy-mini-sdk/host/embed";
	import type { ReachyMiniInstance } from "@pollen-robotics/reachy-mini-sdk";

	import { OpenaiRealtimeClient, type RealtimeTool } from "./openai-realtime";
	import {
	EphemeralKeyError,
	invalidateEphemeralKey,
	mintEphemeralKey,
	} from "./ephemeral-key";
	import { HeadWobbler } from "./head-wobbler";
	import { AntennasOscillator } from "./antennas";
	import { applyAudioStartupConfig } from "./audio-startup-config";
	import {
	MovePlayer,
	MOVE_CATALOG,
	MOVE_IDS,
	type MoveId,
	} from "./move-player";

	// ─── Settings & defaults ────────────────────────────────────────────────

	// Locked server-side defaults. Model + voice used to be user-editable
	// but the ephemeral-key flow (see `ephemeral-key.ts`) provisions a
	// session tied to a specific model: the Pollen mint endpoint
	// (`/api/openai/ephemeral` on `pollen-robotics-reachy-mini`) currently
	// mints for `gpt-realtime-2`. Sending a different model name in the
	// SDP handshake's session config makes OpenAI reject the call with
	// `400 invalid_model "Model X does not match the realtime token
	// model."`, so we MUST pass back the exact same model string the mint
	// endpoint defaulted to. If the upstream default ever changes, bump
	// this constant in sync; until then, keep these two values aligned
	// with `reachy_mini_mobile_app/.../settings.ts:DEFAULT_MODEL`.
	/** Realtime model identifier handed to `OpenaiRealtimeClient`. */
	const DEFAULT_MODEL = "gpt-realtime-2";

	/** Voice preset handed to `OpenaiRealtimeClient`. */
	const DEFAULT_VOICE = "cedar";

	/**
	 * System prompt used when the visitor has not stored custom instructions.
	 * The fragments are joined without a separator, so each one carries its own
	 * trailing space or newline.
	 */
	const DEFAULT_INSTRUCTIONS = [
	  "You are Reachy Mini, a small friendly robot companion. ",
	  "Keep replies short, warm, and spoken. Avoid long monologues. ",
	  "You control a small robot body. Two tools are available:\n",
	  " - `move_head`: point the head in a named direction (up, down, left, ",
	  "right, tilt_left, tilt_right, center). Instant, use for subtle gestures ",
	  "that accompany a sentence.\n",
	  " - `play_move`: trigger a short pre-recorded choreography (1-4s). The ",
	  "catalog mixes `dance` entries (rhythmic, playful) and `emotion` entries ",
	  "(reactive body language). Pick a dance when the moment calls for ",
	  "theatricality (hi, joke, groove) and an emotion when reacting to ",
	  "something the user just said (surprise, curiosity, praise, bad news).\n",
	  "Use tools sparingly, never more than once per reply.",
	].join("");

	// Only `instructions` is persisted: the API key is minted from the HF token
	// and model/voice are fixed constants. Legacy
	// `reachyMini.openai.{apiKey,model,voice}` localStorage entries are left in
	// place for returning visitors and are simply never read.
	const STORAGE_KEYS = { instructions: "reachyMini.openai.instructions" } as const;

	/** User-editable settings persisted in `localStorage`. */
	type Settings = {
	  /** System prompt passed to the realtime client. */
	  instructions: string;
	};

	/**
	 * Reads the persisted settings from `localStorage`.
	 *
	 * @returns The stored instructions, or `DEFAULT_INSTRUCTIONS` when the key
	 *   is absent. An empty stored string is returned as-is.
	 */
	function loadSettings(): Settings {
	  const stored = localStorage.getItem(STORAGE_KEYS.instructions);
	  return { instructions: stored ?? DEFAULT_INSTRUCTIONS };
	}

	/**
	 * Persists the settings to `localStorage`.
	 *
	 * @param s Settings whose `instructions` field is written under
	 *   `STORAGE_KEYS.instructions`.
	 */
	function saveSettings(s: Settings): void {
	  const { instructions } = s;
	  localStorage.setItem(STORAGE_KEYS.instructions, instructions);
	}

	// ─── Robot tools exposed to the OpenAI model ────────────────────────────

	// Named head poses selectable through the `move_head` tool. Each entry is a
	// roll/pitch/yaw triple that `handleToolCall()` passes to
	// `applyToolHeadPose()`, which forwards it to `robot.setHeadRpyDeg()`
	// (presumably degrees, going by that method's name - confirm against the SDK).
	// The keys double as the `enum` advertised to the model in `ROBOT_TOOLS`.
	const HEAD_POSES = {
	center: { roll: 0, pitch: 0, yaw: 0 },
	up: { roll: 0, pitch: -18, yaw: 0 },
	down: { roll: 0, pitch: 18, yaw: 0 },
	left: { roll: 0, pitch: 0, yaw: 25 },
	right: { roll: 0, pitch: 0, yaw: -25 },
	tilt_left: { roll: -15, pitch: 0, yaw: 0 },
	tilt_right: { roll: 15, pitch: 0, yaw: 0 },
	} as const;

	// Union of the pose names declared in `HEAD_POSES`.
	type HeadPoseName = keyof typeof HEAD_POSES;

	/**
	 * Tool definitions passed to `OpenaiRealtimeClient` as `tools`.
	 *
	 * - `move_head` takes a `direction` limited to the keys of `HEAD_POSES`.
	 * - `play_move` takes a `name` limited to `MOVE_IDS`; its description embeds
	 *   one `id | kind | description` line per `MOVE_CATALOG` entry.
	 */
	const ROBOT_TOOLS: RealtimeTool[] = [
	  {
	    name: "move_head",
	    description:
	      "Point the robot's head in a named direction. Use this to accompany " +
	      "your speech with a tiny, legible gesture.",
	    parameters: {
	      type: "object",
	      properties: {
	        direction: {
	          type: "string",
	          enum: Object.keys(HEAD_POSES),
	          description: "Named head pose to assume.",
	        },
	      },
	      required: ["direction"],
	    },
	  },
	  {
	    name: "play_move",
	    description:
	      "Trigger a short pre-recorded body-language move from the Reachy " +
	      "dances + emotions library. Catalog (id | kind | when to pick it):\n" +
	      MOVE_CATALOG.map(
	        (m) => ` - ${m.id} | ${m.kind} | ${m.description}`,
	      ).join("\n"),
	    parameters: {
	      type: "object",
	      properties: {
	        name: {
	          type: "string",
	          enum: [...MOVE_IDS],
	          description: "Catalog id to play.",
	        },
	      },
	      required: ["name"],
	    },
	  },
	];

	// ─── App state machine ──────────────────────────────────────────────────
	//
	// The host owns all pre-session state (sign-in / picker / connecting).
	// This app's FSM starts at `idle` once the host handed us a live SDK,
	// and only transitions between conversation-level states.

	/** Conversation-level states of the embedded app's FSM. */
	type AppState =
	  | "idle"
	  | "starting"
	  | "listening"
	  | "user-speaking"
	  | "processing"
	  | "ai-speaking"
	  | "error";

	/** Per-state presentation of the central circle button. */
	interface StateView {
	  /** Caption shown under the circle; an empty string hides it. */
	  caption: string;
	  /** Whether the circle button is disabled in this state. */
	  disabled: boolean;
	}

	/** Caption and disabled flag applied by `setState()` for each state. */
	const STATE_VIEWS: Record<AppState, StateView> = {
	  idle: { caption: "Tap to start", disabled: false },
	  starting: { caption: "Starting", disabled: true },
	  listening: { caption: "", disabled: false },
	  "user-speaking": { caption: "", disabled: false },
	  processing: { caption: "", disabled: false },
	  "ai-speaking": { caption: "", disabled: false },
	  error: { caption: "Tap to retry", disabled: false },
	};

	/** CSS class placed on the circle button (next to `circle`) for each state. */
	const STATE_CLASS: Record<AppState, string> = {
	  idle: "state-authenticated",
	  starting: "state-starting",
	  listening: "state-listening",
	  "user-speaking": "state-user-speaking",
	  processing: "state-processing",
	  "ai-speaking": "state-ai-speaking",
	  error: "state-error",
	};

	/**
	 * States in which a conversation is running or being started. They drive
	 * the visibility of the side controls and the availability of "restart".
	 */
	const LIVE_STATES: ReadonlySet<AppState> = new Set<AppState>([
	  "listening",
	  "user-speaking",
	  "processing",
	  "ai-speaking",
	  "starting",
	]);

	/**
	 * Map our FSM to the embed protocol's coarse `AppPhase`.
	 *
	 * We deliberately collapse every intra-session state to `"live"` -
	 * including `"starting"`. The host treats `"connecting"` as "the
	 * embedded app is not interactive, reveal my ConnectingView overlay
	 * on top", which made sense during the initial wake-up handshake
	 * (owned by `connectToHost()` and surfaced from outside this FSM)
	 * but is wrong for the post-boot transitions. Every time we re-enter
	 * `"starting"` (user tapped the orb to begin a conversation, or the
	 * silent reconnect kicked in) the app is already mounted and has its
	 * own visual feedback - the `.state-starting .ind-spinner` CSS
	 * showing a spinner inside the central circle. Reporting
	 * `"connecting"` would re-paint the host's full-bleed overlay over
	 * our spinner, which both flashes the user back to an
	 * "establishing-session" screen they already cleared, and breaks
	 * the silent-reconnect contract for ICE blips.
	 *
	 * @param state Current FSM state.
	 * @returns `"error"` for the error state, `"live"` for everything else.
	 */
	function mapAppStateToHostPhase(
	  state: AppState,
	): "boot" | "connecting" | "live" | "leaving" | "error" {
	  return state === "error" ? "error" : "live";
	}

	// ─── DOM refs ───────────────────────────────────────────────────────────

	/**
	 * Looks up a required element with `document.querySelector`.
	 *
	 * @param selector CSS selector of the element.
	 * @returns The first matching element.
	 * @throws Error `Missing element: <selector>` when nothing matches.
	 */
	const $ = <T extends HTMLElement>(selector: string): T => {
	  const match = document.querySelector<T>(selector);
	  if (match === null) {
	    throw new Error(`Missing element: ${selector}`);
	  }
	  return match;
	};

	// Element handles resolved once at module load. Lookups made through `$`
	// throw `Missing element: …` if the markup lacks the node.
	const appRoot = $<HTMLElement>("#app");
	const circleBtn = $<HTMLButtonElement>("#main-circle");
	const circleCaption = $<HTMLParagraphElement>("#circle-caption");
	const toolToast = $<HTMLElement>("#tool-toast");
	// Scoped lookup inside the toast; the `!` is not checked at runtime, so a
	// missing `.tool-toast-text` would only surface on first use.
	const toolToastText = toolToast.querySelector<HTMLSpanElement>(
	".tool-toast-text",
	)!;
	// Side controls shown while a conversation is live (see `setState`).
	const micBtn = $<HTMLButtonElement>("#mic-btn");
	const stopBtn = $<HTMLButtonElement>("#stop-btn");

	// Settings dialog and its controls.
	const settingsBtn = $<HTMLButtonElement>("#settings-btn");
	const settingsModal = $<HTMLDialogElement>("#settings-modal");
	const inputInstructions = $<HTMLTextAreaElement>("#openai-instructions");
	const restartBtn = $<HTMLButtonElement>("#restart-conversation");
	const restartHint = $<HTMLElement>("#restart-hint");
	// Same caveat as above: the `!` assumes the dialog contains a <form>.
	const settingsForm = settingsModal.querySelector<HTMLFormElement>("form")!;

	// ─── Runtime state ──────────────────────────────────────────────────────

	/** Current FSM state; written only by `setState()`. */
	let currentState: AppState = "idle";
	/** In-memory copy of the persisted settings. */
	let settings: Settings = loadSettings();

	/** SDK handle to the robot, set once in `boot()`. */
	let robot: ReachyMiniInstance | null = null;
	/** Handle returned by `connectToHost()`, set once in `boot()`. */
	let hostHandle: ConnectedHandle | null = null;
	/** Active OpenAI realtime client, or `null` when no session is open. */
	let openai: OpenaiRealtimeClient | null = null;

	/** Muted `<audio>` element that the assistant track is attached to. */
	let openaiSink: HTMLAudioElement | null = null;
	let wobbler: HeadWobbler | null = null;
	let antennas: AntennasOscillator | null = null;
	let micLevel: MicLevelMonitor | null = null;
	let aiLevel: AiLevelMonitor | null = null;
	let movePlayer: MovePlayer | null = null;

	/** True while `playMove()` is running; pauses wobbler/antenna sends. */
	let movePlaying = false;
	/** Non-null while a tool head pose is held; pauses wobbler sends. */
	let toolPoseRestoreTimer: number | null = null;
	/** True while `tryReconnectOpenai()` is in flight. */
	let openaiReconnecting = false;
	/** Silent reconnects attempted since the last successful connection. */
	let openaiReconnectAttempts = 0;
	/** Screen wake-lock sentinel currently held, if any. */
	let wakeLock: { release(): Promise<void> } | null = null;
	/** Set once the Wake Lock API is found missing or denied. */
	let wakeLockUnavailable = false;
	/** UI-side mute flag mirrored to the robot by the mic button. */
	let micMuted = false;

	// ─── UI rendering ───────────────────────────────────────────────────────

	/**
	 * Moves the FSM to `next` and syncs everything derived from it: the host's
	 * coarse phase, the circle button (disabled flag, CSS class, caption), the
	 * `.orb-wrap` "live" class, the side controls' `aria-hidden`/`tabIndex`, and
	 * the restart button.
	 *
	 * The caption is left untouched when entering `"error"`.
	 *
	 * @param next State to enter.
	 */
	function setState(next: AppState): void {
	  currentState = next;
	  hostHandle?.setAppState({ phase: mapAppStateToHostPhase(next) });

	  const { caption, disabled } = STATE_VIEWS[next];
	  circleBtn.disabled = disabled;
	  circleBtn.className = `circle ${STATE_CLASS[next]}`;
	  if (next !== "error") {
	    setCaption(caption);
	  }

	  const isLive = LIVE_STATES.has(next);
	  document.querySelector(".orb-wrap")?.classList.toggle("live", isLive);

	  // Side controls are only reachable (visually and by keyboard) while live.
	  const ariaHidden = isLive ? "false" : "true";
	  const tabIndex = isLive ? 0 : -1;
	  for (const control of [micBtn, stopBtn]) {
	    control.setAttribute("aria-hidden", ariaHidden);
	    control.tabIndex = tabIndex;
	  }

	  updateRestartAvailability();
	}

	/**
	 * Enables the restart button and hides its hint while a conversation is
	 * live; does the opposite otherwise.
	 */
	function updateRestartAvailability(): void {
	  const isLive = LIVE_STATES.has(currentState);
	  restartHint.hidden = isLive;
	  restartBtn.disabled = !isLive;
	}

	/**
	 * Writes the caption under the central circle.
	 *
	 * @param text Caption text; it is trimmed before display.
	 * @param kind Optional modifier class (`"error"` or `"muted"`) added next to
	 *   `circle-caption`. The `empty` class is appended when the trimmed text is
	 *   empty.
	 */
	function setCaption(text: string, kind: "" | "error" | "muted" = ""): void {
	  const trimmed = text.trim();
	  const classes = ["circle-caption"];
	  if (kind) classes.push(kind);
	  if (!trimmed) classes.push("empty");
	  circleCaption.textContent = trimmed;
	  circleCaption.className = classes.join(" ");
	}

	// ─── Settings modal ─────────────────────────────────────────────────────

	/**
	 * Opens the settings dialog, pre-filled with the current instructions and
	 * with the restart button reflecting the current state.
	 */
	function openSettings(): void {
	  updateRestartAvailability();
	  inputInstructions.value = settings.instructions;
	  settingsModal.showModal();
	}

	// The gear button opens the settings dialog.
	settingsBtn.addEventListener("click", () => openSettings());

	// Persist the instructions only when the form was submitted through the
	// button whose value is "save"; any other submitter leaves settings alone.
	// A blank textarea falls back to the default prompt.
	settingsForm.addEventListener("submit", (event) => {
	  const submitter = (event as SubmitEvent).submitter as HTMLButtonElement | null;
	  if (submitter?.value !== "save") return;

	  settings = {
	    instructions: inputInstructions.value.trim() || DEFAULT_INSTRUCTIONS,
	  };
	  saveSettings(settings);
	});

	// "Restart conversation": save the instructions currently in the textarea,
	// close the dialog, then tear the session down and start a fresh one so the
	// new instructions take effect. Ignored unless a conversation is live.
	restartBtn.addEventListener("click", async () => {
	  if (!LIVE_STATES.has(currentState)) return;

	  settings = {
	    instructions: inputInstructions.value.trim() || DEFAULT_INSTRUCTIONS,
	  };
	  saveSettings(settings);
	  settingsModal.close();

	  try {
	    await teardownConversation();
	    await startConversation();
	  } catch (err) {
	    onFatalError(err);
	  }
	});

	// ─── Click handler for the central circle ──────────────────────────────

	// Central circle: starts a conversation from `idle`; from `error` it clears
	// the error tooltip and returns to `idle`. Taps in any other state do
	// nothing.
	circleBtn.addEventListener("click", async () => {
	  try {
	    switch (currentState) {
	      case "idle":
	        await startConversation();
	        break;
	      case "error":
	        circleCaption.removeAttribute("title");
	        setState("idle");
	        break;
	      default:
	        break;
	    }
	  } catch (err) {
	    onFatalError(err);
	  }
	});

	// ─── Side controls (mic mute + stop) ────────────────────────────────────

	// Mic button: flips the mute flag, forwards it to the robot, and updates the
	// button's class, accessible name and tooltip. No-op without a robot.
	micBtn.addEventListener("click", () => {
	  if (!robot) return;

	  micMuted = !micMuted;
	  robot.setMicMuted(micMuted);

	  const label = micMuted ? "Unmute" : "Mute";
	  micBtn.classList.toggle("muted", micMuted);
	  micBtn.setAttribute("aria-label", label);
	  micBtn.title = label;
	});

	// Stop button: ends the conversation and returns to `idle`. The mute UI is
	// reset completely - class, accessible name and tooltip - so the button does
	// not keep announcing "Unmute" while `micMuted` is already false.
	stopBtn.addEventListener("click", async () => {
	  await teardownConversation();
	  micMuted = false;
	  micBtn.classList.remove("muted");
	  micBtn.setAttribute("aria-label", "Mute");
	  micBtn.title = "Mute";
	  setState("idle");
	});

	// ─── High-level flow steps ──────────────────────────────────────────────

	/**
	 * Starts a conversation: enters `starting`, locates the robot's microphone
	 * track, starts the mic-level monitor, the antennas and the wake lock, then
	 * connects to OpenAI. Any failure is routed to `onFatalError()`.
	 *
	 * Does nothing when no robot is attached.
	 */
	async function startConversation(): Promise<void> {
	  if (!robot) return;
	  setState("starting");

	  const micTrack = getRobotMicTrack(robot);
	  if (micTrack === null) {
	    onFatalError(new Error("Could not find the robot's microphone track"));
	    return;
	  }

	  startMicLevelMonitor(micTrack);
	  startAntennas();
	  void acquireWakeLock();
	  openaiReconnectAttempts = 0;

	  try {
	    await connectOpenai(micTrack);
	  } catch (err) {
	    onFatalError(err);
	    return;
	  }

	  // Make sure the robot's outbound audio path is open so OpenAI's voice
	  // reaches the speakers.
	  robot.setMicMuted(false);
	}

	/**
	 * Mints an ephemeral key, builds an `OpenaiRealtimeClient` wired to the
	 * FSM, publishes it as `openai`, and connects it.
	 *
	 * @param robotMicTrack Robot microphone track used as the client's input.
	 * @throws Error with a caption from `captionForEphemeralError()` when
	 *   minting fails with an `EphemeralKeyError`; other mint errors and
	 *   `client.connect()` failures are rethrown unchanged.
	 */
	async function connectOpenai(robotMicTrack: MediaStreamTrack): Promise<void> {
	  // Mint an OpenAI Realtime ephemeral key from the visitor's HF token
	  // (seeded into `sessionStorage.hf_token` by the host shell). The
	  // returned `ek_…` value is used as the Bearer for the SDP handshake.
	  // Failures bubble back to the caller as an error with a user-friendly
	  // message.
	  let ephemeralKey: string;
	  try {
	    ephemeralKey = await mintEphemeralKey();
	  } catch (err) {
	    if (err instanceof EphemeralKeyError) {
	      throw new Error(captionForEphemeralError(err));
	    }
	    throw err;
	  }

	  // The mint is a network round-trip. If the user pressed stop (-> idle) or a
	  // fatal error fired (-> error) meanwhile, do not open a session nobody is
	  // waiting for.
	  if (currentState === "idle" || currentState === "error") return;

	  const client = new OpenaiRealtimeClient({
	    apiKey: ephemeralKey,
	    model: DEFAULT_MODEL,
	    voice: DEFAULT_VOICE,
	    instructions: settings.instructions,
	    inputTrack: robotMicTrack,
	    tools: ROBOT_TOOLS,
	  });

	  // Events from a client that has been torn down or superseded must not
	  // drive the FSM or the robot.
	  const isCurrent = (): boolean => openai === client;

	  client.on("outputTrack", ({ track }) => {
	    if (!isCurrent()) return;
	    routeOpenaiToRobot(track);
	    startWobbler(track);
	    startAiLevelMonitor(track);
	  });

	  client.on("status", ({ status }) => {
	    if (!isCurrent()) return;
	    switch (status) {
	      case "connected":
	        if (currentState === "ai-speaking" && aiLevel) {
	          // Wait for the assistant audio to actually go quiet before
	          // switching the UI back to listening.
	          aiLevel.waitForSilence(900, () => {
	            if (currentState === "ai-speaking") {
	              setState("listening");
	              antennas?.resume();
	            }
	          });
	        } else {
	          setState("listening");
	          antennas?.resume();
	        }
	        openaiReconnectAttempts = 0;
	        break;
	      case "user-speaking":
	        aiLevel?.cancelSilenceWait();
	        setState("user-speaking");
	        wobbler?.reset();
	        antennas?.freeze();
	        break;
	      case "processing":
	        setState("processing");
	        antennas?.resume();
	        break;
	      case "ai-speaking":
	        aiLevel?.cancelSilenceWait();
	        setState("ai-speaking");
	        antennas?.resume();
	        break;
	      case "error":
	        if (openaiReconnecting) return;
	        void tryReconnectOpenai(
	          robotMicTrack,
	          new Error("OpenAI connection lost"),
	        );
	        break;
	      default:
	        break;
	    }
	  });

	  client.on("toolCall", (call) => {
	    if (!isCurrent()) return;
	    void handleToolCall(call);
	  });
	  client.on("error", ({ error }) => console.error("[openai]", error));

	  // Publish before connecting so teardown can close a half-open client.
	  openai = client;
	  await client.connect();
	}

	/**
	 * Attempts one silent reconnect after the OpenAI connection drops.
	 *
	 * Shows `starting` with a "Reconnecting" caption, closes the old client,
	 * drops the cached ephemeral key, waits 500 ms, and connects again. A
	 * second loss without a successful connection in between, or a failed
	 * reconnect, goes to `onFatalError()`.
	 *
	 * @param robotMicTrack Robot microphone track to feed the new client.
	 * @param cause Error reported if the reconnect budget is already spent.
	 */
	async function tryReconnectOpenai(
	  robotMicTrack: MediaStreamTrack,
	  cause: Error,
	): Promise<void> {
	  if (openaiReconnecting) return;
	  if (openaiReconnectAttempts >= 1) {
	    onFatalError(cause);
	    return;
	  }

	  openaiReconnecting = true;
	  openaiReconnectAttempts += 1;
	  console.warn("[openai] connection lost, attempting silent reconnect…", cause);
	  setState("starting");
	  setCaption("Reconnecting", "muted");

	  stopWobbler();
	  antennas?.freeze();

	  try {
	    await openai?.close();
	  } catch (err) {
	    console.warn("[openai] close during reconnect failed:", err);
	  }
	  openai = null;

	  // Drop the cached ephemeral key before the reconnect attempt. The
	  // mint-cache is sized for back-to-back handshakes in normal
	  // conditions, but a real disconnect implies the key may have been
	  // server-revoked (clock skew, master key rotation, rate-limit). The
	  // extra ~200 ms round-trip to re-mint is cheap compared to the
	  // alternative: an OpenAI 401 that fails the silent reconnect and
	  // forces the user back to the central circle.
	  invalidateEphemeralKey();

	  await new Promise((resolve) => setTimeout(resolve, 500));

	  // `teardownConversation()` clears `openaiReconnecting`. If it ran during
	  // the backoff (stop button, host leave, fatal error), the session is over
	  // and reconnecting would open a conversation nobody asked for.
	  if (!openaiReconnecting) return;

	  try {
	    await connectOpenai(robotMicTrack);
	  } catch (err) {
	    openaiReconnecting = false;
	    onFatalError(err instanceof Error ? err : new Error(String(err)));
	    return;
	  }

	  openaiReconnecting = false;
	}

	/**
	 * Builds the short, user-facing message for an `EphemeralKeyError`.
	 * `connectOpenai()` wraps the result in an `Error`, which reaches
	 * `onFatalError()` and ends up in the `circleCaption.title` tooltip.
	 *
	 * @param err Error raised while minting the ephemeral key.
	 * @returns A sign-in prompt when the HF token is missing; otherwise a
	 *   message carrying the HTTP status (or `?` when none is available).
	 */
	function captionForEphemeralError(err: EphemeralKeyError): string {
	  switch (err.reason) {
	    case "hf_token_missing":
	      return "Sign in to Hugging Face to start a conversation";
	    default:
	      return `Could not reach the OpenAI key service (HTTP ${err.status ?? "?"})`;
	  }
	}

	/**
	 * Finds the robot's microphone track on the SDK's peer connection.
	 *
	 * @param robotInstance SDK instance whose `_pc` peer connection is inspected.
	 * @returns The track of the first receiver carrying audio, or `null` when
	 *   there is no peer connection or no audio receiver.
	 */
	function getRobotMicTrack(
	  robotInstance: ReachyMiniInstance,
	): MediaStreamTrack | null {
	  const pc = robotInstance._pc;
	  if (!pc) return null;
	  const audioReceiver = pc
	    .getReceivers()
	    .find((receiver) => receiver.track?.kind === "audio");
	  return audioReceiver?.track ?? null;
	}

	/**
	 * Sends the assistant's audio track to the robot and attaches it to a
	 * hidden, muted `<audio>` element.
	 *
	 * The track replaces whatever the first audio transceiver on the robot's
	 * peer connection is sending; a transceiver that is not already sending is
	 * switched to `sendrecv` first. Does nothing without a robot or a peer
	 * connection.
	 *
	 * @param track Assistant audio track emitted by the OpenAI client.
	 */
	function routeOpenaiToRobot(track: MediaStreamTrack): void {
	  const pc = robot?._pc;
	  if (!pc) return;

	  const audioTransceiver = pc
	    .getTransceivers()
	    .find(
	      (t) =>
	        t.receiver.track?.kind === "audio" || t.sender.track?.kind === "audio",
	    );

	  if (audioTransceiver) {
	    const { direction } = audioTransceiver;
	    if (direction !== "sendrecv" && direction !== "sendonly") {
	      try {
	        audioTransceiver.direction = "sendrecv";
	      } catch (err) {
	        console.warn("[main] could not bump transceiver direction:", err);
	      }
	    }
	    audioTransceiver.sender.replaceTrack(track).catch((err) => {
	      console.error("[main] replaceTrack failed", err);
	    });
	  } else {
	    console.warn(
	      "[main] no audio transceiver on the robot peer — bidirectional audio unavailable",
	    );
	  }

	  // Muted autoplay sink, created once and reused across tracks.
	  if (!openaiSink) {
	    openaiSink = document.createElement("audio");
	    openaiSink.autoplay = true;
	    openaiSink.muted = true;
	    document.body.appendChild(openaiSink);
	  }
	  openaiSink.srcObject = new MediaStream([track]);
	}

	// ─── Head motion agent ──────────────────────────────────────────────────

	/**
	 * Replaces any running `HeadWobbler` with a new one driven by the
	 * assistant's audio track. Offsets are forwarded to the robot's head unless
	 * a tool head pose is being held or a move is playing; each send result is
	 * fed to `recordSend()`.
	 *
	 * @param assistantTrack Assistant audio track to follow.
	 */
	function startWobbler(assistantTrack: MediaStreamTrack): void {
	  if (!robot) return;
	  wobbler?.stop();

	  const next = new HeadWobbler({
	    track: assistantTrack,
	    onOffsets: ({ roll, pitch, yaw }) => {
	      const suppressed = toolPoseRestoreTimer !== null || movePlaying;
	      if (suppressed) return;
	      const sent = robot?.setHeadRpyDeg(roll, pitch, yaw) ?? false;
	      recordSend(sent, "wobbler");
	    },
	  });
	  wobbler = next;
	  next.start();
	}

	/** Stops and discards the wobbler, then sends the head to (0, 0, 0). */
	function stopWobbler(): void {
	  if (wobbler) {
	    wobbler.stop();
	  }
	  wobbler = null;
	  robot?.setHeadRpyDeg(0, 0, 0);
	}

	// ─── Antennas oscillator ────────────────────────────────────────────────

	/**
	 * Replaces any running `AntennasOscillator` with a new one. Its output is
	 * forwarded to the robot's antennas unless a move is playing; each send
	 * result is fed to `recordSend()`.
	 */
	function startAntennas(): void {
	  if (!robot) return;
	  antennas?.stop();

	  const next = new AntennasOscillator({
	    onAntennas: (right, left) => {
	      if (movePlaying) return;
	      const sent = robot?.setAntennasDeg(right, left) ?? false;
	      recordSend(sent, "antennas");
	    },
	  });
	  antennas = next;
	  next.start();
	}

	/** Stops and discards the oscillator, then sends the antennas to (0, 0). */
	function stopAntennas(): void {
	  if (antennas) {
	    antennas.stop();
	  }
	  antennas = null;
	  robot?.setAntennasDeg(0, 0);
	}

	// ─── Tool-call handler ─────────────────────────────────────────────────

	/** Pending auto-hide timer for the tool toast, if one is showing. */
	let toolToastTimer: number | null = null;

	/**
	 * Shows the tool toast with `text` and hides it again after `durationMs`.
	 * Calling it while a toast is visible replaces the text and restarts the
	 * timer.
	 *
	 * @param text Message to display.
	 * @param durationMs Time before the toast is hidden, in milliseconds.
	 */
	function showToolToast(text: string, durationMs = 2800): void {
	  if (toolToastTimer !== null) {
	    clearTimeout(toolToastTimer);
	    toolToastTimer = null;
	  }

	  toolToastText.textContent = text;
	  toolToast.classList.add("visible");
	  toolToast.setAttribute("aria-hidden", "false");

	  toolToastTimer = window.setTimeout(() => {
	    toolToast.classList.remove("visible");
	    toolToast.setAttribute("aria-hidden", "true");
	    toolToastTimer = null;
	  }, durationMs);
	}

	/**
	 * Toast labels for `move_head` directions. A `Map` is used so that inherited
	 * object keys such as `constructor` can never be returned as a label.
	 */
	const HEAD_DIRECTION_LABELS: ReadonlyMap<string, string> = new Map([
	  ["up", "Looking up"],
	  ["down", "Looking down"],
	  ["left", "Looking left"],
	  ["right", "Looking right"],
	  ["center", "Looking forward"],
	  ["neutral", "Looking forward"],
	  ["tilt_left", "Tilting head left"],
	  ["tilt_right", "Tilting head right"],
	]);

	/**
	 * Builds the short toast text for a tool call.
	 *
	 * @param name Tool name sent by the model.
	 * @param args Parsed tool arguments.
	 * @returns A label for known `move_head` directions (case-insensitive),
	 *   `Moving head: <direction>` for unknown ones (`?` when missing),
	 *   `Playing <name>` / `Playing move` for `play_move`, and `Tool: <name>`
	 *   for anything else.
	 */
	function describeToolCall(name: string, args: Record<string, unknown>): string {
	  switch (name) {
	    case "move_head": {
	      const direction = String(args.direction ?? "").toLowerCase();
	      return (
	        HEAD_DIRECTION_LABELS.get(direction) ??
	        `Moving head: ${direction || "?"}`
	      );
	    }
	    case "play_move": {
	      const move = String(args.name ?? "");
	      return move ? `Playing ${move}` : "Playing move";
	    }
	    default:
	      return `Tool: ${name}`;
	  }
	}

	/**
	 * Executes a tool call from the model and reports the outcome back on the
	 * session that issued it.
	 *
	 * - `move_head` applies one of the `HEAD_POSES` (direction matched
	 *   case-insensitively, own keys only).
	 * - `play_move` plays a catalog move and waits for it to finish.
	 * - Anything else is answered with `ok: false`.
	 *
	 * Nothing is executed without a robot and an open session. No response is
	 * sent if the session was closed or replaced while the call was running.
	 */
	async function handleToolCall({
	  callId,
	  name,
	  arguments: args,
	}: {
	  callId: string;
	  name: string;
	  arguments: Record<string, unknown>;
	}): Promise<void> {
	  // Pin the session: `openai` can be nulled or replaced during the await.
	  const client = openai;
	  if (!robot || !client) return;
	  showToolToast(describeToolCall(name, args));

	  let result: { ok: boolean; message: string };
	  switch (name) {
	    case "move_head": {
	      const direction = String(args.direction ?? "").toLowerCase();
	      // Own-property check: `in` would also accept inherited keys such as
	      // "toString" and hand a function to the robot as a pose.
	      if (Object.prototype.hasOwnProperty.call(HEAD_POSES, direction)) {
	        applyToolHeadPose(HEAD_POSES[direction as HeadPoseName]);
	        result = { ok: true, message: `head moved to ${direction}` };
	      } else {
	        result = {
	          ok: false,
	          message: `unknown direction '${direction}'. Valid: ${Object.keys(HEAD_POSES).join(", ")}`,
	        };
	      }
	      break;
	    }
	    case "play_move": {
	      const moveName = String(args.name ?? "");
	      if ((MOVE_IDS as readonly string[]).includes(moveName)) {
	        try {
	          await playMove(moveName as MoveId);
	          result = { ok: true, message: `played move '${moveName}'` };
	        } catch (err) {
	          result = {
	            ok: false,
	            message: `failed to play '${moveName}': ${err instanceof Error ? err.message : String(err)}`,
	          };
	        }
	      } else {
	        result = {
	          ok: false,
	          message: `unknown move '${moveName}'. Valid: ${MOVE_IDS.join(", ")}`,
	        };
	      }
	      break;
	    }
	    default:
	      result = { ok: false, message: `unknown tool '${name}'` };
	  }

	  // The conversation may have ended or reconnected while the move played.
	  if (openai !== client) return;
	  client.sendToolResponse(callId, result);
	}

	/**
	 * Plays a catalog move on the robot, creating the shared `MovePlayer` on
	 * first use. `movePlaying` is raised for the duration so the wobbler and
	 * antennas stay quiet; afterwards the antennas are sent to (0, 0), whether
	 * the move succeeded or threw.
	 *
	 * @param name Catalog id of the move.
	 */
	async function playMove(name: MoveId): Promise<void> {
	  if (!robot) return;

	  const player = movePlayer ?? new MovePlayer(robot);
	  movePlayer = player;

	  movePlaying = true;
	  try {
	    await player.play(name);
	  } finally {
	    movePlaying = false;
	    robot.setAntennasDeg(0, 0);
	  }
	}

	/**
	 * Sends a head pose to the robot and holds it for 1.2 s: while
	 * `toolPoseRestoreTimer` is set, the wobbler's offsets are dropped. A new
	 * call restarts the hold.
	 *
	 * @param pose Roll/pitch/yaw forwarded to `robot.setHeadRpyDeg()`.
	 */
	function applyToolHeadPose(pose: {
	  roll: number;
	  pitch: number;
	  yaw: number;
	}): void {
	  if (!robot) return;

	  const { roll, pitch, yaw } = pose;
	  robot.setHeadRpyDeg(roll, pitch, yaw);

	  if (toolPoseRestoreTimer !== null) {
	    clearTimeout(toolPoseRestoreTimer);
	  }
	  toolPoseRestoreTimer = window.setTimeout(() => {
	    toolPoseRestoreTimer = null;
	  }, 1200);
	}

	// ─── Mic-level monitor (circle audio-reactivity) ────────────────────────

	/**
	 * Analyses an audio track on every animation frame and publishes the result
	 * as CSS custom properties on the document root:
	 * - `--audio-level`: smoothed overall level in [0, 1],
	 * - `--bar0` … `--bar4`: smoothed levels of five frequency bands.
	 */
	class MicLevelMonitor {
	  private ctx: AudioContext | null = null;
	  private analyser: AnalyserNode | null = null;
	  private source: MediaStreamAudioSourceNode | null = null;
	  private raf = 0;
	  private timeBuf: Float32Array<ArrayBuffer> | null = null;
	  private freqBuf: Uint8Array<ArrayBuffer> | null = null;
	  private level = 0;
	  private bands = [0, 0, 0, 0, 0];

	  /** FFT bin boundaries of the five bands (band b spans edges b..b+1). */
	  private static readonly BAND_EDGES = [4, 8, 16, 32, 64, 128];
	  private static readonly LOG1P_10 = Math.log1p(10);

	  /** Logarithmic compression that maps 0 to 0 and 1 to 1. */
	  private static compress(v: number): number {
	    return Math.log1p(v * 10) / MicLevelMonitor.LOG1P_10;
	  }

	  /**
	   * Starts monitoring `track`, stopping any previous run first.
	   *
	   * @param track Audio track to analyse.
	   */
	  start(track: MediaStreamTrack): void {
	    this.stop();

	    const ctx = new AudioContext();
	    const src = ctx.createMediaStreamSource(new MediaStream([track]));
	    const analyser = ctx.createAnalyser();
	    analyser.fftSize = 1024;
	    analyser.smoothingTimeConstant = 0.75;
	    src.connect(analyser);

	    this.ctx = ctx;
	    this.source = src;
	    this.analyser = analyser;
	    this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
	    this.freqBuf = new Uint8Array(
	      new ArrayBuffer(analyser.frequencyBinCount),
	    );

	    const rootStyle = document.documentElement.style;
	    const tick = (): void => {
	      const an = this.analyser;
	      const tbuf = this.timeBuf;
	      const fbuf = this.freqBuf;
	      // stop() nulls these; bail out without scheduling another frame.
	      if (!an || !tbuf || !fbuf) return;

	      // Overall level: RMS of the waveform, boosted, with a fast attack and
	      // a slow release.
	      an.getFloatTimeDomainData(tbuf);
	      let sum = 0;
	      for (let i = 0; i < tbuf.length; i++) sum += tbuf[i]! * tbuf[i]!;
	      const rms = Math.sqrt(sum / tbuf.length);
	      const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
	      const levelAttack = boosted > this.level ? 0.55 : 0.12;
	      this.level += (boosted - this.level) * levelAttack;
	      rootStyle.setProperty("--audio-level", this.level.toFixed(3));

	      // Band levels: mean magnitude per band, compressed and smoothed.
	      an.getByteFrequencyData(fbuf);
	      const edges = MicLevelMonitor.BAND_EDGES;
	      for (let b = 0; b < this.bands.length; b++) {
	        const lo = edges[b]!;
	        const hi = edges[b + 1]!;
	        let bandSum = 0;
	        for (let j = lo; j < hi; j++) bandSum += fbuf[j]!;
	        const raw = MicLevelMonitor.compress(bandSum / (hi - lo) / 255);
	        const prev = this.bands[b]!;
	        const bandAttack = raw > prev ? 0.35 : 0.12;
	        const next = prev + (raw - prev) * bandAttack;
	        this.bands[b] = next;
	        rootStyle.setProperty(`--bar${b}`, Math.min(1, next).toFixed(3));
	      }
	      this.raf = requestAnimationFrame(tick);
	    };
	    this.raf = requestAnimationFrame(tick);
	  }

	  /** Stops monitoring, releases the audio graph and zeroes the CSS values. */
	  stop(): void {
	    cancelAnimationFrame(this.raf);
	    this.raf = 0;
	    try {
	      this.source?.disconnect();
	      this.analyser?.disconnect();
	      // close() returns a promise; swallow its rejection as well, since the
	      // surrounding try/catch only covers synchronous throws.
	      void this.ctx?.close().catch(() => undefined);
	    } catch {
	      /* swallow */
	    }
	    this.ctx = null;
	    this.source = null;
	    this.analyser = null;
	    this.timeBuf = null;
	    this.freqBuf = null;
	    this.level = 0;
	    this.bands = [0, 0, 0, 0, 0];

	    const rootStyle = document.documentElement.style;
	    rootStyle.setProperty("--audio-level", "0");
	    for (let b = 0; b < 5; b++) rootStyle.setProperty(`--bar${b}`, "0");
	  }

	  /** Resumes the audio context if the browser suspended it. */
	  resumeAudio(): void {
	    const ctx = this.ctx;
	    if (!ctx || ctx.state !== "suspended") return;
	    ctx.resume().catch((err) => {
	      console.warn("[mic-level] audioCtx resume failed:", err);
	    });
	  }
	}

	/**
	 * Starts the shared mic-level monitor on `track`, creating it on first use.
	 *
	 * @param track Robot microphone track.
	 */
	function startMicLevelMonitor(track: MediaStreamTrack): void {
	  const monitor = micLevel ?? new MicLevelMonitor();
	  micLevel = monitor;
	  monitor.start(track);
	}

	/** Stops the mic-level monitor if one exists; the instance is kept for reuse. */
	function stopMicLevelMonitor(): void {
	  if (micLevel) {
	    micLevel.stop();
	  }
	}

	/**
	 * Analyses the assistant's audio track on every animation frame. It
	 * publishes a smoothed level as the `--ai-audio-level` CSS custom property
	 * and can notify a caller once the track has been quiet for a given time.
	 */
	class AiLevelMonitor {
	  private ctx: AudioContext | null = null;
	  private analyser: AnalyserNode | null = null;
	  private source: MediaStreamAudioSourceNode | null = null;
	  private raf = 0;
	  private timeBuf: Float32Array<ArrayBuffer> | null = null;
	  private level = 0;
	  /** `performance.now()` of the last frame whose RMS exceeded the threshold. */
	  private lastActiveTs = 0;
	  /** Pending silence request, if any. */
	  private silenceWait: {
	    quietMs: number;
	    cb: () => void;
	    maxWaitTimer: number | null;
	  } | null = null;
	  /** RMS at or below this value counts as silence. */
	  private static readonly SILENCE_THRESHOLD = 0.006;

	  /**
	   * Starts monitoring `track`, stopping any previous run (and cancelling
	   * its pending silence wait) first.
	   *
	   * @param track Assistant audio track to analyse.
	   */
	  start(track: MediaStreamTrack): void {
	    this.stop();

	    const ctx = new AudioContext();
	    const src = ctx.createMediaStreamSource(new MediaStream([track]));
	    const analyser = ctx.createAnalyser();
	    analyser.fftSize = 1024;
	    analyser.smoothingTimeConstant = 0.75;
	    src.connect(analyser);

	    this.ctx = ctx;
	    this.source = src;
	    this.analyser = analyser;
	    this.timeBuf = new Float32Array(new ArrayBuffer(analyser.fftSize * 4));
	    this.lastActiveTs = performance.now();

	    const rootStyle = document.documentElement.style;
	    const tick = (): void => {
	      const an = this.analyser;
	      const buf = this.timeBuf;
	      // stop() nulls these; bail out without scheduling another frame.
	      if (!an || !buf) return;

	      an.getFloatTimeDomainData(buf);
	      let sum = 0;
	      for (let i = 0; i < buf.length; i++) sum += buf[i]! * buf[i]!;
	      const rms = Math.sqrt(sum / buf.length);
	      const boosted = Math.min(1, Math.pow(rms * 6, 0.7));
	      const levelAttack = boosted > this.level ? 0.55 : 0.12;
	      this.level += (boosted - this.level) * levelAttack;
	      rootStyle.setProperty("--ai-audio-level", this.level.toFixed(3));

	      const now = performance.now();
	      if (rms > AiLevelMonitor.SILENCE_THRESHOLD) {
	        this.lastActiveTs = now;
	      } else if (this.silenceWait) {
	        const quietFor = now - this.lastActiveTs;
	        if (quietFor >= this.silenceWait.quietMs) {
	          // Clear the request before invoking the callback so that the
	          // callback may register a new one.
	          const { cb, maxWaitTimer } = this.silenceWait;
	          this.silenceWait = null;
	          if (maxWaitTimer !== null) clearTimeout(maxWaitTimer);
	          try {
	            cb();
	          } catch (err) {
	            console.warn("[ai-level] silence callback threw:", err);
	          }
	        }
	      }
	      this.raf = requestAnimationFrame(tick);
	    };
	    this.raf = requestAnimationFrame(tick);
	  }

	  /**
	   * Stops monitoring, cancels any pending silence wait without calling its
	   * callback, releases the audio graph and zeroes `--ai-audio-level`.
	   */
	  stop(): void {
	    cancelAnimationFrame(this.raf);
	    this.raf = 0;
	    this.cancelSilenceWait();
	    try {
	      this.source?.disconnect();
	      this.analyser?.disconnect();
	      // close() returns a promise; swallow its rejection as well, since the
	      // surrounding try/catch only covers synchronous throws.
	      void this.ctx?.close().catch(() => undefined);
	    } catch {
	      /* swallow */
	    }
	    this.ctx = null;
	    this.source = null;
	    this.analyser = null;
	    this.timeBuf = null;
	    this.level = 0;
	    document.documentElement.style.setProperty("--ai-audio-level", "0");
	  }

	  /** Resumes the audio context if the browser suspended it. */
	  resumeAudio(): void {
	    const ctx = this.ctx;
	    if (!ctx || ctx.state !== "suspended") return;
	    ctx.resume().catch((err) => {
	      console.warn("[ai-level] audioCtx resume failed:", err);
	    });
	  }

	  /**
	   * Calls `cb` once the track has been below the silence threshold for
	   * `quietMs`, or after `maxWaitMs` at the latest. A new request replaces
	   * any pending one.
	   *
	   * @param quietMs Required continuous quiet time, in milliseconds.
	   * @param cb Callback to invoke; exceptions it throws are logged.
	   * @param maxWaitMs Upper bound on the wait, in milliseconds.
	   */
	  waitForSilence(quietMs: number, cb: () => void, maxWaitMs = 8000): void {
	    this.cancelSilenceWait();
	    const maxWaitTimer = window.setTimeout(() => {
	      // Fire only if this request is still the pending one.
	      if (this.silenceWait?.cb === cb) {
	        this.silenceWait = null;
	        try {
	          cb();
	        } catch (err) {
	          console.warn("[ai-level] max-wait callback threw:", err);
	        }
	      }
	    }, maxWaitMs);
	    this.silenceWait = { quietMs, cb, maxWaitTimer };
	  }

	  /** Drops the pending silence request, if any, without calling it. */
	  cancelSilenceWait(): void {
	    if (!this.silenceWait) return;
	    if (this.silenceWait.maxWaitTimer !== null) {
	      clearTimeout(this.silenceWait.maxWaitTimer);
	    }
	    this.silenceWait = null;
	  }
	}

	/**
	 * Starts the shared assistant-level monitor on `track`, creating it on
	 * first use.
	 *
	 * @param track Assistant audio track.
	 */
	function startAiLevelMonitor(track: MediaStreamTrack): void {
	  const monitor = aiLevel ?? new AiLevelMonitor();
	  aiLevel = monitor;
	  monitor.start(track);
	}

	/** Stops the assistant-level monitor if one exists; the instance is kept. */
	function stopAiLevelMonitor(): void {
	  if (aiLevel) {
	    aiLevel.stop();
	  }
	}

	// ─── Background-tab resilience ──────────────────────────────────────────

	/**
	 * Requests a screen wake lock unless one is already held.
	 *
	 * The API is marked unavailable (and never retried) when it is missing or
	 * the request is rejected with `NotAllowedError` / `SecurityError`; other
	 * failures are logged and may be retried on the next call.
	 *
	 * A sentinel whose `released` flag is true does not count as held: the
	 * browser releases wake locks by itself when the page is hidden, so the
	 * stored sentinel can be stale by the time the page becomes visible again.
	 */
	async function acquireWakeLock(): Promise<void> {
	  if (wakeLockUnavailable) return;

	  type Sentinel = { release(): Promise<void>; readonly released?: boolean };
	  const anyNav = navigator as Navigator & {
	    wakeLock?: {
	      request(type: "screen"): Promise<Sentinel>;
	    };
	  };
	  if (!anyNav.wakeLock) {
	    wakeLockUnavailable = true;
	    return;
	  }

	  const held = wakeLock as Sentinel | null;
	  if (held && held.released !== true) return;

	  try {
	    wakeLock = await anyNav.wakeLock.request("screen");
	  } catch (err) {
	    const name = (err as { name?: string } | null)?.name;
	    if (name === "NotAllowedError" || name === "SecurityError") {
	      wakeLockUnavailable = true;
	    } else {
	      console.warn("[main] wakeLock.request failed:", err);
	    }
	    wakeLock = null;
	  }
	}

	/**
	 * Releases the held wake lock, if any, and forgets it. Errors from
	 * `release()` are ignored.
	 */
	async function releaseWakeLock(): Promise<void> {
	  try {
	    if (wakeLock) {
	      await wakeLock.release();
	    }
	  } catch {
	    /* swallow */
	  }
	  wakeLock = null;
	}

	/**
	 * Asks the wobbler and both level monitors to resume their audio contexts;
	 * components that do not exist are skipped.
	 */
	function resumeAudioContexts(): void {
	  for (const owner of [wobbler, micLevel, aiLevel]) {
	    owner?.resumeAudio();
	  }
	}

	// When the tab becomes visible again during a live conversation: re-request
	// the wake lock, resume the audio contexts and probe the robot link.
	document.addEventListener("visibilitychange", () => {
	  const shouldRecover = !document.hidden && LIVE_STATES.has(currentState);
	  if (!shouldRecover) return;

	  void acquireWakeLock();
	  resumeAudioContexts();
	  void probeRobotLink();
	});

	// ─── Robot data-channel health ─────────────────────────────────────────

	/** Robot sends that have failed in a row since the last success. */
	let consecutiveSendFailures = 0;

	/**
	 * Tracks the outcome of a robot command send.
	 *
	 * A success resets the failure streak. Failures are logged on the first
	 * one and on every 20th. At 40 consecutive failures the link is declared
	 * lost through `onFatalError()`, and the streak restarts so that sends
	 * still in flight while the conversation is being torn down do not raise
	 * the fatal error again on every frame.
	 *
	 * @param ok Whether the send succeeded.
	 * @param where Short label of the sender, used in the log message.
	 */
	function recordSend(ok: boolean, where: string): void {
	  if (ok) {
	    consecutiveSendFailures = 0;
	    return;
	  }

	  consecutiveSendFailures += 1;
	  if (consecutiveSendFailures === 1 || consecutiveSendFailures % 20 === 0) {
	    console.warn(
	      `[main] robot send failed (${where}), ${consecutiveSendFailures} consecutive failures`,
	    );
	  }

	  if (consecutiveSendFailures >= 40) {
	    consecutiveSendFailures = 0;
	    void onFatalError(
	      new Error(
	        "Lost the robot data channel (no commands acknowledged). Tap the circle to reconnect.",
	      ),
	    );
	  }
	}

	/**
	 * Checks the robot data channel by sending a neutral antenna command;
	 * a failed send is reported through `onFatalError()`. Does nothing
	 * without a robot.
	 */
	async function probeRobotLink(): Promise<void> {
	  if (!robot) return;
	  if (robot.setAntennasDeg(0, 0)) return;

	  onFatalError(
	    new Error(
	      "Lost the robot data channel while the tab was hidden. Tap the circle to reconnect.",
	    ),
	  );
	}

	/**
	* Ends the current conversation and releases everything it started.
	* It does not change the FSM state - callers decide what comes next.
	* Errors from closing the OpenAI client are ignored.
	*/
	async function teardownConversation(): Promise<void> {
	// Drop any tool head-pose hold so the next wobbler is not suppressed.
	if (toolPoseRestoreTimer !== null) {
	clearTimeout(toolPoseRestoreTimer);
	toolPoseRestoreTimer = null;
	}
	movePlayer?.stop();
	movePlaying = false;
	// Clearing this flag also tells an in-flight reconnect that the session ended.
	openaiReconnecting = false;
	openaiReconnectAttempts = 0;

	try {
	await openai?.close();
	} catch {
	/* swallow */
	}
	openai = null;

	// The motion agents and monitors are stopped only after the client has
	// closed; both stop helpers also send the robot back to its zero pose.
	stopWobbler();
	stopAntennas();
	stopMicLevelMonitor();
	stopAiLevelMonitor();
	// Fire-and-forget: releaseWakeLock() swallows its own errors.
	void releaseWakeLock();

	// Detach and remove the muted <audio> sink created by routeOpenaiToRobot().
	if (openaiSink) {
	openaiSink.srcObject = null;
	openaiSink.remove();
	openaiSink = null;
	}
	}

	/**
	 * Puts the app in the `error` state and tears the conversation down.
	 *
	 * `setState("error")` leaves the caption untouched, so the error caption is
	 * set here explicitly; otherwise whatever was shown before ("Starting",
	 * "Reconnecting", or nothing) would remain under the circle. The full
	 * message goes to the caption's `title` tooltip.
	 *
	 * @param err The failure; non-`Error` values are stringified.
	 */
	async function onFatalError(err: unknown): Promise<void> {
	  const message = err instanceof Error ? err.message : String(err);
	  console.error("[main] error:", err);
	  setState("error");
	  setCaption(STATE_VIEWS.error.caption, "error");
	  circleCaption.title = message;
	  await teardownConversation();
	}

	// ─── Boot ───────────────────────────────────────────────────────────────

	/**
	 * Entry point: waits for the host to hand over a live SDK, applies the
	 * audio startup configuration, reveals the UI, registers the leave handler
	 * and enters `idle`.
	 */
	async function boot(): Promise<void> {
	  hostHandle = await connectToHost();
	  robot = hostHandle.reachy;

	  // Tune the XVF3800 audio board for conversation. Mirrors the Python
	  // `apply_audio_startup_config()` call right after `start_recording()`
	  // / `start_playing()` in `reachy_mini_conversation_app/console.py`:
	  // by the time `connectToHost()` resolves the host's bridge has
	  // already negotiated WebRTC audio in both directions and the
	  // daemon's audio pipeline is hot, so the batched parameter write
	  // lands on a settled board.
	  //
	  // Fire-and-forget: a failure (older SDK without `applyAudioConfig`,
	  // missing audio board on a Lite running off-robot, DataChannel
	  // burp) is non-fatal - the helper logs and we keep going with the
	  // daemon's default tuning. The `void` makes the lack of await
	  // explicit so we don't block the UI reveal on a multi-parameter
	  // verify roundtrip (~100 ms × N).
	  void applyAudioStartupConfig(robot);

	  // Reveal the UI now that the SDK is ready.
	  appRoot.classList.remove("hidden");
	  document.body.classList.remove("booting");

	  // When the host ends the session, stop the conversation and reset the
	  // mute UI completely (class, accessible name, tooltip) so the button does
	  // not keep announcing "Unmute" while `micMuted` is already false.
	  hostHandle.onLeave(async () => {
	    await teardownConversation();
	    micMuted = false;
	    micBtn.classList.remove("muted");
	    micBtn.setAttribute("aria-label", "Mute");
	    micBtn.title = "Mute";
	  });

	  setState("idle");
	}

	// Start the app. If boot fails, log it and tell the parent window with an
	// `embed:error` message; a failure to post that message is ignored.
	// NOTE(review): the target origin is this frame's own origin - confirm the
	// host page is always served from the same origin.
	void boot().catch((err) => {
	  console.error("[minimal-conversation/embed] bootstrap failed", err);
	  try {
	    const payload = {
	      source: "reachy-mini",
	      type: "embed:error",
	      version: 1,
	      message: err instanceof Error ? err.message : String(err),
	      fatal: true,
	    };
	    window.parent.postMessage(payload, window.location.origin);
	  } catch {
	    /* swallow */
	  }
	});