codexmobile-relay / client /src /hooks /useMessageSpeech.js
Codex
deploy: CodexMobile Relay
90f0300
Raw
History Blame Contribute Delete
8.68 kB
import { useCallback, useEffect, useRef, useState } from 'react';
import { apiBlobFetch } from '../api.js';
import { spokenReplyText, splitSpeechSegments } from '../app-voice-utils.js';
// Defaults for browser speech-synthesis utterances; speakWithBrowser uses them
// when the caller's options do not supply a usable value.
// Assigned to utterance.lang when options.lang is falsy.
const DEFAULT_SPEECH_LANG = 'zh-CN';
// Assigned to utterance.rate when options.rate is not a finite number.
const DEFAULT_SPEECH_RATE = 1;
// Assigned to utterance.pitch when options.pitch is not a finite number.
const DEFAULT_SPEECH_PITCH = 1;
/**
 * Returns the audio element held by `audioRef`, creating and caching one on
 * first use.
 *
 * A newly created element gets `preload = 'auto'` and `playsInline = true`.
 *
 * @param {{ current: * }} audioRef - Mutable holder for the shared audio element.
 * @returns {*} The element stored in `audioRef.current`.
 */
function ensureMessageSpeechAudio(audioRef) {
  if (audioRef.current) {
    return audioRef.current;
  }
  const element = Object.assign(new Audio(), { preload: 'auto', playsInline: true });
  audioRef.current = element;
  return element;
}
/**
 * Halts whatever is currently playing and resets the shared audio resources.
 *
 * In order: optionally aborts the in-flight request, invokes the registered
 * stop-playback callback, pauses and unloads the audio element, revokes the
 * current object URL, and cancels browser speech synthesis.
 *
 * @param {object} ctx - Speech context holding `abortRef`, `stopPlaybackRef`,
 *   `audioRef` and `audioUrlRef`.
 * @param {object} [options]
 * @param {boolean} [options.release=false] - Also drop the audio element from `audioRef`.
 * @param {boolean} [options.abortRequest=false] - Also abort and clear `abortRef`.
 */
function clearMessageSpeechAudio(ctx, options = {}) {
  const { release = false, abortRequest = false } = options;
  const { abortRef, stopPlaybackRef, audioRef, audioUrlRef } = ctx;

  if (abortRequest) {
    abortRef.current?.abort?.();
    abortRef.current = null;
  }

  // Let the active playback promise settle before its element is torn down.
  stopPlaybackRef.current?.();

  const element = audioRef.current;
  if (element) {
    element.pause();
    element.onended = null;
    element.onerror = null;
    element.removeAttribute('src');
    element.load?.();
    if (release) {
      audioRef.current = null;
    }
  }

  const activeUrl = audioUrlRef.current;
  if (activeUrl) {
    URL.revokeObjectURL(activeUrl);
    audioUrlRef.current = '';
  }

  window.speechSynthesis?.cancel?.();
}
/**
 * Stops the current speech run.
 *
 * Increments the run counter so in-progress async work can detect it is stale,
 * clears audio resources (always aborting the pending request), and optionally
 * resets the speaking/loading state.
 *
 * @param {object} ctx - Speech context (refs plus state setters).
 * @param {object} [options]
 * @param {boolean} [options.release=false] - Forwarded to clearMessageSpeechAudio.
 * @param {boolean} [options.resetState=true] - Reset both message-id states to ''.
 */
function stopMessageSpeech(ctx, options = {}) {
  const { release = false, resetState = true } = options;
  ctx.runRef.current += 1;
  clearMessageSpeechAudio(ctx, { release, abortRequest: true });
  if (!resetState) {
    return;
  }
  ctx.setSpeakingMessageId('');
  ctx.setSpeechLoadingMessageId('');
}
/**
 * Speaks `text` through the browser's speech-synthesis API.
 *
 * While the utterance is active, `stopPlaybackRef.current` holds a function
 * that cancels synthesis and resolves the returned promise.
 *
 * @param {string} text - Text to speak.
 * @param {{ current: (Function|null) }} stopPlaybackRef - Receives the stop callback.
 * @param {object} [options]
 * @param {string} [options.lang] - Utterance language; falls back to DEFAULT_SPEECH_LANG.
 * @param {number} [options.rate] - Utterance rate; falls back to DEFAULT_SPEECH_RATE unless finite.
 * @param {number} [options.pitch] - Utterance pitch; falls back to DEFAULT_SPEECH_PITCH unless finite.
 * @returns {Promise<void>} Resolves when speech ends or is stopped; rejects when
 *   synthesis is unsupported or the utterance reports an error.
 */
function speakWithBrowser(text, stopPlaybackRef, options = {}) {
  return new Promise((resolve, reject) => {
    const isSupported = Boolean(window.speechSynthesis && window.SpeechSynthesisUtterance);
    if (!isSupported) {
      reject(new Error('Browser speech synthesis is not supported'));
      return;
    }

    const utterance = new SpeechSynthesisUtterance(text);
    let isDone = false;

    // Stop callback handed to the caller: cancel synthesis, then settle quietly.
    const cancelSpeech = () => {
      window.speechSynthesis.cancel();
      settle();
    };

    // Settles the promise exactly once and detaches everything this call installed.
    const settle = (error) => {
      if (isDone) {
        return;
      }
      isDone = true;
      if (stopPlaybackRef.current === cancelSpeech) {
        stopPlaybackRef.current = null;
      }
      utterance.onend = null;
      utterance.onerror = null;
      if (error) {
        reject(error);
      } else {
        resolve();
      }
    };

    const { lang, rate, pitch } = options;
    utterance.lang = lang || DEFAULT_SPEECH_LANG;
    utterance.rate = Number.isFinite(rate) ? rate : DEFAULT_SPEECH_RATE;
    utterance.pitch = Number.isFinite(pitch) ? pitch : DEFAULT_SPEECH_PITCH;
    utterance.onend = () => settle();
    utterance.onerror = () => settle(new Error('Browser speech synthesis failed'));

    stopPlaybackRef.current = cancelSpeech;
    // Cancel anything already queued before speaking the new utterance.
    window.speechSynthesis.cancel();
    window.speechSynthesis.speak(utterance);
  });
}
/**
 * Plays an audio blob on the shared audio element.
 *
 * Creates an object URL for the blob, clears any previous playback, records
 * the URL in `ctx.audioUrlRef`, and registers a stop callback in
 * `ctx.stopPlaybackRef` that resolves the returned promise.
 *
 * @param {object} ctx - Speech context holding `audioRef`, `audioUrlRef` and `stopPlaybackRef`.
 * @param {Blob} blob - Audio data to play.
 * @returns {Promise<void>} Resolves when playback ends or is stopped; rejects
 *   when the element reports an error or `play()` rejects.
 */
function playMessageAudioBlob(ctx, blob) {
  return new Promise((resolve, reject) => {
    const objectUrl = URL.createObjectURL(blob);
    const player = ensureMessageSpeechAudio(ctx.audioRef);
    let isDone = false;

    // Settles the promise once and removes the handlers this call installed.
    const settle = (error) => {
      if (isDone) {
        return;
      }
      isDone = true;
      if (ctx.stopPlaybackRef.current === settle) {
        ctx.stopPlaybackRef.current = null;
      }
      player.onended = null;
      player.onerror = null;
      if (error) {
        reject(error);
        return;
      }
      resolve();
    };

    // Tear down the previous playback before publishing the new URL and stop callback.
    clearMessageSpeechAudio(ctx);
    ctx.audioUrlRef.current = objectUrl;
    ctx.stopPlaybackRef.current = settle;

    Object.assign(player, { muted: false, src: objectUrl, playsInline: true });
    player.onended = () => settle();
    player.onerror = () => settle(new Error('Message speech playback failed'));
    player.load?.();
    player.play().catch(settle);
  });
}
/**
 * Requests synthesized speech for `text` from `/api/voice/speech`.
 *
 * The request's AbortController is stored in `ctx.abortRef` for the duration
 * of the call so other code can cancel it. It is cleared afterwards unless
 * the ref has since been pointed at a different controller.
 *
 * @param {object} ctx - Speech context holding `runRef` and `abortRef`.
 * @param {string} text - Text to synthesize.
 * @param {number} runId - Run this request belongs to.
 * @returns {Promise<*>} Whatever `apiBlobFetch` resolves with.
 * @throws {Error} When `runId` is no longer the current run, or when the request fails.
 */
async function fetchMessageSpeechBlob(ctx, text, runId) {
  const { runRef, abortRef } = ctx;
  if (runRef.current !== runId) {
    throw new Error('Message speech was stopped');
  }

  const requestController = new AbortController();
  abortRef.current = requestController;
  const request = {
    method: 'POST',
    body: { text },
    signal: requestController.signal
  };

  try {
    return await apiBlobFetch('/api/voice/speech', request);
  } finally {
    // Only clear the ref if it still points at this request's controller.
    if (abortRef.current === requestController) {
      abortRef.current = null;
    }
  }
}
/**
 * Wraps a provider failure in a new Error that records where browser fallback
 * speech should resume.
 *
 * The wrapper copies the original message and name (so callers can still
 * recognise e.g. an `AbortError` by name) and keeps the original value as
 * `cause` so its stack trace is not lost.
 *
 * @param {*} error - The original failure; may be an Error, a string, or nullish.
 * @param {number} fallbackStartIndex - Index of the first segment that was not played.
 * @returns {Error} Wrapper with `name`, `fallbackStartIndex`, and (when `error`
 *   is not nullish) `cause` set.
 */
function messageSpeechProviderError(error, fallbackStartIndex) {
  const message = error?.message || String(error || 'Message speech failed');
  // Attach the original as `cause` only when there is one to attach.
  const wrappedError = error == null
    ? new Error(message)
    : new Error(message, { cause: error });
  wrappedError.name = error?.name || 'MessageSpeechProviderError';
  wrappedError.fallbackStartIndex = fallbackStartIndex;
  return wrappedError;
}
/**
 * Plays every text segment through the speech provider, prefetching the next
 * segment's audio while the current one is playing.
 *
 * `fallbackStartIndex` tracks the first segment that has not finished playing,
 * so a caller that falls back to another voice knows where to resume.
 *
 * @param {object} ctx - Speech context (refs plus state setters).
 * @param {object} params
 * @param {string} params.messageId - Message being spoken; written to the speaking/loading state.
 * @param {string[]} params.segments - Text segments in playback order.
 * @param {number} params.runId - Run this playback belongs to.
 * @returns {Promise<{ stopped: boolean, fallbackStartIndex: number }>} `stopped`
 *   is true when the run was superseded before playback completed.
 * @throws {Error} Rejects with the first segment's fetch error as-is. Failures
 *   inside the playback loop are wrapped by messageSpeechProviderError and
 *   carry `fallbackStartIndex`.
 */
async function playProviderSegments(ctx, { messageId, segments, runId }) {
  let blob = await fetchMessageSpeechBlob(ctx, segments[0], runId);
  if (ctx.runRef.current !== runId) {
    return { stopped: true, fallbackStartIndex: 0 };
  }
  ctx.setSpeechLoadingMessageId('');
  ctx.setSpeakingMessageId(messageId);
  let fallbackStartIndex = 0;
  try {
    for (let index = 0; index < segments.length; index += 1) {
      fallbackStartIndex = index;
      const nextIndex = index + 1;
      // Start fetching the next segment now; errors are captured as a value so
      // a failed prefetch never becomes an unhandled rejection during playback.
      const nextBlobPromise = nextIndex < segments.length
        ? fetchMessageSpeechBlob(ctx, segments[nextIndex], runId)
          .then((nextBlob) => ({ blob: nextBlob }))
          .catch((error) => ({ error }))
        : null;
      await playMessageAudioBlob(ctx, blob);
      // The current segment finished, so a fallback would resume at the next one.
      fallbackStartIndex = index + 1;
      if (ctx.runRef.current !== runId || !nextBlobPromise) {
        break;
      }
      ctx.setSpeechLoadingMessageId(messageId);
      const next = await nextBlobPromise;
      if (next.error) {
        throw next.error;
      }
      blob = next.blob;
      ctx.setSpeechLoadingMessageId('');
      ctx.setSpeakingMessageId(messageId);
    }
  } catch (error) {
    // If playback failed while a prefetch was still in flight, its result will
    // never be used: cancel it instead of leaving the request running. Only do
    // so while this run is current, otherwise abortRef may belong to a newer run.
    if (ctx.runRef.current === runId) {
      ctx.abortRef.current?.abort?.();
      ctx.abortRef.current = null;
    }
    throw messageSpeechProviderError(error, fallbackStartIndex);
  }
  return { stopped: ctx.runRef.current !== runId, fallbackStartIndex };
}
/**
 * Speaks the remaining segments with the browser's speech synthesis after the
 * provider path has failed.
 *
 * Segments from `fallbackStartIndex` onward are joined with spaces and spoken
 * as one utterance. Failures are not rethrown; they are logged with
 * `console.warn` only when the run is still current.
 *
 * @param {object} ctx - Speech context (refs plus state setters).
 * @param {object} params
 * @param {string} params.messageId - Message being spoken.
 * @param {string[]} params.segments - All text segments of the message.
 * @param {*} params.fallbackStartIndex - First segment to speak; non-numeric or
 *   negative values are treated as 0.
 * @param {number} params.runId - Run this fallback belongs to.
 * @returns {Promise<void>}
 */
async function fallbackMessageSpeech(ctx, { messageId, segments, fallbackStartIndex, runId }) {
  const requestedIndex = Number(fallbackStartIndex) || 0;
  const firstRemaining = requestedIndex < 0 ? 0 : requestedIndex;
  const remainingText = segments.slice(firstRemaining).join(' ');

  try {
    ctx.setSpeechLoadingMessageId('');
    ctx.setSpeakingMessageId(messageId);
    if (!remainingText) {
      return;
    }
    await speakWithBrowser(remainingText, ctx.stopPlaybackRef);
  } catch (speechError) {
    const isCurrentRun = ctx.runRef.current === runId;
    if (isCurrentRun) {
      console.warn('[voice] browser message speech failed:', speechError.message || speechError);
    }
  }
}
/**
 * Toggles speech for an assistant message.
 *
 * If the message is already speaking or loading, speech is stopped. Otherwise
 * a new run starts: the provider path is tried first, and when it fails for a
 * reason other than an abort or a superseded run, browser speech takes over.
 * State and audio are reset at the end only if the run is still current.
 *
 * @param {object} ctx - Speech context (current state values, setters and refs).
 * @param {object} message - Message to speak; ignored unless it has an id and
 *   `role === 'assistant'`.
 * @returns {Promise<void>}
 */
async function speakMessageFromContext(ctx, message) {
  const messageId = String(message?.id || '');
  if (!messageId || message?.role !== 'assistant') {
    return;
  }

  // A second request for the message that is already active acts as "stop".
  const activeIds = [ctx.speakingMessageId, ctx.speechLoadingMessageId];
  if (activeIds.includes(messageId)) {
    stopMessageSpeech(ctx);
    return;
  }

  const segments = splitSpeechSegments(spokenReplyText(message.content));
  if (segments.length === 0) {
    return;
  }

  // Claim a new run id so any earlier run can see it has been superseded.
  ctx.runRef.current += 1;
  const runId = ctx.runRef.current;
  const isCurrentRun = () => ctx.runRef.current === runId;

  clearMessageSpeechAudio(ctx, { abortRequest: true });
  ctx.setSpeechLoadingMessageId(messageId);
  ctx.setSpeakingMessageId('');

  try {
    const { stopped } = await playProviderSegments(ctx, { messageId, segments, runId });
    if (stopped) {
      return;
    }
  } catch (error) {
    const wasAborted = error?.name === 'AbortError';
    if (wasAborted || !isCurrentRun()) {
      return;
    }
    console.warn('[voice] message speech failed, using browser fallback:', error.message || error);
    await fallbackMessageSpeech(ctx, {
      messageId,
      segments,
      runId,
      fallbackStartIndex: error?.fallbackStartIndex
    });
  } finally {
    if (isCurrentRun()) {
      clearMessageSpeechAudio(ctx);
      ctx.setSpeakingMessageId('');
      ctx.setSpeechLoadingMessageId('');
    }
  }
}
/**
 * React hook that exposes message text-to-speech controls.
 *
 * A context object is rebuilt on every render with the latest state values and
 * setters, while the mutable holders (audio element, object URL, abort
 * controller, stop callback, run counter) are carried over from the previous
 * render. Speech is stopped whenever `selectedSessionId` changes, and stopped
 * with the audio element released when the component unmounts.
 *
 * @param {*} selectedSessionId - Current session; a change stops any speech in progress.
 * @returns {{
 *   speakingMessageId: string,
 *   speechLoadingMessageId: string,
 *   speakMessage: function(object): void,
 *   stopSpeech: function(object=): void
 * }}
 */
export function useMessageSpeech(selectedSessionId) {
  const [speakingMessageId, setSpeakingMessageId] = useState('');
  const [speechLoadingMessageId, setSpeechLoadingMessageId] = useState('');
  const ctxRef = useRef({});

  // Reuse the holder objects from the previous render so async work started
  // earlier keeps sharing the same mutable state.
  const previousCtx = ctxRef.current;
  const carryOver = (key, initialValue) => previousCtx[key] || { current: initialValue };
  ctxRef.current = {
    speakingMessageId,
    speechLoadingMessageId,
    setSpeakingMessageId,
    setSpeechLoadingMessageId,
    audioRef: carryOver('audioRef', null),
    audioUrlRef: carryOver('audioUrlRef', ''),
    abortRef: carryOver('abortRef', null),
    stopPlaybackRef: carryOver('stopPlaybackRef', null),
    runRef: carryOver('runRef', 0)
  };

  const stopSpeech = useCallback((options) => {
    stopMessageSpeech(ctxRef.current, options);
  }, []);

  const speakMessage = useCallback((message) => {
    speakMessageFromContext(ctxRef.current, message);
  }, []);

  // On unmount: stop and release the audio element without touching state.
  useEffect(() => {
    return () => {
      stopSpeech({ release: true, resetState: false });
    };
  }, [stopSpeech]);

  // Stop whenever the selected session changes.
  useEffect(() => {
    stopSpeech();
  }, [selectedSessionId, stopSpeech]);

  return {
    speakingMessageId,
    speechLoadingMessageId,
    speakMessage,
    stopSpeech
  };
}