import { useCallback, useRef } from 'react';
import { realtimeVoiceWebsocketUrl } from '../api.js';
import { handleRealtimeVoiceEvent } from './voice-realtime-events.js';
import { startRealtimeMicrophone } from './voice-realtime-microphone.js';
import { playRealtimeAudioDelta, stopRealtimePlayback } from './voice-realtime-playback.js';

/** User-facing messages keyed by the WebSocket close reason string. */
const REALTIME_CLOSE_MESSAGES = Object.freeze({
  mac_offline: 'Mac 连接器未在线',
  mac_reconnected: 'Mac 连接已刷新,请重试',
  relay_realtime_frame_too_large: '实时语音数据过大',
  relay_realtime_backpressure: '实时语音连接拥塞,请稍后重试',
  relay_realtime_frame_invalid: '实时语音数据格式异常',
  relay_realtime_closed: '实时语音连接已关闭',
});

/** Maximum number of transcripts kept in the idea buffer; older entries are dropped first. */
const IDEA_BUFFER_LIMIT = 30;

/**
 * Translate a WebSocket close event into the message to show the user.
 *
 * @param {{ reason?: unknown } | null | undefined} event - Close event (or any object with a `reason`).
 * @returns {string} Empty string when there is no reason or the reason is one of the
 *   silent ones (`browser_closed`, `fixture_realtime_done`); the mapped message for a
 *   known reason; otherwise the trimmed reason itself.
 */
export function realtimeCloseMessage(event) {
  const reason = String(event?.reason || '').trim();
  if (!reason || reason === 'browser_closed' || reason === 'fixture_realtime_done') {
    return '';
  }
  // Own-property check: the reason comes from the remote peer, and a plain index
  // lookup would resolve inherited keys such as "constructor" to a function.
  if (Object.prototype.hasOwnProperty.call(REALTIME_CLOSE_MESSAGES, reason)) {
    return REALTIME_CLOSE_MESSAGES[reason];
  }
  return reason;
}

/**
 * React hook that owns the realtime voice WebSocket together with the refs for
 * microphone capture, playback and per-turn bookkeeping.
 *
 * @param {object} common - Shared dialog state. This hook reads `ideaBufferRef`,
 *   `realtimeRef`, `openRef`, `status.voiceRealtime` and calls `setErrorBriefly`,
 *   `setError`, `setMode`, `setTranscript`, `setAssistantText`, `setHandoffDraft`
 *   and `clearRecordedAudio`. The whole object is also spread into the context
 *   handed to the event, microphone and playback helpers.
 * @returns {{
 *   startRealtime: () => void,
 *   stopRealtime: (options?: { keepPanel?: boolean }) => void,
 *   stopPlayback: (options?: object) => void,
 *   requestHandoffSummary: (triggerText?: string) => void,
 *   resumeAssistantAudio: () => void,
 * }} Controls for the realtime session.
 */
export function useRealtimeVoiceDialog(common) {
  // Connection and microphone capture graph.
  const socketRef = useRef(null);
  const streamRef = useRef(null);
  const audioContextRef = useRef(null);
  const audioSourceRef = useRef(null);
  const processorRef = useRef(null);

  // Playback state shared with the playback helpers.
  const playbackContextRef = useRef(null);
  const playbackSourcesRef = useRef(new Set());
  const playheadRef = useRef(0);

  // Per-turn bookkeeping; reset by stopRealtime and shared through the runtime context.
  const assistantTextRef = useRef('');
  const speechStartedRef = useRef(false);
  const turnStartedAtRef = useRef(0);
  const lastSoundAtRef = useRef(0);
  const awaitingResponseRef = useRef(false);
  const bargeInStartedAtRef = useRef(0);
  const suppressAssistantAudioRef = useRef(false);

  // Always points at the context built during the latest render, so socket
  // callbacks created earlier still see current values.
  const runtimeContextRef = useRef(null);

  /**
   * Append a transcript to the idea buffer after collapsing whitespace.
   * Blank text and an immediate repeat of the last entry are ignored, and the
   * buffer is trimmed in place to the newest IDEA_BUFFER_LIMIT entries.
   */
  const appendIdeaTranscript = useCallback((transcript) => {
    const text = String(transcript || '').replace(/\s+/g, ' ').trim();
    if (!text) {
      return;
    }
    const buffer = common.ideaBufferRef.current;
    if (buffer[buffer.length - 1] === text) {
      return;
    }
    buffer.push(text);
    if (buffer.length > IDEA_BUFFER_LIMIT) {
      buffer.splice(0, buffer.length - IDEA_BUFFER_LIMIT);
    }
  }, [common.ideaBufferRef]);

  /** Forward to stopRealtimePlayback with this hook's playback refs. */
  const stopPlayback = useCallback((options = {}) => {
    stopRealtimePlayback({ playbackSourcesRef, playbackContextRef, playheadRef }, options);
  }, []);

  /** Forward one audio delta to playRealtimeAudioDelta with the playback refs. */
  const playAudioDelta = useCallback((delta) => {
    playRealtimeAudioDelta(
      { ...common, playbackSourcesRef, playbackContextRef, playheadRef },
      delta,
    );
  }, [common]);

  /**
   * Tear down the socket, microphone graph and playback, and reset turn state.
   * With `keepPanel: true`, `common.realtimeRef.current` is left untouched.
   */
  const stopRealtime = useCallback(({ keepPanel = false } = {}) => {
    const socket = socketRef.current;
    socketRef.current = null;
    if (socket) {
      // Detach handlers first so this teardown does not re-enter through onclose/onerror.
      socket.onopen = null;
      socket.onmessage = null;
      socket.onerror = null;
      socket.onclose = null;
      try {
        socket.send(JSON.stringify({ type: 'close' }));
      } catch {
        // Socket may already be closed.
      }
      try {
        socket.close();
      } catch {
        // Socket may already be closed.
      }
    }

    processorRef.current?.disconnect?.();
    processorRef.current = null;
    audioSourceRef.current?.disconnect?.();
    audioSourceRef.current = null;
    streamRef.current?.getTracks?.().forEach((track) => track.stop());
    streamRef.current = null;

    const context = audioContextRef.current;
    audioContextRef.current = null;
    if (context && context.state !== 'closed') {
      context.close?.().catch?.(() => null);
    }

    assistantTextRef.current = '';
    speechStartedRef.current = false;
    turnStartedAtRef.current = 0;
    lastSoundAtRef.current = 0;
    awaitingResponseRef.current = false;
    bargeInStartedAtRef.current = 0;
    suppressAssistantAudioRef.current = false;

    stopPlayback({ release: true });
    if (!keepPanel) {
      common.realtimeRef.current = false;
    }
  }, [common, stopPlayback]);

  /**
   * Send the buffered transcripts over the socket as a `voice.handoff.summarize`
   * message. Reports an error instead when the buffer is empty or the socket is
   * not open. While the request is pending, the suppress-assistant-audio flag is set.
   */
  const requestHandoffSummary = useCallback((triggerText = '') => {
    const socket = socketRef.current;
    const transcripts = common.ideaBufferRef.current.filter(Boolean);
    if (!transcripts.length) {
      common.setErrorBriefly('还没有可整理的语音内容');
      return;
    }
    if (!socket || socket.readyState !== WebSocket.OPEN) {
      common.setErrorBriefly('实时语音连接不可用');
      return;
    }

    stopPlayback();
    suppressAssistantAudioRef.current = true;
    awaitingResponseRef.current = false;
    bargeInStartedAtRef.current = 0;
    assistantTextRef.current = '';
    common.setAssistantText('');
    common.setHandoffDraft('');
    common.setError('');
    common.setMode('summarizing');

    try {
      socket.send(JSON.stringify({
        type: 'voice.handoff.summarize',
        transcripts,
        trigger: triggerText,
      }));
    } catch {
      suppressAssistantAudioRef.current = false;
      common.setErrorBriefly('实时语音连接不可用');
    }
  }, [common, stopPlayback]);

  /** Clear the suppress-assistant-audio flag. */
  const resumeAssistantAudio = useCallback(() => {
    suppressAssistantAudioRef.current = false;
  }, []);

  // Rebuilt on every render and published through runtimeContextRef for the
  // socket, event and microphone helpers.
  const runtimeContext = {
    ...common,
    socketRef,
    streamRef,
    audioContextRef,
    audioSourceRef,
    processorRef,
    playbackContextRef,
    playbackSourcesRef,
    playheadRef,
    assistantTextRef,
    speechStartedRef,
    turnStartedAtRef,
    lastSoundAtRef,
    awaitingResponseRef,
    bargeInStartedAtRef,
    suppressAssistantAudioRef,
    appendIdeaTranscript,
    requestHandoffSummary,
    stopPlayback,
    playAudioDelta,
    stopRealtime,
    startMicrophone: (socket) => startRealtimeMicrophone(runtimeContextRef.current, socket),
  };
  runtimeContextRef.current = runtimeContext;

  /**
   * Open the realtime voice WebSocket and wire its handlers.
   * Does nothing beyond reporting an error when realtime voice is not
   * configured, and returns early when a socket is already held.
   */
  const startRealtime = useCallback(() => {
    if (!common.status.voiceRealtime?.configured) {
      common.setErrorBriefly('未配置实时语音');
      return;
    }
    if (socketRef.current) {
      return;
    }

    common.clearRecordedAudio();
    stopRealtime({ keepPanel: true });
    common.realtimeRef.current = true;
    assistantTextRef.current = '';
    common.setError('');
    common.setTranscript('');
    common.setAssistantText('');
    common.setMode('waiting');

    const socket = new WebSocket(realtimeVoiceWebsocketUrl());
    socketRef.current = socket;

    socket.onopen = () => {
      common.setMode('waiting');
    };

    socket.onmessage = (event) => {
      try {
        handleRealtimeVoiceEvent(runtimeContextRef.current, JSON.parse(event.data));
      } catch {
        // Ignore malformed proxy events.
      }
    };

    socket.onerror = () => {
      common.setErrorBriefly('实时语音连接失败');
      // NOTE(review): stopRealtime detaches onclose, so the setMode('idle') in the
      // close handler never runs on this path - confirm the mode is reset elsewhere.
      stopRealtime({ keepPanel: true });
    };

    socket.onclose = (event) => {
      const shouldReport = common.openRef.current && common.realtimeRef.current;
      const message = realtimeCloseMessage(event);
      // Always release the dead socket and capture resources. Leaving the closed
      // socket in socketRef would make every later startRealtime return early.
      stopRealtime({ keepPanel: true });
      if (!shouldReport) {
        return;
      }
      if (message) {
        common.setErrorBriefly(message);
      }
      common.setMode('idle');
    };
  }, [common, stopRealtime]);

  return {
    startRealtime,
    stopRealtime,
    stopPlayback,
    requestHandoffSummary,
    resumeAssistantAudio,
  };
}