import React, { useState, useRef, useEffect } from 'react';
import { Mic, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown, Phone, PhoneOff } from 'lucide-react';
import { api } from '../services/api';

// --- Audio Types & Helpers ---

// Mic audio is downsampled to 16 kHz before upload; server audio is rendered
// through a 24 kHz output AudioContext (see initOutputAudioContext below).
const TARGET_SAMPLE_RATE = 16000;
const OUTPUT_SAMPLE_RATE = 24000;

// Decode a base64 payload (incoming audio frames) into raw bytes.
function base64ToUint8Array(base64: string) {
  const binaryString = atob(base64);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes;
}

// Downsampling: Force input to 16kHz for backend compatibility
// Nearest-sample decimation (no anti-aliasing filter): steps through the
// input at (inputRate/outputRate) and keeps the floor-indexed sample.
// Returns the buffer untouched when the rates already match.
function downsampleBuffer(buffer: Float32Array, inputRate: number, outputRate: number) {
  if (outputRate === inputRate) {
    return buffer;
  }
  const compression = inputRate / outputRate;
  const length = Math.ceil(buffer.length / compression);
  const result = new Float32Array(length);
  let index = 0;
  let inputIndex = 0;
  while (index < length) {
    const intIndex = Math.floor(inputIndex);
    // `|| 0` guards the last slot, which Math.ceil can push one past the end.
    result[index] = buffer[intIndex] || 0;
    index++;
    inputIndex += compression;
  }
  return result;
}

// Floating "live call" assistant widget: a draggable bubble that expands into
// a phone-call style panel, streams mic audio to the /ws/live endpoint and
// plays back the AI's audio replies.
export const LiveAssistant: React.FC = () => {
  const [isOpen, setIsOpen] = useState(false);
  // Call lifecycle state machine; drives the status label and visuals.
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
  const [transcript, setTranscript] = useState('');
  const [volumeLevel, setVolumeLevel] = useState(0);

  // Dragging State
  // null position means "use the default CSS placement".
  const [position, setPosition] = useState<{x: number, y: number} | null>(null);
  // NOTE(review): the useRef generic arguments appear to have been stripped
  // from this copy of the file (e.g. useRef<HTMLDivElement | null>(null)) —
  // restore them from version control; as written these infer `null` refs.
  const containerRef = useRef(null);
  const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
  const hasMovedRef = useRef(false); // distinguishes a drag from a plain click
  const prevButtonPos = useRef<{x: number, y: number} | null>(null); // bubble position before the panel opened

  // Audio Refs
  const audioContextRef = useRef(null); // Output Context
  const inputAudioContextRef = useRef(null); // Input Context
  const mediaStreamRef = useRef(null);
  const processorRef = useRef(null);
  const sourceNodeRef = useRef(null);
  const wsRef =
useRef(null);
  const nextPlayTimeRef = useRef(0); // absolute AudioContext time at which the next queued audio chunk should start
  const analyserRef = useRef(null);
  const volumeIntervalRef = useRef(null);

  // State Refs for async safety
  // Mirrors recording state for use inside audio callbacks / async closures.
  const isRecordingRef = useRef(false);

  // Open/close lifecycle: hang up whenever the panel closes or the component
  // unmounts, and clamp a saved position back inside the viewport on open.
  useEffect(() => {
    if (!isOpen) {
      handleDisconnect();
    }
    // Boundary check on open
    if (position && containerRef.current) {
      const { innerWidth, innerHeight } = window;
      const rect = containerRef.current.getBoundingClientRect();
      const newX = Math.min(Math.max(0, position.x), innerWidth - rect.width);
      const newY = Math.min(Math.max(0, position.y), innerHeight - rect.height);
      if (newX !== position.x || newY !== position.y) {
        setPosition({ x: newX, y: newY });
      }
    }
    return () => { handleDisconnect(); };
  }, [isOpen]);

  // Drag Logic
  // Global mouse/touch listeners registered once on mount; dragRef gates
  // whether a move event actually drags the widget.
  useEffect(() => {
    const handleMove = (e: MouseEvent | TouchEvent) => {
      if (!dragRef.current.isDragging) return;
      const clientX = 'touches' in e ? e.touches[0].clientX : (e as MouseEvent).clientX;
      const clientY = 'touches' in e ? e.touches[0].clientY : (e as MouseEvent).clientY;
      const deltaX = clientX - dragRef.current.startX;
      const deltaY = clientY - dragRef.current.startY;
      // More than 5px of travel counts as a drag, not a click.
      if (Math.abs(deltaX) > 5 || Math.abs(deltaY) > 5) {
        hasMovedRef.current = true;
      }
      let newX = dragRef.current.initialLeft + deltaX;
      let newY = dragRef.current.initialTop + deltaY;
      // Bounds check
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        const { innerWidth, innerHeight } = window;
        newX = Math.min(Math.max(0, newX), innerWidth - rect.width);
        newY = Math.min(Math.max(0, newY), innerHeight - rect.height);
      }
      setPosition({ x: newX, y: newY });
    };
    const handleUp = () => {
      dragRef.current.isDragging = false;
      document.body.style.userSelect = ''; // re-enable text selection after drag
    };
    window.addEventListener('mousemove', handleMove);
    window.addEventListener('mouseup', handleUp);
    window.addEventListener('touchmove', handleMove, { passive: false });
    window.addEventListener('touchend', handleUp);
    return () => {
      window.removeEventListener('mousemove', handleMove);
window.removeEventListener('mouseup', handleUp);
      window.removeEventListener('touchmove', handleMove);
      window.removeEventListener('touchend', handleUp);
    };
  }, []);

  // Volume Visualizer
  // While a call is active, poll the output analyser every 100 ms and map the
  // average frequency magnitude to a 0-100 level for the UI.
  useEffect(() => {
    if (status === 'DISCONNECTED') {
      setVolumeLevel(0);
      return;
    }
    volumeIntervalRef.current = setInterval(() => {
      if (analyserRef.current) {
        const array = new Uint8Array(analyserRef.current.frequencyBinCount);
        analyserRef.current.getByteFrequencyData(array);
        const avg = array.reduce((a,b)=>a+b) / array.length;
        setVolumeLevel(Math.min(100, avg * 1.5)); // 1.5x gain, capped at 100
      }
    }, 100);
    return () => clearInterval(volumeIntervalRef.current);
  }, [status]);

  // Begin a drag: record the pointer origin and the widget's current top-left
  // so the global move handler can apply deltas.
  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!containerRef.current) return;
    const clientX = 'touches' in e ? e.touches[0].clientX : (e as React.MouseEvent).clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : (e as React.MouseEvent).clientY;
    const rect = containerRef.current.getBoundingClientRect();
    if (!position) {
      // First drag ever: seed position from the element's CSS-derived rect.
      setPosition({ x: rect.left, y: rect.top });
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: rect.left, initialTop: rect.top };
    } else {
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: position.x, initialTop: position.y };
    }
    hasMovedRef.current = false;
    document.body.style.userSelect = 'none'; // avoid selecting page text mid-drag
  };

  // A click (pointer-up without crossing the 5px drag threshold) toggles the
  // panel open/closed.
  const handleToggleOpen = () => {
    if (!hasMovedRef.current) {
      if (!isOpen) {
        // Opening: Save current position as "button position"
        if (position) prevButtonPos.current = position;
        setIsOpen(true);
      } else {
        setIsOpen(false);
      }
    }
  };

  // Collapse the panel and restore the bubble to where it sat before opening.
  const handleMinimize = () => {
    setIsOpen(false);
    if (prevButtonPos.current) {
      setPosition(prevButtonPos.current);
    }
  };

  // Lazily create the 24 kHz playback AudioContext plus an analyser node for
  // the volume meter; resume it if the browser auto-suspended it.
  const initOutputAudioContext = () => {
    if (!audioContextRef.current) {
      // @ts-ignore
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 64; // small FFT: coarse bins are enough for a level meter
audioContextRef.current = ctx;
      analyserRef.current = analyser;
    }
    if (audioContextRef.current.state === 'suspended') {
      audioContextRef.current.resume();
    }
  };

  // Place the "call": open a WebSocket to /ws/live carrying the current
  // user's identity, then auto-start the mic once the socket opens.
  const handleConnect = async () => {
    const user = api.auth.getCurrentUser();
    if (!user) return;
    setStatus('CONNECTING');
    setTranscript('正在呼叫 AI 助理...');
    try {
      initOutputAudioContext();
      // Match ws/wss to the page's own scheme.
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
      const wsUrl = `${protocol}//${window.location.host}/ws/live?userId=${user._id}&username=${encodeURIComponent(user.username)}`;
      console.log("Connecting to", wsUrl);
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;
      ws.onopen = async () => {
        console.log('WS Open');
        setStatus('CONNECTED');
        setTranscript('通话已接通');
        // Automatically start recording once connected (simulate phone call behavior)
        await startRecording();
      };
      ws.onmessage = async (event) => {
        try {
          const msg = JSON.parse(event.data);
          handleServerMessage(msg);
        } catch (e) {
          console.error("Parse error", e);
        }
      };
      ws.onclose = () => { console.log('WS Close'); handleDisconnect(); };
      ws.onerror = (e) => { console.error('WS Error', e); setTranscript('连接中断'); handleDisconnect(); };
    } catch (e) {
      console.error("Connect failed", e);
      setStatus('DISCONNECTED');
      setTranscript('呼叫失败');
    }
  };

  // Dispatch one server message. 'audio' frames carry base64 PCM16 that is
  // decoded and played on the output context; 'text' updates the transcript;
  // 'turnComplete' flips to THINKING; 'error' surfaces the server's message.
  const handleServerMessage = async (msg: any) => {
    if (msg.type === 'audio' && msg.data && audioContextRef.current) {
      setStatus('SPEAKING');
      const ctx = audioContextRef.current;
      const bytes = base64ToUint8Array(msg.data);
      // Reinterpret the raw bytes as 16-bit PCM samples.
      const int16 = new Int16Array(bytes.buffer);
      const float32 = new Float32Array(int16.length);
      // NOTE(review): this playback section appears TRUNCATED in this copy of
      // the file — the int16→float32 conversion loop body and the
      // AudioBuffer creation / source scheduling code (which presumably used
      // nextPlayTimeRef to queue chunks back-to-back) are missing between the
      // `for` header and the onended-style handler below. Recover the
      // original from version control; the next line is preserved verbatim.
      for(let i=0; i { if (ctx.currentTime >= nextPlayTimeRef.current - 0.1) { setStatus('LISTENING'); } }; }
    if (msg.type === 'text' && msg.content) {
      setTranscript(msg.content);
    }
    if (msg.type === 'turnComplete') {
      setStatus('THINKING');
    }
    if (msg.type === 'error') {
      setTranscript(`错误: ${msg.message}`);
    }
  };

  // Capture the microphone and stream PCM16 chunks over the socket.
  // No-op unless the WebSocket is already open.
  const startRecording = async () => {
    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
    try {
isRecordingRef.current = true;
      // 1. Get Stream
      // Mono capture with browser echo cancellation / AGC / noise suppression.
      const stream = await navigator.mediaDevices.getUserMedia({ audio: { channelCount: 1, echoCancellation: true, autoGainControl: true, noiseSuppression: true } });
      // User hung up while the permission prompt was open — release the mic.
      if (!isRecordingRef.current) {
        stream.getTracks().forEach(t => t.stop());
        return;
      }
      mediaStreamRef.current = stream;
      // 2. Create Input Context
      // @ts-ignore
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor();
      inputAudioContextRef.current = ctx;
      await ctx.resume();
      const source = ctx.createMediaStreamSource(stream);
      const processor = ctx.createScriptProcessor(4096, 1, 1);
      // Mute gain
      // The ScriptProcessor must be wired through to destination for
      // onaudioprocess to fire; a zero-gain node keeps the mic inaudible.
      const muteGain = ctx.createGain();
      muteGain.gain.value = 0;
      source.connect(processor);
      processor.connect(muteGain);
      muteGain.connect(ctx.destination);
      const contextSampleRate = ctx.sampleRate;
      processor.onaudioprocess = (e) => {
        if (!isRecordingRef.current) return;
        const inputData = e.inputBuffer.getChannelData(0);
        // 3. Downsample to 16000Hz for API compatibility
        const downsampledData = downsampleBuffer(inputData, contextSampleRate, TARGET_SAMPLE_RATE);
        // 4. Convert to PCM16
        const l = downsampledData.length;
        const int16Data = new Int16Array(l);
        for (let i = 0; i < l; i++) {
          // Clamp to [-1, 1], then scale to the signed 16-bit range.
          let s = Math.max(-1, Math.min(1, downsampledData[i]));
          int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
        // 5.
// Send — encode the PCM16 chunk as base64 and push it over the socket
        // (step 5 of the pipeline numbered in the comments above).
        let binary = '';
        const bytes = new Uint8Array(int16Data.buffer);
        const len = bytes.byteLength;
        for (let i = 0; i < len; i++) {
          binary += String.fromCharCode(bytes[i]);
        }
        const b64 = btoa(binary);
        if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
          wsRef.current.send(JSON.stringify({ type: 'audio', data: b64 }));
        }
      };
      sourceNodeRef.current = source;
      processorRef.current = processor;
      setStatus('LISTENING');
      // Don't set transcript here, keep "Connected" message until AI speaks or user status changes
    } catch (e) {
      console.error(e);
      isRecordingRef.current = false;
      setTranscript('麦克风访问失败');
    }
  };

  // Tear down the capture pipeline: detach audio nodes, stop mic tracks,
  // close the input AudioContext. Safe to call repeatedly.
  const stopRecording = () => {
    isRecordingRef.current = false;
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (sourceNodeRef.current) {
      sourceNodeRef.current.disconnect();
      sourceNodeRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(t => t.stop());
      mediaStreamRef.current = null;
    }
    if (inputAudioContextRef.current) {
      inputAudioContextRef.current.close().catch(()=>{});
      inputAudioContextRef.current = null;
    }
  };

  // Full hang-up: close the socket and output context, stop recording and
  // reset all call state. Also used as effect cleanup, so it is idempotent.
  const handleDisconnect = () => {
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (audioContextRef.current) {
      audioContextRef.current.close().catch(()=>{});
      audioContextRef.current = null;
    }
    stopRecording();
    setStatus('DISCONNECTED');
    setTranscript('');
    nextPlayTimeRef.current = 0;
  };

  // The widget only renders for signed-in users.
  if (!api.auth.getCurrentUser()) return null;

  return (
{/* NOTE(review): the JSX element markup in this render body appears to have
    been STRIPPED from this copy of the file — only the expression containers,
    text nodes and a few original comments survive (e.g. the ternaries on the
    status lines clearly chose between icon elements such as <Volume2/> /
    <Mic/> / <Loader2/> that are now missing, and L11/L24 had buttons inside
    the now-empty parentheses). Recover the markup from version control; the
    remaining lines are preserved verbatim below and are NOT valid JSX as-is. */}
{!isOpen && (
)} {isOpen && (
{/* Header */}
AI 实时通话
{status === 'DISCONNECTED' && ( )}
{/* Main Visual */}
{/* Pulse Effect */}
{/* Ripple 1 */}
{/* Ripple 2 */}
{/* Center Icon */}
{status === 'SPEAKING' ? : status === 'LISTENING' ? : status === 'THINKING' ? : status === 'CONNECTED' ? : }

{status === 'DISCONNECTED' ? '未连接' : status === 'CONNECTING' ? '呼叫中...' : status === 'CONNECTED' ? '通话建立' : status === 'LISTENING' ? '正在聆听...' : status === 'THINKING' ? '思考中...' : '正在说话'}

{transcript}

{/* Controls */}
{status === 'DISCONNECTED' ? ( ) : (
挂断
)}
)}
); };