// NOTE(review): removed stray export artifacts ("Spaces:" / "Sleeping" status
// lines left over from a sandbox/paste export) — they are not code.
| import React, { useState, useRef, useEffect } from 'react'; | |
| import { Mic, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown, Phone, PhoneOff } from 'lucide-react'; | |
| import { api } from '../services/api'; | |
// --- Audio Types & Helpers ---
// Microphone audio is downsampled to this rate before being streamed upstream.
const TARGET_SAMPLE_RATE = 16000;
// Sample rate of the PCM16 audio the server streams back for playback.
const OUTPUT_SAMPLE_RATE = 24000;
| function base64ToUint8Array(base64: string) { | |
| const binaryString = atob(base64); | |
| const len = binaryString.length; | |
| const bytes = new Uint8Array(len); | |
| for (let i = 0; i < len; i++) { | |
| bytes[i] = binaryString.charCodeAt(i); | |
| } | |
| return bytes; | |
| } | |
| // Downsampling: Force input to 16kHz for backend compatibility | |
| function downsampleBuffer(buffer: Float32Array, inputRate: number, outputRate: number) { | |
| if (outputRate === inputRate) { | |
| return buffer; | |
| } | |
| const compression = inputRate / outputRate; | |
| const length = Math.ceil(buffer.length / compression); | |
| const result = new Float32Array(length); | |
| let index = 0; | |
| let inputIndex = 0; | |
| while (index < length) { | |
| const intIndex = Math.floor(inputIndex); | |
| result[index] = buffer[intIndex] || 0; | |
| index++; | |
| inputIndex += compression; | |
| } | |
| return result; | |
| } | |
// Floating AI voice-call assistant widget.
// Renders a draggable launcher bubble that expands into a call panel; streams
// 16kHz PCM16 microphone audio to the backend over a WebSocket (/ws/live) and
// plays back the server's base64-encoded 24kHz PCM16 audio chunks gaplessly
// via the Web Audio API.
export const LiveAssistant: React.FC = () => {
  // Whether the expanded call panel (vs. the launcher bubble) is shown.
  const [isOpen, setIsOpen] = useState(false);
  // Call lifecycle state machine; drives the colors, icons and status labels.
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
  // Latest caption shown under the visualizer (server text or local status).
  const [transcript, setTranscript] = useState('');
  // 0-100 loudness of the playback path; feeds the pulse/ripple visuals.
  const [volumeLevel, setVolumeLevel] = useState(0);
  // Dragging State
  // null position = widget still sits at its default bottom-right CSS anchor.
  const [position, setPosition] = useState<{x: number, y: number} | null>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  // Mutable drag bookkeeping; kept in a ref so the global move/up handlers
  // (bound once) never see stale state.
  const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
  // Distinguishes a drag from a plain click on the bubble.
  const hasMovedRef = useRef(false);
  // Bubble position to restore when the panel is minimized.
  const prevButtonPos = useRef<{x: number, y: number} | null>(null);
  // Audio Refs
  const audioContextRef = useRef<AudioContext | null>(null); // Output Context
  const inputAudioContextRef = useRef<AudioContext | null>(null); // Input Context
  const mediaStreamRef = useRef<MediaStream | null>(null);
  // NOTE(review): ScriptProcessorNode is deprecated; AudioWorklet is the
  // modern replacement, but swapping it would be a behavioral change.
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const wsRef = useRef<WebSocket | null>(null);
  // Schedule cursor: where the next playback chunk should start so chunks
  // play back-to-back without gaps or overlap.
  const nextPlayTimeRef = useRef<number>(0);
  // Taps the output path so the volume meter reacts to AI speech.
  const analyserRef = useRef<AnalyserNode | null>(null);
  const volumeIntervalRef = useRef<any>(null);
  // State Refs for async safety
  // Read inside async/audio callbacks to bail out after the call is hung up.
  const isRecordingRef = useRef(false);
  // Lifecycle: tear down the call whenever the panel closes (and on unmount),
  // and clamp a previously dragged widget back inside the viewport, since the
  // widget's size changes between bubble and panel.
  useEffect(() => {
    if (!isOpen) {
      handleDisconnect();
    }
    // Boundary check on open
    if (position && containerRef.current) {
      const { innerWidth, innerHeight } = window;
      const rect = containerRef.current.getBoundingClientRect();
      const newX = Math.min(Math.max(0, position.x), innerWidth - rect.width);
      const newY = Math.min(Math.max(0, position.y), innerHeight - rect.height);
      if (newX !== position.x || newY !== position.y) {
        setPosition({ x: newX, y: newY });
      }
    }
    return () => {
      handleDisconnect();
    };
    // NOTE(review): `position` is omitted from the deps, so the clamp only
    // runs on open/close rather than on every drag step — looks intentional,
    // but confirm.
  }, [isOpen]);
  // Drag Logic
  // Global mouse/touch listeners bound once; they no-op unless a drag was
  // started via handleDragStart (all drag state lives in dragRef).
  useEffect(() => {
    const handleMove = (e: MouseEvent | TouchEvent) => {
      if (!dragRef.current.isDragging) return;
      const clientX = 'touches' in e ? e.touches[0].clientX : (e as MouseEvent).clientX;
      const clientY = 'touches' in e ? e.touches[0].clientY : (e as MouseEvent).clientY;
      const deltaX = clientX - dragRef.current.startX;
      const deltaY = clientY - dragRef.current.startY;
      // Anything beyond a 5px wiggle counts as a drag, not a click.
      if (Math.abs(deltaX) > 5 || Math.abs(deltaY) > 5) {
        hasMovedRef.current = true;
      }
      let newX = dragRef.current.initialLeft + deltaX;
      let newY = dragRef.current.initialTop + deltaY;
      // Bounds check
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        const { innerWidth, innerHeight } = window;
        newX = Math.min(Math.max(0, newX), innerWidth - rect.width);
        newY = Math.min(Math.max(0, newY), innerHeight - rect.height);
      }
      setPosition({ x: newX, y: newY });
    };
    const handleUp = () => {
      dragRef.current.isDragging = false;
      // Re-enable page text selection disabled in handleDragStart.
      document.body.style.userSelect = '';
    };
    window.addEventListener('mousemove', handleMove);
    window.addEventListener('mouseup', handleUp);
    window.addEventListener('touchmove', handleMove, { passive: false });
    window.addEventListener('touchend', handleUp);
    return () => {
      window.removeEventListener('mousemove', handleMove);
      window.removeEventListener('mouseup', handleUp);
      window.removeEventListener('touchmove', handleMove);
      window.removeEventListener('touchend', handleUp);
    };
  }, []);
  // Volume Visualizer
  // While connected, polls the output analyser every 100ms and maps average
  // frequency-bin magnitude to a 0-100 level for the visuals.
  useEffect(() => {
    if (status === 'DISCONNECTED') {
      setVolumeLevel(0);
      return;
    }
    volumeIntervalRef.current = setInterval(() => {
      if (analyserRef.current) {
        const array = new Uint8Array(analyserRef.current.frequencyBinCount);
        analyserRef.current.getByteFrequencyData(array);
        const avg = array.reduce((a,b)=>a+b) / array.length;
        // *1.5 boosts sensitivity; clamp keeps the scale bounded at 100.
        setVolumeLevel(Math.min(100, avg * 1.5));
      }
    }, 100);
    return () => clearInterval(volumeIntervalRef.current);
  }, [status]);
  // Begin a drag from either the launcher bubble or the panel header.
  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!containerRef.current) return;
    const clientX = 'touches' in e ? e.touches[0].clientX : (e as React.MouseEvent).clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : (e as React.MouseEvent).clientY;
    const rect = containerRef.current.getBoundingClientRect();
    if (!position) {
      // First drag ever: convert the CSS bottom-right anchor into explicit
      // left/top coordinates so subsequent moves are absolute.
      setPosition({ x: rect.left, y: rect.top });
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: rect.left, initialTop: rect.top };
    } else {
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: position.x, initialTop: position.y };
    }
    hasMovedRef.current = false;
    // Prevent accidental text selection while dragging; reset on mouse/touch up.
    document.body.style.userSelect = 'none';
  };
  // Click on the bubble: toggles the panel, but only when the pointer did not
  // move (i.e. this wasn't the tail end of a drag).
  const handleToggleOpen = () => {
    if (!hasMovedRef.current) {
      if (!isOpen) {
        // Opening: Save current position as "button position"
        if (position) prevButtonPos.current = position;
        setIsOpen(true);
      } else {
        setIsOpen(false);
      }
    }
  };
  // Collapse back to the bubble, restoring where the bubble previously sat.
  const handleMinimize = () => {
    setIsOpen(false);
    if (prevButtonPos.current) {
      setPosition(prevButtonPos.current);
    }
  };
  // Lazily create the 24kHz playback AudioContext (plus the analyser for the
  // volume meter) and resume it if the browser auto-suspended it.
  const initOutputAudioContext = () => {
    if (!audioContextRef.current) {
      // @ts-ignore
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
      const analyser = ctx.createAnalyser();
      // Tiny FFT: only coarse loudness is needed, not spectral detail.
      analyser.fftSize = 64;
      audioContextRef.current = ctx;
      analyserRef.current = analyser;
    }
    if (audioContextRef.current.state === 'suspended') {
      audioContextRef.current.resume();
    }
  };
  // Place the "call": open the WebSocket, then start streaming the mic as
  // soon as the socket is up. Requires a logged-in user.
  const handleConnect = async () => {
    const user = api.auth.getCurrentUser();
    if (!user) return;
    setStatus('CONNECTING');
    setTranscript('正在呼叫 AI 助理...');
    try {
      initOutputAudioContext();
      // Match ws/wss to the page protocol; identify the caller via query params.
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
      const wsUrl = `${protocol}//${window.location.host}/ws/live?userId=${user._id}&username=${encodeURIComponent(user.username)}`;
      console.log("Connecting to", wsUrl);
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;
      ws.onopen = async () => {
        console.log('WS Open');
        setStatus('CONNECTED');
        setTranscript('通话已接通');
        // Automatically start recording once connected (simulate phone call behavior)
        await startRecording();
      };
      ws.onmessage = async (event) => {
        try {
          const msg = JSON.parse(event.data);
          handleServerMessage(msg);
        } catch (e) {
          console.error("Parse error", e);
        }
      };
      ws.onclose = () => {
        console.log('WS Close');
        // handleDisconnect nulls wsRef before closing, so this does not recurse.
        handleDisconnect();
      };
      ws.onerror = (e) => {
        console.error('WS Error', e);
        setTranscript('连接中断');
        handleDisconnect();
      };
    } catch (e) {
      console.error("Connect failed", e);
      setStatus('DISCONNECTED');
      setTranscript('呼叫失败');
    }
  };
  // Dispatch one JSON message from the server. Shapes handled here:
  //   {type:'audio', data:<base64 PCM16 @ 24kHz>} — queue for playback
  //   {type:'text', content}                      — show as transcript
  //   {type:'turnComplete'}                       — AI finished a turn
  //   {type:'error', message}                     — surface to the user
  const handleServerMessage = async (msg: any) => {
    if (msg.type === 'audio' && msg.data && audioContextRef.current) {
      setStatus('SPEAKING');
      const ctx = audioContextRef.current;
      // base64 -> bytes -> Int16 samples -> Float32 in [-1, 1).
      const bytes = base64ToUint8Array(msg.data);
      const int16 = new Int16Array(bytes.buffer);
      const float32 = new Float32Array(int16.length);
      for(let i=0; i<int16.length; i++) float32[i] = int16[i] / 32768.0;
      const buffer = ctx.createBuffer(1, float32.length, OUTPUT_SAMPLE_RATE);
      buffer.copyToChannel(float32, 0);
      const source = ctx.createBufferSource();
      source.buffer = buffer;
      // Route through the analyser when available so playback drives the meter.
      if (analyserRef.current) {
        source.connect(analyserRef.current);
        analyserRef.current.connect(ctx.destination);
      } else {
        source.connect(ctx.destination);
      }
      // Gapless queueing: each chunk starts at the later of "now" and the end
      // of the previously scheduled chunk.
      const now = ctx.currentTime;
      const startTime = Math.max(now, nextPlayTimeRef.current);
      source.start(startTime);
      nextPlayTimeRef.current = startTime + buffer.duration;
      source.onended = () => {
        // Only flip back to LISTENING once the playback queue has (nearly)
        // drained; earlier chunks ending mid-stream are ignored.
        if (ctx.currentTime >= nextPlayTimeRef.current - 0.1) {
          setStatus('LISTENING');
        }
      };
    }
    if (msg.type === 'text' && msg.content) {
      setTranscript(msg.content);
    }
    if (msg.type === 'turnComplete') {
      setStatus('THINKING');
    }
    if (msg.type === 'error') {
      setTranscript(`错误: ${msg.message}`);
    }
  };
  // Capture the microphone, downsample each block to 16kHz PCM16, and stream
  // it as base64 JSON frames over the open WebSocket.
  const startRecording = async () => {
    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
    try {
      isRecordingRef.current = true;
      // 1. Get Stream
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          autoGainControl: true,
          noiseSuppression: true
        }
      });
      // The call may have been hung up while the permission prompt was open.
      if (!isRecordingRef.current) {
        stream.getTracks().forEach(t => t.stop());
        return;
      }
      mediaStreamRef.current = stream;
      // 2. Create Input Context
      // @ts-ignore
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor();
      inputAudioContextRef.current = ctx;
      await ctx.resume();
      const source = ctx.createMediaStreamSource(stream);
      const processor = ctx.createScriptProcessor(4096, 1, 1);
      // Mute gain
      // The processor needs a path to the destination for its callback to
      // fire; zero gain keeps the mic from being audible locally.
      const muteGain = ctx.createGain();
      muteGain.gain.value = 0;
      source.connect(processor);
      processor.connect(muteGain);
      muteGain.connect(ctx.destination);
      // Hoisted: the hardware sample rate is fixed for the context's lifetime.
      const contextSampleRate = ctx.sampleRate;
      processor.onaudioprocess = (e) => {
        if (!isRecordingRef.current) return;
        const inputData = e.inputBuffer.getChannelData(0);
        // 3. Downsample to 16000Hz for API compatibility
        const downsampledData = downsampleBuffer(inputData, contextSampleRate, TARGET_SAMPLE_RATE);
        // 4. Convert to PCM16
        const l = downsampledData.length;
        const int16Data = new Int16Array(l);
        for (let i = 0; i < l; i++) {
          // Clamp to [-1, 1] then scale asymmetrically (int16 range is
          // -32768..32767).
          let s = Math.max(-1, Math.min(1, downsampledData[i]));
          int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
        // 5. Send
        // Byte-by-byte string build (instead of one big apply/spread) avoids
        // argument-count limits on large buffers.
        let binary = '';
        const bytes = new Uint8Array(int16Data.buffer);
        const len = bytes.byteLength;
        for (let i = 0; i < len; i++) {
          binary += String.fromCharCode(bytes[i]);
        }
        const b64 = btoa(binary);
        if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
          wsRef.current.send(JSON.stringify({
            type: 'audio',
            data: b64
          }));
        }
      };
      sourceNodeRef.current = source;
      processorRef.current = processor;
      setStatus('LISTENING');
      // Don't set transcript here, keep "Connected" message until AI speaks or user status changes
    } catch (e) {
      console.error(e);
      isRecordingRef.current = false;
      setTranscript('麦克风访问失败');
    }
  };
  // Stop mic capture and release every input-side audio resource.
  const stopRecording = () => {
    isRecordingRef.current = false;
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (sourceNodeRef.current) {
      sourceNodeRef.current.disconnect();
      sourceNodeRef.current = null;
    }
    if (mediaStreamRef.current) {
      // Stopping the tracks also clears the browser's mic-in-use indicator.
      mediaStreamRef.current.getTracks().forEach(t => t.stop());
      mediaStreamRef.current = null;
    }
    if (inputAudioContextRef.current) {
      inputAudioContextRef.current.close().catch(()=>{});
      inputAudioContextRef.current = null;
    }
  };
  // Full teardown: socket, playback context, mic pipeline, and UI state.
  // Idempotent — all refs are null-checked and reset, so repeated calls
  // (e.g. ws.onclose firing after a manual hang-up) are harmless.
  const handleDisconnect = () => {
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (audioContextRef.current) {
      audioContextRef.current.close().catch(()=>{});
      audioContextRef.current = null;
    }
    stopRecording();
    setStatus('DISCONNECTED');
    setTranscript('');
    nextPlayTimeRef.current = 0;
  };
  // Hide the widget entirely for anonymous visitors.
  if (!api.auth.getCurrentUser()) return null;
  return (
    <div
      ref={containerRef}
      className={`fixed z-[9999] touch-none ${position ? '' : 'bottom-6 right-6'}`}
      style={position ? { left: position.x, top: position.y } : undefined}
    >
      {/* Collapsed state: draggable launcher bubble */}
      {!isOpen && (
        <div
          className="cursor-move"
          onMouseDown={handleDragStart}
          onTouchStart={handleDragStart}
        >
          <button
            onClick={handleToggleOpen}
            className="w-14 h-14 rounded-full bg-gradient-to-br from-indigo-600 to-purple-600 text-white shadow-2xl flex items-center justify-center hover:scale-110 transition-transform cursor-pointer border-2 border-white/20 animate-in zoom-in"
          >
            <Bot size={28} />
          </button>
        </div>
      )}
      {/* Expanded state: call panel */}
      {isOpen && (
        <div className="bg-slate-900 w-80 md:w-96 rounded-3xl shadow-2xl border border-slate-700 overflow-hidden flex flex-col animate-in slide-in-from-bottom-5 fade-in duration-300 h-[500px]">
          {/* Header */}
          <div
            className="bg-slate-800/50 p-4 flex justify-between items-center text-white shrink-0 backdrop-blur-md cursor-move select-none"
            onMouseDown={handleDragStart}
            onTouchStart={handleDragStart}
          >
            <div className="flex items-center gap-2">
              <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
              <span className="font-bold text-sm">AI 实时通话</span>
            </div>
            <div className="flex gap-2">
              {/* stopPropagation keeps header-button presses from starting a drag */}
              {status === 'DISCONNECTED' && (
                <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
              )}
              <button onClick={handleMinimize} title="最小化" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><ChevronDown size={20}/></button>
            </div>
          </div>
          {/* Main Visual */}
          <div className="flex-1 flex flex-col items-center justify-center p-6 relative">
            <div className={`relative w-40 h-40 flex items-center justify-center transition-all duration-500 ${status === 'LISTENING' ? 'scale-110' : 'scale-100'}`}>
              {/* Pulse Effect */}
              <div
                className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
                  status === 'SPEAKING' ? 'bg-blue-500/40' :
                  status === 'LISTENING' ? 'bg-green-500/40' :
                  status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
                }`}
                style={{ opacity: 0.5 + (volumeLevel / 200) }}
              ></div>
              {/* Ripple 1 */}
              <div
                className={`absolute inset-0 rounded-full border-2 border-white/10 transition-all duration-100`}
                style={{ transform: `scale(${1 + volumeLevel/100})` }}
              ></div>
              {/* Ripple 2 */}
              <div
                className={`absolute inset-0 rounded-full border border-white/20 transition-all duration-100 delay-75`}
                style={{ transform: `scale(${1 + volumeLevel/150})` }}
              ></div>
              {/* Center Icon */}
              <div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
                status === 'SPEAKING' ? 'bg-blue-600' :
                status === 'LISTENING' ? 'bg-green-600' :
                status === 'THINKING' ? 'bg-purple-600' :
                status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
              }`}>
                {status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
                status === 'LISTENING' ? <Mic size={40} className="animate-pulse"/> :
                status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
                status === 'CONNECTED' ? <Radio size={40}/> : <Phone size={40}/>}
              </div>
            </div>
            <div className="mt-8 text-center px-4 w-full">
              <p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
                status === 'SPEAKING' ? 'text-blue-400' :
                status === 'LISTENING' ? 'text-green-400' :
                status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
              }`}>
                {status === 'DISCONNECTED' ? '未连接' :
                status === 'CONNECTING' ? '呼叫中...' :
                status === 'CONNECTED' ? '通话建立' :
                status === 'LISTENING' ? '正在聆听...' :
                status === 'THINKING' ? '思考中...' : '正在说话'}
              </p>
              <p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
                {transcript}
              </p>
            </div>
          </div>
          {/* Controls */}
          <div className="p-6 pb-8 bg-slate-800/50 backdrop-blur-md border-t border-slate-700 flex justify-center">
            {status === 'DISCONNECTED' ? (
              <button
                onClick={handleConnect}
                className="w-full py-4 bg-green-500 hover:bg-green-600 text-white rounded-2xl font-bold flex items-center justify-center gap-2 transition-all hover:scale-[1.02] active:scale-95 shadow-lg shadow-green-500/30"
              >
                <Phone size={24} fill="currentColor" /> 呼叫 AI 助理
              </button>
            ) : (
              <div className="flex items-center gap-4 w-full justify-center">
                <div className="relative group">
                  <button
                    onClick={handleDisconnect}
                    className="w-20 h-20 rounded-full flex items-center justify-center shadow-2xl transition-all transform bg-red-500 hover:bg-red-600 text-white scale-100 hover:scale-110 active:scale-95 ring-4 ring-red-100"
                  >
                    <PhoneOff size={32} />
                  </button>
                  <div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-80 mt-2 font-bold">
                    挂断
                  </div>
                </div>
              </div>
            )}
          </div>
        </div>
      )}
    </div>
  );
};