// stud-manager / components / LiveAssistant.tsx
// Last change: "Update components/LiveAssistant.tsx" by dvc890 (commit 0ae61be, verified)
import React, { useState, useRef, useEffect } from 'react';
import { Mic, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown, Phone, PhoneOff } from 'lucide-react';
import { api } from '../services/api';
// --- Audio Types & Helpers ---
// Rate the microphone capture is downsampled to before being streamed to the backend.
const TARGET_SAMPLE_RATE = 16000;
// Rate at which server-sent PCM16 audio chunks are decoded and played back.
const OUTPUT_SAMPLE_RATE = 24000;
/**
 * Decode a base64 string into its raw bytes.
 * Used to unpack PCM16 audio chunks received from the server.
 */
function base64ToUint8Array(base64: string) {
  const decoded = atob(base64);
  const out = new Uint8Array(decoded.length);
  for (let i = decoded.length - 1; i >= 0; i--) {
    out[i] = decoded.charCodeAt(i);
  }
  return out;
}
/**
 * Reduce a Float32 audio buffer from inputRate to outputRate by picking the
 * nearest-previous source sample for each output slot (no interpolation).
 * Forces mic input down to 16 kHz for backend compatibility.
 * Returns the original buffer untouched when the rates already match.
 */
function downsampleBuffer(buffer: Float32Array, inputRate: number, outputRate: number) {
  if (outputRate === inputRate) {
    return buffer;
  }
  const step = inputRate / outputRate;
  const outLength = Math.ceil(buffer.length / step);
  const result = new Float32Array(outLength);
  // Accumulate the fractional source position and floor it per output slot.
  let srcPos = 0;
  for (let outIdx = 0; outIdx < outLength; outIdx++) {
    result[outIdx] = buffer[Math.floor(srcPos)] || 0;
    srcPos += step;
  }
  return result;
}
/**
 * LiveAssistant — floating, draggable "phone call" widget for talking to an
 * AI assistant in real time.
 *
 * Pipeline:
 *   uplink:   mic → ScriptProcessorNode → downsample to 16 kHz → PCM16 →
 *             base64 → WS frame { type: 'audio', data }
 *   downlink: WS { type: 'audio' } → base64 → PCM16 @ 24 kHz →
 *             AudioBufferSourceNode chunks scheduled back-to-back
 *
 * Renders nothing when no user is logged in.
 */
export const LiveAssistant: React.FC = () => {
  const [isOpen, setIsOpen] = useState(false);
  // Call lifecycle; drives the colors, icons and labels throughout the JSX.
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
  const [transcript, setTranscript] = useState('');
  const [volumeLevel, setVolumeLevel] = useState(0);

  // --- Dragging state ---
  // position === null means "use the default bottom-right CSS anchor".
  const [position, setPosition] = useState<{x: number, y: number} | null>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
  // True once the pointer moved > 5px — used to suppress click-to-toggle after a drag.
  const hasMovedRef = useRef(false);
  // Saved collapsed-button position so minimizing restores it.
  const prevButtonPos = useRef<{x: number, y: number} | null>(null);

  // --- Audio refs ---
  const audioContextRef = useRef<AudioContext | null>(null); // Output (playback) context
  const inputAudioContextRef = useRef<AudioContext | null>(null); // Input (mic) context
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const wsRef = useRef<WebSocket | null>(null);
  // Absolute AudioContext time at which the next downlink chunk should start.
  const nextPlayTimeRef = useRef<number>(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const volumeIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null);

  // Ref mirror of the recording flag so async callbacks see the live value.
  const isRecordingRef = useRef(false);

  useEffect(() => {
    // Closing the panel hangs up; the cleanup also hangs up on unmount.
    if (!isOpen) {
      handleDisconnect();
    }
    // Re-clamp into the viewport when toggling open/closed (the open panel
    // is much larger than the collapsed button).
    if (position && containerRef.current) {
      const { innerWidth, innerHeight } = window;
      const rect = containerRef.current.getBoundingClientRect();
      const newX = Math.min(Math.max(0, position.x), innerWidth - rect.width);
      const newY = Math.min(Math.max(0, position.y), innerHeight - rect.height);
      if (newX !== position.x || newY !== position.y) {
        setPosition({ x: newX, y: newY });
      }
    }
    return () => {
      handleDisconnect();
    };
  }, [isOpen]);

  // Drag logic: window-level listeners so the drag keeps tracking even when
  // the pointer leaves the widget.
  useEffect(() => {
    const handleMove = (e: MouseEvent | TouchEvent) => {
      if (!dragRef.current.isDragging) return;
      // touchmove is registered non-passive so we can block page scrolling
      // for the duration of a drag.
      if ('touches' in e) e.preventDefault();
      const clientX = 'touches' in e ? e.touches[0].clientX : (e as MouseEvent).clientX;
      const clientY = 'touches' in e ? e.touches[0].clientY : (e as MouseEvent).clientY;
      const deltaX = clientX - dragRef.current.startX;
      const deltaY = clientY - dragRef.current.startY;
      // 5px dead zone separates a click from a drag.
      if (Math.abs(deltaX) > 5 || Math.abs(deltaY) > 5) {
        hasMovedRef.current = true;
      }
      let newX = dragRef.current.initialLeft + deltaX;
      let newY = dragRef.current.initialTop + deltaY;
      // Keep the widget fully on-screen.
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        const { innerWidth, innerHeight } = window;
        newX = Math.min(Math.max(0, newX), innerWidth - rect.width);
        newY = Math.min(Math.max(0, newY), innerHeight - rect.height);
      }
      setPosition({ x: newX, y: newY });
    };
    const handleUp = () => {
      dragRef.current.isDragging = false;
      document.body.style.userSelect = '';
    };
    window.addEventListener('mousemove', handleMove);
    window.addEventListener('mouseup', handleUp);
    window.addEventListener('touchmove', handleMove, { passive: false });
    window.addEventListener('touchend', handleUp);
    return () => {
      window.removeEventListener('mousemove', handleMove);
      window.removeEventListener('mouseup', handleUp);
      window.removeEventListener('touchmove', handleMove);
      window.removeEventListener('touchend', handleUp);
    };
  }, []);

  // Volume visualizer: poll the playback analyser ~10×/s while in a call.
  useEffect(() => {
    if (status === 'DISCONNECTED') {
      setVolumeLevel(0);
      return;
    }
    volumeIntervalRef.current = setInterval(() => {
      if (analyserRef.current) {
        const array = new Uint8Array(analyserRef.current.frequencyBinCount);
        analyserRef.current.getByteFrequencyData(array);
        const avg = array.reduce((a, b) => a + b) / array.length;
        // Scale and clamp to 0–100 for the CSS transforms below.
        setVolumeLevel(Math.min(100, avg * 1.5));
      }
    }, 100);
    return () => {
      if (volumeIntervalRef.current) clearInterval(volumeIntervalRef.current);
    };
  }, [status]);

  /** Begin a drag from either the collapsed button or the panel header. */
  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!containerRef.current) return;
    const clientX = 'touches' in e ? e.touches[0].clientX : (e as React.MouseEvent).clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : (e as React.MouseEvent).clientY;
    const rect = containerRef.current.getBoundingClientRect();
    if (!position) {
      // First drag ever: convert the CSS-anchored position into coordinates.
      setPosition({ x: rect.left, y: rect.top });
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: rect.left, initialTop: rect.top };
    } else {
      dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: position.x, initialTop: position.y };
    }
    hasMovedRef.current = false;
    document.body.style.userSelect = 'none';
  };

  /** Toggle the panel — but only for a genuine click, not the end of a drag. */
  const handleToggleOpen = () => {
    if (!hasMovedRef.current) {
      if (!isOpen) {
        // Opening: remember where the collapsed button sat.
        if (position) prevButtonPos.current = position;
        setIsOpen(true);
      } else {
        setIsOpen(false);
      }
    }
  };

  /** Collapse the panel back to the button, restoring the button's position. */
  const handleMinimize = () => {
    setIsOpen(false);
    if (prevButtonPos.current) {
      setPosition(prevButtonPos.current);
    }
  };

  /** Lazily create (and resume) the 24 kHz playback context plus its analyser. */
  const initOutputAudioContext = () => {
    if (!audioContextRef.current) {
      // @ts-ignore — webkitAudioContext fallback for older Safari
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 64; // tiny FFT — only a coarse volume level is needed
      audioContextRef.current = ctx;
      analyserRef.current = analyser;
    }
    // Contexts start suspended until a user gesture; this runs from a click.
    if (audioContextRef.current.state === 'suspended') {
      audioContextRef.current.resume();
    }
  };

  /** Open the WebSocket "call" and start streaming the microphone on success. */
  const handleConnect = async () => {
    const user = api.auth.getCurrentUser();
    if (!user) return;
    setStatus('CONNECTING');
    setTranscript('正在呼叫 AI 助理...');
    try {
      initOutputAudioContext();
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
      const wsUrl = `${protocol}//${window.location.host}/ws/live?userId=${user._id}&username=${encodeURIComponent(user.username)}`;
      console.log("Connecting to", wsUrl);
      const ws = new WebSocket(wsUrl);
      wsRef.current = ws;
      ws.onopen = async () => {
        console.log('WS Open');
        setStatus('CONNECTED');
        setTranscript('通话已接通');
        // Start recording immediately to mimic phone-call behavior.
        await startRecording();
      };
      ws.onmessage = async (event) => {
        try {
          const msg = JSON.parse(event.data);
          handleServerMessage(msg);
        } catch (e) {
          console.error("Parse error", e);
        }
      };
      ws.onclose = () => {
        console.log('WS Close');
        handleDisconnect();
      };
      ws.onerror = (e) => {
        console.error('WS Error', e);
        // A close event follows the error; detach its handler so the extra
        // handleDisconnect cannot wipe the message. Tear down first, then set
        // the transcript (handleDisconnect clears it).
        ws.onclose = null;
        handleDisconnect();
        setTranscript('连接中断');
      };
    } catch (e) {
      console.error("Connect failed", e);
      setStatus('DISCONNECTED');
      setTranscript('呼叫失败');
    }
  };

  /** Dispatch one parsed server message: audio chunk, caption, turn marker or error. */
  const handleServerMessage = async (msg: any) => {
    if (msg.type === 'audio' && msg.data && audioContextRef.current) {
      setStatus('SPEAKING');
      const ctx = audioContextRef.current;
      const bytes = base64ToUint8Array(msg.data);
      // PCM16 requires an even byte count: truncate a malformed odd-length
      // chunk instead of letting the Int16Array constructor throw.
      const sampleCount = Math.floor(bytes.byteLength / 2);
      if (sampleCount === 0) return; // createBuffer(…, 0, …) would throw
      const int16 = new Int16Array(bytes.buffer, 0, sampleCount);
      const float32 = new Float32Array(int16.length);
      for (let i = 0; i < int16.length; i++) float32[i] = int16[i] / 32768.0;
      const buffer = ctx.createBuffer(1, float32.length, OUTPUT_SAMPLE_RATE);
      buffer.copyToChannel(float32, 0);
      const source = ctx.createBufferSource();
      source.buffer = buffer;
      // Route playback through the analyser so the visualizer reacts to it.
      if (analyserRef.current) {
        source.connect(analyserRef.current);
        analyserRef.current.connect(ctx.destination);
      } else {
        source.connect(ctx.destination);
      }
      // Gapless scheduling: start at the later of "now" and the end of the
      // previously queued chunk.
      const now = ctx.currentTime;
      const startTime = Math.max(now, nextPlayTimeRef.current);
      source.start(startTime);
      nextPlayTimeRef.current = startTime + buffer.duration;
      source.onended = () => {
        // Only return to LISTENING once the final queued chunk has drained.
        if (ctx.currentTime >= nextPlayTimeRef.current - 0.1) {
          setStatus('LISTENING');
        }
      };
    }
    if (msg.type === 'text' && msg.content) {
      setTranscript(msg.content);
    }
    if (msg.type === 'turnComplete') {
      setStatus('THINKING');
    }
    if (msg.type === 'error') {
      setTranscript(`错误: ${msg.message}`);
    }
  };

  /**
   * Capture the microphone and stream 16 kHz PCM16 (base64 inside JSON frames)
   * over the open WebSocket. No-op when the socket is not open.
   */
  const startRecording = async () => {
    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
    try {
      isRecordingRef.current = true;
      // 1. Acquire the mic with speech-oriented processing enabled.
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          autoGainControl: true,
          noiseSuppression: true
        }
      });
      // The call may have been hung up while the permission prompt was open.
      if (!isRecordingRef.current) {
        stream.getTracks().forEach(t => t.stop());
        return;
      }
      mediaStreamRef.current = stream;
      // 2. Input context at the device's native sample rate.
      // @ts-ignore — webkitAudioContext fallback for older Safari
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      const ctx = new AudioCtor();
      inputAudioContextRef.current = ctx;
      await ctx.resume();
      const source = ctx.createMediaStreamSource(stream);
      // NOTE(review): ScriptProcessorNode is deprecated; an AudioWorklet would
      // move processing off the main thread, but this keeps broad support.
      const processor = ctx.createScriptProcessor(4096, 1, 1);
      // Zero-gain sink: the processor must be connected through to the
      // destination to keep firing, but the mic must not reach the speakers.
      const muteGain = ctx.createGain();
      muteGain.gain.value = 0;
      source.connect(processor);
      processor.connect(muteGain);
      muteGain.connect(ctx.destination);
      const contextSampleRate = ctx.sampleRate;
      processor.onaudioprocess = (e) => {
        if (!isRecordingRef.current) return;
        const inputData = e.inputBuffer.getChannelData(0);
        // 3. Downsample to 16 kHz for API compatibility.
        const downsampledData = downsampleBuffer(inputData, contextSampleRate, TARGET_SAMPLE_RATE);
        // 4. Convert float32 [-1, 1] samples to PCM16.
        const l = downsampledData.length;
        const int16Data = new Int16Array(l);
        for (let i = 0; i < l; i++) {
          let s = Math.max(-1, Math.min(1, downsampledData[i]));
          int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
        // 5. Base64-encode the raw bytes and send.
        let binary = '';
        const bytes = new Uint8Array(int16Data.buffer);
        const len = bytes.byteLength;
        for (let i = 0; i < len; i++) {
          binary += String.fromCharCode(bytes[i]);
        }
        const b64 = btoa(binary);
        if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
          wsRef.current.send(JSON.stringify({
            type: 'audio',
            data: b64
          }));
        }
      };
      sourceNodeRef.current = source;
      processorRef.current = processor;
      setStatus('LISTENING');
      // Keep the "connected" transcript until the AI speaks or status changes.
    } catch (e) {
      console.error(e);
      isRecordingRef.current = false;
      setTranscript('麦克风访问失败');
    }
  };

  /** Stop the mic capture chain and release every input-side resource. */
  const stopRecording = () => {
    isRecordingRef.current = false;
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (sourceNodeRef.current) {
      sourceNodeRef.current.disconnect();
      sourceNodeRef.current = null;
    }
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(t => t.stop());
      mediaStreamRef.current = null;
    }
    if (inputAudioContextRef.current) {
      inputAudioContextRef.current.close().catch(() => {});
      inputAudioContextRef.current = null;
    }
  };

  /** Hang up: close the socket and both audio contexts, reset all call state. */
  const handleDisconnect = () => {
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    if (audioContextRef.current) {
      audioContextRef.current.close().catch(() => {});
      audioContextRef.current = null;
    }
    stopRecording();
    setStatus('DISCONNECTED');
    setTranscript('');
    nextPlayTimeRef.current = 0;
  };

  // Hide the widget entirely for anonymous visitors.
  if (!api.auth.getCurrentUser()) return null;

  return (
    <div
      ref={containerRef}
      className={`fixed z-[9999] touch-none ${position ? '' : 'bottom-6 right-6'}`}
      style={position ? { left: position.x, top: position.y } : undefined}
    >
      {!isOpen && (
        <div
          className="cursor-move"
          onMouseDown={handleDragStart}
          onTouchStart={handleDragStart}
        >
          <button
            onClick={handleToggleOpen}
            className="w-14 h-14 rounded-full bg-gradient-to-br from-indigo-600 to-purple-600 text-white shadow-2xl flex items-center justify-center hover:scale-110 transition-transform cursor-pointer border-2 border-white/20 animate-in zoom-in"
          >
            <Bot size={28} />
          </button>
        </div>
      )}
      {isOpen && (
        <div className="bg-slate-900 w-80 md:w-96 rounded-3xl shadow-2xl border border-slate-700 overflow-hidden flex flex-col animate-in slide-in-from-bottom-5 fade-in duration-300 h-[500px]">
          {/* Header (drag handle) */}
          <div
            className="bg-slate-800/50 p-4 flex justify-between items-center text-white shrink-0 backdrop-blur-md cursor-move select-none"
            onMouseDown={handleDragStart}
            onTouchStart={handleDragStart}
          >
            <div className="flex items-center gap-2">
              <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
              <span className="font-bold text-sm">AI 实时通话</span>
            </div>
            <div className="flex gap-2">
              {status === 'DISCONNECTED' && (
                <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
              )}
              <button onClick={handleMinimize} title="最小化" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><ChevronDown size={20}/></button>
            </div>
          </div>
          {/* Main visual: pulsing orb driven by status + volumeLevel */}
          <div className="flex-1 flex flex-col items-center justify-center p-6 relative">
            <div className={`relative w-40 h-40 flex items-center justify-center transition-all duration-500 ${status === 'LISTENING' ? 'scale-110' : 'scale-100'}`}>
              {/* Pulse effect */}
              <div
                className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
                  status === 'SPEAKING' ? 'bg-blue-500/40' :
                  status === 'LISTENING' ? 'bg-green-500/40' :
                  status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
                }`}
                style={{ opacity: 0.5 + (volumeLevel / 200) }}
              ></div>
              {/* Ripple 1 */}
              <div
                className={`absolute inset-0 rounded-full border-2 border-white/10 transition-all duration-100`}
                style={{ transform: `scale(${1 + volumeLevel/100})` }}
              ></div>
              {/* Ripple 2 */}
              <div
                className={`absolute inset-0 rounded-full border border-white/20 transition-all duration-100 delay-75`}
                style={{ transform: `scale(${1 + volumeLevel/150})` }}
              ></div>
              {/* Center icon */}
              <div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
                status === 'SPEAKING' ? 'bg-blue-600' :
                status === 'LISTENING' ? 'bg-green-600' :
                status === 'THINKING' ? 'bg-purple-600' :
                status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
              }`}>
                {status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
                 status === 'LISTENING' ? <Mic size={40} className="animate-pulse"/> :
                 status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
                 status === 'CONNECTED' ? <Radio size={40}/> : <Phone size={40}/>}
              </div>
            </div>
            <div className="mt-8 text-center px-4 w-full">
              <p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
                status === 'SPEAKING' ? 'text-blue-400' :
                status === 'LISTENING' ? 'text-green-400' :
                status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
              }`}>
                {status === 'DISCONNECTED' ? '未连接' :
                 status === 'CONNECTING' ? '呼叫中...' :
                 status === 'CONNECTED' ? '通话建立' :
                 status === 'LISTENING' ? '正在聆听...' :
                 status === 'THINKING' ? '思考中...' : '正在说话'}
              </p>
              <p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
                {transcript}
              </p>
            </div>
          </div>
          {/* Controls: call / hang-up */}
          <div className="p-6 pb-8 bg-slate-800/50 backdrop-blur-md border-t border-slate-700 flex justify-center">
            {status === 'DISCONNECTED' ? (
              <button
                onClick={handleConnect}
                className="w-full py-4 bg-green-500 hover:bg-green-600 text-white rounded-2xl font-bold flex items-center justify-center gap-2 transition-all hover:scale-[1.02] active:scale-95 shadow-lg shadow-green-500/30"
              >
                <Phone size={24} fill="currentColor" /> 呼叫 AI 助理
              </button>
            ) : (
              <div className="flex items-center gap-4 w-full justify-center">
                <div className="relative group">
                  <button
                    onClick={handleDisconnect}
                    className="w-20 h-20 rounded-full flex items-center justify-center shadow-2xl transition-all transform bg-red-500 hover:bg-red-600 text-white scale-100 hover:scale-110 active:scale-95 ring-4 ring-red-100"
                  >
                    <PhoneOff size={32} />
                  </button>
                  <div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-80 mt-2 font-bold">
                    挂断
                  </div>
                </div>
              </div>
            )}
          </div>
        </div>
      )}
    </div>
  );
};