Spaces:
Sleeping
Sleeping
Update components/LiveAssistant.tsx
Browse files- components/LiveAssistant.tsx +73 -40
components/LiveAssistant.tsx
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
|
| 2 |
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
-
import { Mic, Power, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown } from 'lucide-react';
|
| 4 |
import { api } from '../services/api';
|
| 5 |
|
| 6 |
// --- Audio Types & Helpers ---
|
| 7 |
-
const
|
| 8 |
const OUTPUT_SAMPLE_RATE = 24000;
|
| 9 |
|
| 10 |
function base64ToUint8Array(base64: string) {
|
|
@@ -17,6 +17,26 @@ function base64ToUint8Array(base64: string) {
|
|
| 17 |
return bytes;
|
| 18 |
}
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
export const LiveAssistant: React.FC = () => {
|
| 21 |
const [isOpen, setIsOpen] = useState(false);
|
| 22 |
const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
|
|
@@ -28,7 +48,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 28 |
const containerRef = useRef<HTMLDivElement>(null);
|
| 29 |
const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
|
| 30 |
const hasMovedRef = useRef(false);
|
| 31 |
-
const prevButtonPos = useRef<{x: number, y: number} | null>(null);
|
| 32 |
|
| 33 |
// Audio Refs
|
| 34 |
const audioContextRef = useRef<AudioContext | null>(null); // Output Context
|
|
@@ -44,7 +64,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 44 |
const volumeIntervalRef = useRef<any>(null);
|
| 45 |
|
| 46 |
// State Refs for async safety
|
| 47 |
-
const
|
| 48 |
|
| 49 |
useEffect(() => {
|
| 50 |
if (!isOpen) {
|
|
@@ -152,7 +172,6 @@ export const LiveAssistant: React.FC = () => {
|
|
| 152 |
if (position) prevButtonPos.current = position;
|
| 153 |
setIsOpen(true);
|
| 154 |
} else {
|
| 155 |
-
// Closing (via button click, though typically via minimize button)
|
| 156 |
setIsOpen(false);
|
| 157 |
}
|
| 158 |
}
|
|
@@ -160,7 +179,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 160 |
|
| 161 |
const handleMinimize = () => {
|
| 162 |
setIsOpen(false);
|
| 163 |
-
// Restore previous button position if it exists
|
| 164 |
if (prevButtonPos.current) {
|
| 165 |
setPosition(prevButtonPos.current);
|
| 166 |
}
|
|
@@ -200,7 +219,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 200 |
ws.onopen = () => {
|
| 201 |
console.log('WS Open');
|
| 202 |
setStatus('CONNECTED');
|
| 203 |
-
setTranscript('
|
| 204 |
};
|
| 205 |
|
| 206 |
ws.onmessage = async (event) => {
|
|
@@ -277,16 +296,24 @@ export const LiveAssistant: React.FC = () => {
|
|
| 277 |
}
|
| 278 |
};
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
const startRecording = async () => {
|
|
|
|
| 281 |
if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
|
| 282 |
|
| 283 |
try {
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
const stream = await navigator.mediaDevices.getUserMedia({
|
| 288 |
audio: {
|
| 289 |
-
sampleRate: INPUT_SAMPLE_RATE,
|
| 290 |
channelCount: 1,
|
| 291 |
echoCancellation: true,
|
| 292 |
autoGainControl: true,
|
|
@@ -294,24 +321,24 @@ export const LiveAssistant: React.FC = () => {
|
|
| 294 |
}
|
| 295 |
});
|
| 296 |
|
| 297 |
-
|
| 298 |
-
if (!isRecordingIntentRef.current) {
|
| 299 |
stream.getTracks().forEach(t => t.stop());
|
| 300 |
return;
|
| 301 |
}
|
| 302 |
|
| 303 |
mediaStreamRef.current = stream;
|
| 304 |
|
|
|
|
| 305 |
// @ts-ignore
|
| 306 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 307 |
-
const ctx = new AudioCtor(
|
| 308 |
inputAudioContextRef.current = ctx;
|
| 309 |
await ctx.resume();
|
| 310 |
|
| 311 |
const source = ctx.createMediaStreamSource(stream);
|
| 312 |
const processor = ctx.createScriptProcessor(4096, 1, 1);
|
| 313 |
|
| 314 |
-
//
|
| 315 |
const muteGain = ctx.createGain();
|
| 316 |
muteGain.gain.value = 0;
|
| 317 |
|
|
@@ -319,17 +346,25 @@ export const LiveAssistant: React.FC = () => {
|
|
| 319 |
processor.connect(muteGain);
|
| 320 |
muteGain.connect(ctx.destination);
|
| 321 |
|
|
|
|
|
|
|
| 322 |
processor.onaudioprocess = (e) => {
|
| 323 |
-
if (!
|
| 324 |
|
| 325 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
const int16Data = new Int16Array(l);
|
| 328 |
for (let i = 0; i < l; i++) {
|
| 329 |
-
|
|
|
|
| 330 |
}
|
| 331 |
|
| 332 |
-
//
|
| 333 |
let binary = '';
|
| 334 |
const bytes = new Uint8Array(int16Data.buffer);
|
| 335 |
const len = bytes.byteLength;
|
|
@@ -354,13 +389,13 @@ export const LiveAssistant: React.FC = () => {
|
|
| 354 |
|
| 355 |
} catch (e) {
|
| 356 |
console.error(e);
|
| 357 |
-
|
| 358 |
setTranscript('无法访问麦克风');
|
| 359 |
}
|
| 360 |
};
|
| 361 |
|
| 362 |
const stopRecording = () => {
|
| 363 |
-
|
| 364 |
|
| 365 |
// Cleanup Mic Processing
|
| 366 |
if (processorRef.current) {
|
|
@@ -434,7 +469,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 434 |
>
|
| 435 |
<div className="flex items-center gap-2">
|
| 436 |
<div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
|
| 437 |
-
<span className="font-bold text-sm">AI 实时通话 (
|
| 438 |
</div>
|
| 439 |
<div className="flex gap-2">
|
| 440 |
<button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
|
|
@@ -447,7 +482,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 447 |
<div
|
| 448 |
className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
|
| 449 |
status === 'SPEAKING' ? 'bg-blue-500/40' :
|
| 450 |
-
status === 'LISTENING' ? 'bg-
|
| 451 |
status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
|
| 452 |
}`}
|
| 453 |
style={{ opacity: 0.5 + (volumeLevel / 200) }}
|
|
@@ -462,12 +497,12 @@ export const LiveAssistant: React.FC = () => {
|
|
| 462 |
></div>
|
| 463 |
<div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
|
| 464 |
status === 'SPEAKING' ? 'bg-blue-600' :
|
| 465 |
-
status === 'LISTENING' ? 'bg-
|
| 466 |
status === 'THINKING' ? 'bg-purple-600' :
|
| 467 |
status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
|
| 468 |
}`}>
|
| 469 |
{status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
|
| 470 |
-
status === 'LISTENING' ? <Mic size={40} className="animate-
|
| 471 |
status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
|
| 472 |
status === 'CONNECTED' ? <Radio size={40}/> : <Power size={40}/>}
|
| 473 |
</div>
|
|
@@ -476,13 +511,13 @@ export const LiveAssistant: React.FC = () => {
|
|
| 476 |
<div className="mt-8 text-center px-4 w-full">
|
| 477 |
<p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
|
| 478 |
status === 'SPEAKING' ? 'text-blue-400' :
|
| 479 |
-
status === 'LISTENING' ? 'text-
|
| 480 |
status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
|
| 481 |
}`}>
|
| 482 |
{status === 'DISCONNECTED' ? '未连接' :
|
| 483 |
status === 'CONNECTING' ? '连接中...' :
|
| 484 |
status === 'CONNECTED' ? '准备就绪' :
|
| 485 |
-
status === 'LISTENING' ? '
|
| 486 |
status === 'THINKING' ? '思考中...' : '正在说话'}
|
| 487 |
</p>
|
| 488 |
<p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
|
|
@@ -503,23 +538,21 @@ export const LiveAssistant: React.FC = () => {
|
|
| 503 |
<div className="flex items-center gap-4 w-full justify-center">
|
| 504 |
<div className="relative group">
|
| 505 |
<button
|
| 506 |
-
|
| 507 |
-
onMouseUp={stopRecording}
|
| 508 |
-
onMouseLeave={stopRecording}
|
| 509 |
-
onTouchStart={(e) => { e.preventDefault(); startRecording(); }}
|
| 510 |
-
onTouchEnd={(e) => { e.preventDefault(); stopRecording(); }}
|
| 511 |
className={`w-20 h-20 rounded-full flex items-center justify-center shadow-lg transition-all transform ${
|
| 512 |
-
status === 'LISTENING' ? 'bg-
|
| 513 |
-
'bg-white text-slate-900 hover:bg-gray-100'
|
| 514 |
}`}
|
| 515 |
>
|
| 516 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
</button>
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
</div>
|
| 522 |
-
)}
|
| 523 |
</div>
|
| 524 |
</div>
|
| 525 |
)}
|
|
|
|
| 1 |
|
| 2 |
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
+
import { Mic, Power, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown, Square, StopCircle } from 'lucide-react';
|
| 4 |
import { api } from '../services/api';
|
| 5 |
|
| 6 |
// --- Audio Types & Helpers ---
|
| 7 |
+
const TARGET_SAMPLE_RATE = 16000;
|
| 8 |
const OUTPUT_SAMPLE_RATE = 24000;
|
| 9 |
|
| 10 |
function base64ToUint8Array(base64: string) {
|
|
|
|
| 17 |
return bytes;
|
| 18 |
}
|
| 19 |
|
| 20 |
+
/**
 * Resamples a block of audio samples from `inputRate` to `outputRate`.
 *
 * When the rate is reduced, each output sample is the mean of the input
 * samples it spans. Averaging acts as a cheap low-pass filter: simply keeping
 * every Nth sample lets content above the new Nyquist frequency alias into
 * the result (an alternating +1/-1 signal decimated 2:1 would come out as a
 * constant +1).
 *
 * When the rate is increased, every window holds a single input sample, so
 * the result is a nearest-neighbour repeat of the input.
 *
 * @param buffer     Input samples.
 * @param inputRate  Sample rate of `buffer`, in Hz.
 * @param outputRate Desired sample rate, in Hz.
 * @returns `buffer` itself (not a copy) when the rates are equal; otherwise a
 *          new array of `Math.ceil(buffer.length / (inputRate / outputRate))`
 *          samples.
 */
function downsampleBuffer(buffer: Float32Array, inputRate: number, outputRate: number): Float32Array {
  if (outputRate === inputRate) {
    return buffer;
  }

  const ratio = inputRate / outputRate;
  const length = Math.ceil(buffer.length / ratio);
  const result = new Float32Array(length);

  for (let i = 0; i < length; i++) {
    // Window bounds are derived from the output index rather than from a
    // running `+= ratio` total, so rounding error cannot build up across the
    // block for non-integer ratios such as 44100 -> 16000.
    const start = Math.floor(i * ratio);
    // Each window covers at least one sample and never reads past the input.
    const end = Math.min(buffer.length, Math.max(start + 1, Math.floor((i + 1) * ratio)));

    let sum = 0;
    for (let j = start; j < end; j++) {
      // `|| 0` maps NaN (and any out-of-range read) to silence.
      sum += buffer[j] || 0;
    }
    result[i] = end > start ? sum / (end - start) : 0;
  }

  return result;
}
|
| 39 |
+
|
| 40 |
export const LiveAssistant: React.FC = () => {
|
| 41 |
const [isOpen, setIsOpen] = useState(false);
|
| 42 |
const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
|
|
|
|
| 48 |
const containerRef = useRef<HTMLDivElement>(null);
|
| 49 |
const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
|
| 50 |
const hasMovedRef = useRef(false);
|
| 51 |
+
const prevButtonPos = useRef<{x: number, y: number} | null>(null);
|
| 52 |
|
| 53 |
// Audio Refs
|
| 54 |
const audioContextRef = useRef<AudioContext | null>(null); // Output Context
|
|
|
|
| 64 |
const volumeIntervalRef = useRef<any>(null);
|
| 65 |
|
| 66 |
// State Refs for async safety
|
| 67 |
+
const isRecordingRef = useRef(false);
|
| 68 |
|
| 69 |
useEffect(() => {
|
| 70 |
if (!isOpen) {
|
|
|
|
| 172 |
if (position) prevButtonPos.current = position;
|
| 173 |
setIsOpen(true);
|
| 174 |
} else {
|
|
|
|
| 175 |
setIsOpen(false);
|
| 176 |
}
|
| 177 |
}
|
|
|
|
| 179 |
|
| 180 |
const handleMinimize = () => {
|
| 181 |
setIsOpen(false);
|
| 182 |
+
// Restore previous button position if it exists
|
| 183 |
if (prevButtonPos.current) {
|
| 184 |
setPosition(prevButtonPos.current);
|
| 185 |
}
|
|
|
|
| 219 |
ws.onopen = () => {
|
| 220 |
console.log('WS Open');
|
| 221 |
setStatus('CONNECTED');
|
| 222 |
+
setTranscript('连接成功,点击麦克风开始说话');
|
| 223 |
};
|
| 224 |
|
| 225 |
ws.onmessage = async (event) => {
|
|
|
|
| 296 |
}
|
| 297 |
};
|
| 298 |
|
| 299 |
+
// Click handler for the mic button: stops the capture when the UI status is
// 'LISTENING', otherwise asks startRecording to begin one.
const toggleRecording = async () => {
  if (status !== 'LISTENING') {
    startRecording();
    return;
  }
  stopRecording();
};
|
| 306 |
+
|
| 307 |
const startRecording = async () => {
|
| 308 |
+
// Allow interrupting Speaker to talk
|
| 309 |
if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
|
| 310 |
|
| 311 |
try {
|
| 312 |
+
isRecordingRef.current = true;
|
| 313 |
+
|
| 314 |
+
// 1. Get Stream
|
| 315 |
const stream = await navigator.mediaDevices.getUserMedia({
|
| 316 |
audio: {
|
|
|
|
| 317 |
channelCount: 1,
|
| 318 |
echoCancellation: true,
|
| 319 |
autoGainControl: true,
|
|
|
|
| 321 |
}
|
| 322 |
});
|
| 323 |
|
| 324 |
+
if (!isRecordingRef.current) {
|
|
|
|
| 325 |
stream.getTracks().forEach(t => t.stop());
|
| 326 |
return;
|
| 327 |
}
|
| 328 |
|
| 329 |
mediaStreamRef.current = stream;
|
| 330 |
|
| 331 |
+
// 2. Create Input Context
|
| 332 |
// @ts-ignore
|
| 333 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 334 |
+
const ctx = new AudioCtor();
|
| 335 |
inputAudioContextRef.current = ctx;
|
| 336 |
await ctx.resume();
|
| 337 |
|
| 338 |
const source = ctx.createMediaStreamSource(stream);
|
| 339 |
const processor = ctx.createScriptProcessor(4096, 1, 1);
|
| 340 |
|
| 341 |
+
// Mute gain to prevent feedback loop
|
| 342 |
const muteGain = ctx.createGain();
|
| 343 |
muteGain.gain.value = 0;
|
| 344 |
|
|
|
|
| 346 |
processor.connect(muteGain);
|
| 347 |
muteGain.connect(ctx.destination);
|
| 348 |
|
| 349 |
+
const contextSampleRate = ctx.sampleRate; // e.g. 48000 or 44100
|
| 350 |
+
|
| 351 |
processor.onaudioprocess = (e) => {
|
| 352 |
+
if (!isRecordingRef.current) return;
|
| 353 |
|
| 354 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 355 |
+
|
| 356 |
+
// 3. DOWNSAMPLE if necessary (Force to 16000 for backend compatibility)
|
| 357 |
+
const downsampledData = downsampleBuffer(inputData, contextSampleRate, TARGET_SAMPLE_RATE);
|
| 358 |
+
|
| 359 |
+
// 4. Convert to PCM16
|
| 360 |
+
const l = downsampledData.length;
|
| 361 |
const int16Data = new Int16Array(l);
|
| 362 |
for (let i = 0; i < l; i++) {
|
| 363 |
+
let s = Math.max(-1, Math.min(1, downsampledData[i]));
|
| 364 |
+
int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
|
| 365 |
}
|
| 366 |
|
| 367 |
+
// 5. Send
|
| 368 |
let binary = '';
|
| 369 |
const bytes = new Uint8Array(int16Data.buffer);
|
| 370 |
const len = bytes.byteLength;
|
|
|
|
| 389 |
|
| 390 |
} catch (e) {
|
| 391 |
console.error(e);
|
| 392 |
+
isRecordingRef.current = false;
|
| 393 |
setTranscript('无法访问麦克风');
|
| 394 |
}
|
| 395 |
};
|
| 396 |
|
| 397 |
const stopRecording = () => {
|
| 398 |
+
isRecordingRef.current = false;
|
| 399 |
|
| 400 |
// Cleanup Mic Processing
|
| 401 |
if (processorRef.current) {
|
|
|
|
| 469 |
>
|
| 470 |
<div className="flex items-center gap-2">
|
| 471 |
<div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
|
| 472 |
+
<span className="font-bold text-sm">AI 实时通话 (Toggle模式)</span>
|
| 473 |
</div>
|
| 474 |
<div className="flex gap-2">
|
| 475 |
<button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
|
|
|
|
| 482 |
<div
|
| 483 |
className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
|
| 484 |
status === 'SPEAKING' ? 'bg-blue-500/40' :
|
| 485 |
+
status === 'LISTENING' ? 'bg-red-500/40' :
|
| 486 |
status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
|
| 487 |
}`}
|
| 488 |
style={{ opacity: 0.5 + (volumeLevel / 200) }}
|
|
|
|
| 497 |
></div>
|
| 498 |
<div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
|
| 499 |
status === 'SPEAKING' ? 'bg-blue-600' :
|
| 500 |
+
status === 'LISTENING' ? 'bg-red-500' :
|
| 501 |
status === 'THINKING' ? 'bg-purple-600' :
|
| 502 |
status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
|
| 503 |
}`}>
|
| 504 |
{status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
|
| 505 |
+
status === 'LISTENING' ? <Mic size={40} className="animate-pulse"/> :
|
| 506 |
status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
|
| 507 |
status === 'CONNECTED' ? <Radio size={40}/> : <Power size={40}/>}
|
| 508 |
</div>
|
|
|
|
| 511 |
<div className="mt-8 text-center px-4 w-full">
|
| 512 |
<p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
|
| 513 |
status === 'SPEAKING' ? 'text-blue-400' :
|
| 514 |
+
status === 'LISTENING' ? 'text-red-400' :
|
| 515 |
status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
|
| 516 |
}`}>
|
| 517 |
{status === 'DISCONNECTED' ? '未连接' :
|
| 518 |
status === 'CONNECTING' ? '连接中...' :
|
| 519 |
status === 'CONNECTED' ? '准备就绪' :
|
| 520 |
+
status === 'LISTENING' ? '正在录音...' :
|
| 521 |
status === 'THINKING' ? '思考中...' : '正在说话'}
|
| 522 |
</p>
|
| 523 |
<p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
|
|
|
|
| 538 |
<div className="flex items-center gap-4 w-full justify-center">
|
| 539 |
<div className="relative group">
|
| 540 |
<button
|
| 541 |
+
onClick={toggleRecording}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
className={`w-20 h-20 rounded-full flex items-center justify-center shadow-lg transition-all transform ${
|
| 543 |
+
status === 'LISTENING' ? 'bg-red-500 hover:bg-red-600 scale-110 ring-4 ring-red-500/30' :
|
| 544 |
+
'bg-white text-slate-900 hover:bg-gray-100 hover:scale-105'
|
| 545 |
}`}
|
| 546 |
>
|
| 547 |
+
{status === 'LISTENING' ? (
|
| 548 |
+
<Square size={28} fill="white" className="text-white" />
|
| 549 |
+
) : (
|
| 550 |
+
<Mic size={32} fill="currentColor" />
|
| 551 |
+
)}
|
| 552 |
</button>
|
| 553 |
+
<div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-80 mt-2">
|
| 554 |
+
{status === 'LISTENING' ? '点击停止' : '点击说话'}
|
| 555 |
+
</div>
|
|
|
|
|
|
|
| 556 |
</div>
|
| 557 |
</div>
|
| 558 |
)}
|