dvc890 committed on
Commit
38975da
·
verified ·
1 Parent(s): 973cb77

Update components/LiveAssistant.tsx

Browse files
Files changed (1) hide show
  1. components/LiveAssistant.tsx +73 -40
components/LiveAssistant.tsx CHANGED
@@ -1,10 +1,10 @@
1
 
2
  import React, { useState, useRef, useEffect } from 'react';
3
- import { Mic, Power, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown } from 'lucide-react';
4
  import { api } from '../services/api';
5
 
6
  // --- Audio Types & Helpers ---
7
- const INPUT_SAMPLE_RATE = 16000;
8
  const OUTPUT_SAMPLE_RATE = 24000;
9
 
10
  function base64ToUint8Array(base64: string) {
@@ -17,6 +17,26 @@ function base64ToUint8Array(base64: string) {
17
  return bytes;
18
  }
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  export const LiveAssistant: React.FC = () => {
21
  const [isOpen, setIsOpen] = useState(false);
22
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
@@ -28,7 +48,7 @@ export const LiveAssistant: React.FC = () => {
28
  const containerRef = useRef<HTMLDivElement>(null);
29
  const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
30
  const hasMovedRef = useRef(false);
31
- const prevButtonPos = useRef<{x: number, y: number} | null>(null); // Store pos before expand
32
 
33
  // Audio Refs
34
  const audioContextRef = useRef<AudioContext | null>(null); // Output Context
@@ -44,7 +64,7 @@ export const LiveAssistant: React.FC = () => {
44
  const volumeIntervalRef = useRef<any>(null);
45
 
46
  // State Refs for async safety
47
- const isRecordingIntentRef = useRef(false);
48
 
49
  useEffect(() => {
50
  if (!isOpen) {
@@ -152,7 +172,6 @@ export const LiveAssistant: React.FC = () => {
152
  if (position) prevButtonPos.current = position;
153
  setIsOpen(true);
154
  } else {
155
- // Closing (via button click, though typically via minimize button)
156
  setIsOpen(false);
157
  }
158
  }
@@ -160,7 +179,7 @@ export const LiveAssistant: React.FC = () => {
160
 
161
  const handleMinimize = () => {
162
  setIsOpen(false);
163
- // Restore previous button position if it exists, otherwise leave it where it is (or reset)
164
  if (prevButtonPos.current) {
165
  setPosition(prevButtonPos.current);
166
  }
@@ -200,7 +219,7 @@ export const LiveAssistant: React.FC = () => {
200
  ws.onopen = () => {
201
  console.log('WS Open');
202
  setStatus('CONNECTED');
203
- setTranscript('连接成功,请按住麦克风说话');
204
  };
205
 
206
  ws.onmessage = async (event) => {
@@ -277,16 +296,24 @@ export const LiveAssistant: React.FC = () => {
277
  }
278
  };
279
 
 
 
 
 
 
 
 
 
280
  const startRecording = async () => {
 
281
  if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
282
 
283
  try {
284
- isRecordingIntentRef.current = true;
285
- if (status === 'SPEAKING') setStatus('CONNECTED');
286
-
287
  const stream = await navigator.mediaDevices.getUserMedia({
288
  audio: {
289
- sampleRate: INPUT_SAMPLE_RATE,
290
  channelCount: 1,
291
  echoCancellation: true,
292
  autoGainControl: true,
@@ -294,24 +321,24 @@ export const LiveAssistant: React.FC = () => {
294
  }
295
  });
296
 
297
- // Safety check: if user released mouse while getting stream
298
- if (!isRecordingIntentRef.current) {
299
  stream.getTracks().forEach(t => t.stop());
300
  return;
301
  }
302
 
303
  mediaStreamRef.current = stream;
304
 
 
305
  // @ts-ignore
306
  const AudioCtor = window.AudioContext || window.webkitAudioContext;
307
- const ctx = new AudioCtor({ sampleRate: INPUT_SAMPLE_RATE });
308
  inputAudioContextRef.current = ctx;
309
  await ctx.resume();
310
 
311
  const source = ctx.createMediaStreamSource(stream);
312
  const processor = ctx.createScriptProcessor(4096, 1, 1);
313
 
314
- // Create a mute gain node to prevent feedback while keeping the processor alive
315
  const muteGain = ctx.createGain();
316
  muteGain.gain.value = 0;
317
 
@@ -319,17 +346,25 @@ export const LiveAssistant: React.FC = () => {
319
  processor.connect(muteGain);
320
  muteGain.connect(ctx.destination);
321
 
 
 
322
  processor.onaudioprocess = (e) => {
323
- if (!isRecordingIntentRef.current) return;
324
 
325
  const inputData = e.inputBuffer.getChannelData(0);
326
- const l = inputData.length;
 
 
 
 
 
327
  const int16Data = new Int16Array(l);
328
  for (let i = 0; i < l; i++) {
329
- int16Data[i] = Math.max(-1, Math.min(1, inputData[i])) * 32767; // Clamp and convert
 
330
  }
331
 
332
- // Raw PCM -> Base64
333
  let binary = '';
334
  const bytes = new Uint8Array(int16Data.buffer);
335
  const len = bytes.byteLength;
@@ -354,13 +389,13 @@ export const LiveAssistant: React.FC = () => {
354
 
355
  } catch (e) {
356
  console.error(e);
357
- isRecordingIntentRef.current = false;
358
  setTranscript('无法访问麦克风');
359
  }
360
  };
361
 
362
  const stopRecording = () => {
363
- isRecordingIntentRef.current = false;
364
 
365
  // Cleanup Mic Processing
366
  if (processorRef.current) {
@@ -434,7 +469,7 @@ export const LiveAssistant: React.FC = () => {
434
  >
435
  <div className="flex items-center gap-2">
436
  <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
437
- <span className="font-bold text-sm">AI 实时通话 (代理模式)</span>
438
  </div>
439
  <div className="flex gap-2">
440
  <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
@@ -447,7 +482,7 @@ export const LiveAssistant: React.FC = () => {
447
  <div
448
  className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
449
  status === 'SPEAKING' ? 'bg-blue-500/40' :
450
- status === 'LISTENING' ? 'bg-green-500/40' :
451
  status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
452
  }`}
453
  style={{ opacity: 0.5 + (volumeLevel / 200) }}
@@ -462,12 +497,12 @@ export const LiveAssistant: React.FC = () => {
462
  ></div>
463
  <div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
464
  status === 'SPEAKING' ? 'bg-blue-600' :
465
- status === 'LISTENING' ? 'bg-green-600' :
466
  status === 'THINKING' ? 'bg-purple-600' :
467
  status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
468
  }`}>
469
  {status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
470
- status === 'LISTENING' ? <Mic size={40} className="animate-bounce"/> :
471
  status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
472
  status === 'CONNECTED' ? <Radio size={40}/> : <Power size={40}/>}
473
  </div>
@@ -476,13 +511,13 @@ export const LiveAssistant: React.FC = () => {
476
  <div className="mt-8 text-center px-4 w-full">
477
  <p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
478
  status === 'SPEAKING' ? 'text-blue-400' :
479
- status === 'LISTENING' ? 'text-green-400' :
480
  status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
481
  }`}>
482
  {status === 'DISCONNECTED' ? '未连接' :
483
  status === 'CONNECTING' ? '连接中...' :
484
  status === 'CONNECTED' ? '准备就绪' :
485
- status === 'LISTENING' ? '正在聆听...' :
486
  status === 'THINKING' ? '思考中...' : '正在说话'}
487
  </p>
488
  <p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
@@ -503,23 +538,21 @@ export const LiveAssistant: React.FC = () => {
503
  <div className="flex items-center gap-4 w-full justify-center">
504
  <div className="relative group">
505
  <button
506
- onMouseDown={startRecording}
507
- onMouseUp={stopRecording}
508
- onMouseLeave={stopRecording}
509
- onTouchStart={(e) => { e.preventDefault(); startRecording(); }}
510
- onTouchEnd={(e) => { e.preventDefault(); stopRecording(); }}
511
  className={`w-20 h-20 rounded-full flex items-center justify-center shadow-lg transition-all transform ${
512
- status === 'LISTENING' ? 'bg-green-500 scale-110 ring-4 ring-green-500/30' :
513
- 'bg-white text-slate-900 hover:bg-gray-100'
514
  }`}
515
  >
516
- <Mic size={32} fill={status==='LISTENING' ? 'white' : 'currentColor'} className={status==='LISTENING'?'text-white':''}/>
 
 
 
 
517
  </button>
518
- {status === 'CONNECTED' && (
519
- <div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-0 group-hover:opacity-100 transition-opacity">
520
- 按住说话
521
- </div>
522
- )}
523
  </div>
524
  </div>
525
  )}
 
1
 
2
  import React, { useState, useRef, useEffect } from 'react';
3
+ import { Mic, Power, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown, Square, StopCircle } from 'lucide-react';
4
  import { api } from '../services/api';
5
 
6
  // --- Audio Types & Helpers ---
7
+ const TARGET_SAMPLE_RATE = 16000;
8
  const OUTPUT_SAMPLE_RATE = 24000;
9
 
10
  function base64ToUint8Array(base64: string) {
 
17
  return bytes;
18
  }
19
 
20
+ // Simple downsampling algorithm (e.g. 48000 -> 16000)
21
+ function downsampleBuffer(buffer: Float32Array, inputRate: number, outputRate: number) {
22
+ if (outputRate === inputRate) {
23
+ return buffer;
24
+ }
25
+ const compression = inputRate / outputRate;
26
+ const length = Math.ceil(buffer.length / compression);
27
+ const result = new Float32Array(length);
28
+ let index = 0;
29
+ let inputIndex = 0;
30
+
31
+ while (index < length) {
32
+ const intIndex = Math.floor(inputIndex);
33
+ result[index] = buffer[intIndex] || 0;
34
+ index++;
35
+ inputIndex += compression;
36
+ }
37
+ return result;
38
+ }
39
+
40
  export const LiveAssistant: React.FC = () => {
41
  const [isOpen, setIsOpen] = useState(false);
42
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
 
48
  const containerRef = useRef<HTMLDivElement>(null);
49
  const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
50
  const hasMovedRef = useRef(false);
51
+ const prevButtonPos = useRef<{x: number, y: number} | null>(null);
52
 
53
  // Audio Refs
54
  const audioContextRef = useRef<AudioContext | null>(null); // Output Context
 
64
  const volumeIntervalRef = useRef<any>(null);
65
 
66
  // State Refs for async safety
67
+ const isRecordingRef = useRef(false);
68
 
69
  useEffect(() => {
70
  if (!isOpen) {
 
172
  if (position) prevButtonPos.current = position;
173
  setIsOpen(true);
174
  } else {
 
175
  setIsOpen(false);
176
  }
177
  }
 
179
 
180
  const handleMinimize = () => {
181
  setIsOpen(false);
182
+ // Restore previous button position if it exists
183
  if (prevButtonPos.current) {
184
  setPosition(prevButtonPos.current);
185
  }
 
219
  ws.onopen = () => {
220
  console.log('WS Open');
221
  setStatus('CONNECTED');
222
+ setTranscript('连接成功,点击麦克风开始说话');
223
  };
224
 
225
  ws.onmessage = async (event) => {
 
296
  }
297
  };
298
 
299
/**
 * Click handler for the microphone button: flips between recording and idle.
 * When the current status is 'LISTENING' the recording is stopped; in every
 * other status a start is requested. `startRecording` is fired without being
 * awaited, exactly as a plain call would be.
 */
const toggleRecording = async () => {
  const isListening = status === 'LISTENING';
  if (!isListening) {
    startRecording();
    return;
  }
  stopRecording();
};
306
+
307
  const startRecording = async () => {
308
+ // Allow interrupting Speaker to talk
309
  if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
310
 
311
  try {
312
+ isRecordingRef.current = true;
313
+
314
+ // 1. Get Stream
315
  const stream = await navigator.mediaDevices.getUserMedia({
316
  audio: {
 
317
  channelCount: 1,
318
  echoCancellation: true,
319
  autoGainControl: true,
 
321
  }
322
  });
323
 
324
+ if (!isRecordingRef.current) {
 
325
  stream.getTracks().forEach(t => t.stop());
326
  return;
327
  }
328
 
329
  mediaStreamRef.current = stream;
330
 
331
+ // 2. Create Input Context
332
  // @ts-ignore
333
  const AudioCtor = window.AudioContext || window.webkitAudioContext;
334
+ const ctx = new AudioCtor();
335
  inputAudioContextRef.current = ctx;
336
  await ctx.resume();
337
 
338
  const source = ctx.createMediaStreamSource(stream);
339
  const processor = ctx.createScriptProcessor(4096, 1, 1);
340
 
341
+ // Mute gain to prevent feedback loop
342
  const muteGain = ctx.createGain();
343
  muteGain.gain.value = 0;
344
 
 
346
  processor.connect(muteGain);
347
  muteGain.connect(ctx.destination);
348
 
349
+ const contextSampleRate = ctx.sampleRate; // e.g. 48000 or 44100
350
+
351
  processor.onaudioprocess = (e) => {
352
+ if (!isRecordingRef.current) return;
353
 
354
  const inputData = e.inputBuffer.getChannelData(0);
355
+
356
+ // 3. DOWNSAMPLE if necessary (Force to 16000 for backend compatibility)
357
+ const downsampledData = downsampleBuffer(inputData, contextSampleRate, TARGET_SAMPLE_RATE);
358
+
359
+ // 4. Convert to PCM16
360
+ const l = downsampledData.length;
361
  const int16Data = new Int16Array(l);
362
  for (let i = 0; i < l; i++) {
363
+ let s = Math.max(-1, Math.min(1, downsampledData[i]));
364
+ int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
365
  }
366
 
367
+ // 5. Send
368
  let binary = '';
369
  const bytes = new Uint8Array(int16Data.buffer);
370
  const len = bytes.byteLength;
 
389
 
390
  } catch (e) {
391
  console.error(e);
392
+ isRecordingRef.current = false;
393
  setTranscript('无法访问麦克风');
394
  }
395
  };
396
 
397
  const stopRecording = () => {
398
+ isRecordingRef.current = false;
399
 
400
  // Cleanup Mic Processing
401
  if (processorRef.current) {
 
469
  >
470
  <div className="flex items-center gap-2">
471
  <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
472
+ <span className="font-bold text-sm">AI 实时通话 (Toggle模式)</span>
473
  </div>
474
  <div className="flex gap-2">
475
  <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
 
482
  <div
483
  className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
484
  status === 'SPEAKING' ? 'bg-blue-500/40' :
485
+ status === 'LISTENING' ? 'bg-red-500/40' :
486
  status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
487
  }`}
488
  style={{ opacity: 0.5 + (volumeLevel / 200) }}
 
497
  ></div>
498
  <div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
499
  status === 'SPEAKING' ? 'bg-blue-600' :
500
+ status === 'LISTENING' ? 'bg-red-500' :
501
  status === 'THINKING' ? 'bg-purple-600' :
502
  status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
503
  }`}>
504
  {status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
505
+ status === 'LISTENING' ? <Mic size={40} className="animate-pulse"/> :
506
  status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
507
  status === 'CONNECTED' ? <Radio size={40}/> : <Power size={40}/>}
508
  </div>
 
511
  <div className="mt-8 text-center px-4 w-full">
512
  <p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
513
  status === 'SPEAKING' ? 'text-blue-400' :
514
+ status === 'LISTENING' ? 'text-red-400' :
515
  status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
516
  }`}>
517
  {status === 'DISCONNECTED' ? '未连接' :
518
  status === 'CONNECTING' ? '连接中...' :
519
  status === 'CONNECTED' ? '准备就绪' :
520
+ status === 'LISTENING' ? '正在录音...' :
521
  status === 'THINKING' ? '思考中...' : '正在说话'}
522
  </p>
523
  <p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
 
538
  <div className="flex items-center gap-4 w-full justify-center">
539
  <div className="relative group">
540
  <button
541
+ onClick={toggleRecording}
 
 
 
 
542
  className={`w-20 h-20 rounded-full flex items-center justify-center shadow-lg transition-all transform ${
543
+ status === 'LISTENING' ? 'bg-red-500 hover:bg-red-600 scale-110 ring-4 ring-red-500/30' :
544
+ 'bg-white text-slate-900 hover:bg-gray-100 hover:scale-105'
545
  }`}
546
  >
547
+ {status === 'LISTENING' ? (
548
+ <Square size={28} fill="white" className="text-white" />
549
+ ) : (
550
+ <Mic size={32} fill="currentColor" />
551
+ )}
552
  </button>
553
+ <div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-80 mt-2">
554
+ {status === 'LISTENING' ? '点击停止' : '点击说话'}
555
+ </div>
 
 
556
  </div>
557
  </div>
558
  )}