Spaces:
Sleeping
Sleeping
Update components/LiveAssistant.tsx
Browse files- components/LiveAssistant.tsx +66 -65
components/LiveAssistant.tsx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
|
| 2 |
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
-
import { Mic,
|
| 4 |
import { api } from '../services/api';
|
| 5 |
|
| 6 |
// --- Audio Types & Helpers ---
|
|
@@ -28,29 +28,29 @@ export const LiveAssistant: React.FC = () => {
|
|
| 28 |
const containerRef = useRef<HTMLDivElement>(null);
|
| 29 |
const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
|
| 30 |
const hasMovedRef = useRef(false);
|
|
|
|
| 31 |
|
| 32 |
-
// Refs
|
| 33 |
const audioContextRef = useRef<AudioContext | null>(null); // Output Context
|
| 34 |
-
const inputAudioContextRef = useRef<AudioContext | null>(null); // Input Context
|
| 35 |
|
| 36 |
const mediaStreamRef = useRef<MediaStream | null>(null);
|
| 37 |
const processorRef = useRef<ScriptProcessorNode | null>(null);
|
| 38 |
const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
|
| 39 |
-
const gainNodeRef = useRef<GainNode | null>(null);
|
| 40 |
|
| 41 |
const wsRef = useRef<WebSocket | null>(null);
|
| 42 |
const nextPlayTimeRef = useRef<number>(0);
|
| 43 |
const analyserRef = useRef<AnalyserNode | null>(null);
|
| 44 |
const volumeIntervalRef = useRef<any>(null);
|
| 45 |
|
| 46 |
-
//
|
| 47 |
-
const
|
| 48 |
|
| 49 |
useEffect(() => {
|
| 50 |
if (!isOpen) {
|
| 51 |
handleDisconnect();
|
| 52 |
}
|
| 53 |
-
//
|
| 54 |
if (position && containerRef.current) {
|
| 55 |
const { innerWidth, innerHeight } = window;
|
| 56 |
const rect = containerRef.current.getBoundingClientRect();
|
|
@@ -65,6 +65,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 65 |
};
|
| 66 |
}, [isOpen]);
|
| 67 |
|
|
|
|
| 68 |
useEffect(() => {
|
| 69 |
const handleMove = (e: MouseEvent | TouchEvent) => {
|
| 70 |
if (!dragRef.current.isDragging) return;
|
|
@@ -111,6 +112,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 111 |
};
|
| 112 |
}, []);
|
| 113 |
|
|
|
|
| 114 |
useEffect(() => {
|
| 115 |
if (status === 'DISCONNECTED') {
|
| 116 |
setVolumeLevel(0);
|
|
@@ -129,42 +131,38 @@ export const LiveAssistant: React.FC = () => {
|
|
| 129 |
|
| 130 |
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
|
| 131 |
if (!containerRef.current) return;
|
| 132 |
-
|
| 133 |
-
// Prevent default to stop scrolling on mobile while dragging
|
| 134 |
-
// e.preventDefault(); // Note: might block click if not careful, handled by checking dragging state
|
| 135 |
-
|
| 136 |
const clientX = 'touches' in e ? e.touches[0].clientX : (e as React.MouseEvent).clientX;
|
| 137 |
const clientY = 'touches' in e ? e.touches[0].clientY : (e as React.MouseEvent).clientY;
|
| 138 |
-
|
| 139 |
const rect = containerRef.current.getBoundingClientRect();
|
| 140 |
|
| 141 |
-
// If position is null (initial state), set it to current computed position
|
| 142 |
if (!position) {
|
| 143 |
setPosition({ x: rect.left, y: rect.top });
|
| 144 |
-
dragRef.current = {
|
| 145 |
-
isDragging: true,
|
| 146 |
-
startX: clientX,
|
| 147 |
-
startY: clientY,
|
| 148 |
-
initialLeft: rect.left,
|
| 149 |
-
initialTop: rect.top
|
| 150 |
-
};
|
| 151 |
} else {
|
| 152 |
-
dragRef.current = {
|
| 153 |
-
isDragging: true,
|
| 154 |
-
startX: clientX,
|
| 155 |
-
startY: clientY,
|
| 156 |
-
initialLeft: position.x,
|
| 157 |
-
initialTop: position.y
|
| 158 |
-
};
|
| 159 |
}
|
| 160 |
-
|
| 161 |
hasMovedRef.current = false;
|
| 162 |
document.body.style.userSelect = 'none';
|
| 163 |
};
|
| 164 |
|
| 165 |
const handleToggleOpen = () => {
|
| 166 |
if (!hasMovedRef.current) {
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
}
|
| 169 |
};
|
| 170 |
|
|
@@ -173,16 +171,10 @@ export const LiveAssistant: React.FC = () => {
|
|
| 173 |
// @ts-ignore
|
| 174 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 175 |
const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
|
| 176 |
-
|
| 177 |
const analyser = ctx.createAnalyser();
|
| 178 |
analyser.fftSize = 64;
|
| 179 |
-
|
| 180 |
-
const gain = ctx.createGain();
|
| 181 |
-
gain.connect(ctx.destination);
|
| 182 |
-
|
| 183 |
audioContextRef.current = ctx;
|
| 184 |
analyserRef.current = analyser;
|
| 185 |
-
gainNodeRef.current = gain;
|
| 186 |
}
|
| 187 |
if (audioContextRef.current.state === 'suspended') {
|
| 188 |
audioContextRef.current.resume();
|
|
@@ -198,9 +190,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 198 |
|
| 199 |
try {
|
| 200 |
initOutputAudioContext();
|
| 201 |
-
|
| 202 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
| 203 |
-
// Append User Info to ensure backend creates a distinct session context
|
| 204 |
const wsUrl = `${protocol}//${window.location.host}/ws/live?userId=${user._id}&username=${encodeURIComponent(user.username)}`;
|
| 205 |
|
| 206 |
console.log("Connecting to", wsUrl);
|
|
@@ -256,9 +246,9 @@ export const LiveAssistant: React.FC = () => {
|
|
| 256 |
const source = ctx.createBufferSource();
|
| 257 |
source.buffer = buffer;
|
| 258 |
|
| 259 |
-
if (analyserRef.current
|
| 260 |
source.connect(analyserRef.current);
|
| 261 |
-
analyserRef.current.connect(
|
| 262 |
} else {
|
| 263 |
source.connect(ctx.destination);
|
| 264 |
}
|
|
@@ -291,41 +281,55 @@ export const LiveAssistant: React.FC = () => {
|
|
| 291 |
if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
|
| 292 |
|
| 293 |
try {
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
}
|
| 297 |
|
| 298 |
-
isRecordingRef.current = true; // Mark as recording
|
| 299 |
-
|
| 300 |
-
const stream = await navigator.mediaDevices.getUserMedia({ audio: {
|
| 301 |
-
sampleRate: INPUT_SAMPLE_RATE,
|
| 302 |
-
channelCount: 1,
|
| 303 |
-
echoCancellation: true,
|
| 304 |
-
autoGainControl: true,
|
| 305 |
-
noiseSuppression: true
|
| 306 |
-
}});
|
| 307 |
mediaStreamRef.current = stream;
|
| 308 |
|
| 309 |
-
// Use a new context for input to ensure 16k rate if browser supports specific ctx rate
|
| 310 |
// @ts-ignore
|
| 311 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 312 |
const ctx = new AudioCtor({ sampleRate: INPUT_SAMPLE_RATE });
|
| 313 |
inputAudioContextRef.current = ctx;
|
|
|
|
| 314 |
|
| 315 |
const source = ctx.createMediaStreamSource(stream);
|
| 316 |
const processor = ctx.createScriptProcessor(4096, 1, 1);
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
processor.onaudioprocess = (e) => {
|
| 319 |
-
|
| 320 |
-
if (!isRecordingRef.current) return;
|
| 321 |
|
| 322 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 323 |
const l = inputData.length;
|
| 324 |
const int16Data = new Int16Array(l);
|
| 325 |
for (let i = 0; i < l; i++) {
|
| 326 |
-
int16Data[i] = inputData[i] *
|
| 327 |
}
|
| 328 |
|
|
|
|
| 329 |
let binary = '';
|
| 330 |
const bytes = new Uint8Array(int16Data.buffer);
|
| 331 |
const len = bytes.byteLength;
|
|
@@ -341,29 +345,25 @@ export const LiveAssistant: React.FC = () => {
|
|
| 341 |
}));
|
| 342 |
}
|
| 343 |
};
|
| 344 |
-
|
| 345 |
-
source.connect(processor);
|
| 346 |
-
processor.connect(ctx.destination);
|
| 347 |
|
| 348 |
sourceNodeRef.current = source;
|
| 349 |
processorRef.current = processor;
|
|
|
|
| 350 |
setStatus('LISTENING');
|
| 351 |
setTranscript('正在聆听...');
|
| 352 |
|
| 353 |
} catch (e) {
|
| 354 |
console.error(e);
|
| 355 |
-
|
| 356 |
setTranscript('无法访问麦克风');
|
| 357 |
}
|
| 358 |
};
|
| 359 |
|
| 360 |
const stopRecording = () => {
|
| 361 |
-
|
| 362 |
-
isRecordingRef.current = false; // Stop processing loop immediately
|
| 363 |
|
| 364 |
// Cleanup Mic Processing
|
| 365 |
if (processorRef.current) {
|
| 366 |
-
processorRef.current.onaudioprocess = null; // Detach handler
|
| 367 |
processorRef.current.disconnect();
|
| 368 |
processorRef.current = null;
|
| 369 |
}
|
|
@@ -375,14 +375,15 @@ export const LiveAssistant: React.FC = () => {
|
|
| 375 |
mediaStreamRef.current.getTracks().forEach(t => t.stop());
|
| 376 |
mediaStreamRef.current = null;
|
| 377 |
}
|
| 378 |
-
// Force close input context to ensure hardware releases
|
| 379 |
if (inputAudioContextRef.current) {
|
| 380 |
inputAudioContextRef.current.close().catch(()=>{});
|
| 381 |
inputAudioContextRef.current = null;
|
| 382 |
}
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
|
|
|
|
|
|
| 386 |
};
|
| 387 |
|
| 388 |
const handleDisconnect = () => {
|
|
@@ -437,7 +438,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 437 |
</div>
|
| 438 |
<div className="flex gap-2">
|
| 439 |
<button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
|
| 440 |
-
<button onClick={
|
| 441 |
</div>
|
| 442 |
</div>
|
| 443 |
|
|
|
|
| 1 |
|
| 2 |
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
+
import { Mic, Power, Loader2, Bot, Volume2, Radio, RefreshCw, ChevronDown } from 'lucide-react';
|
| 4 |
import { api } from '../services/api';
|
| 5 |
|
| 6 |
// --- Audio Types & Helpers ---
|
|
|
|
| 28 |
const containerRef = useRef<HTMLDivElement>(null);
|
| 29 |
const dragRef = useRef({ isDragging: false, startX: 0, startY: 0, initialLeft: 0, initialTop: 0 });
|
| 30 |
const hasMovedRef = useRef(false);
|
| 31 |
+
const prevButtonPos = useRef<{x: number, y: number} | null>(null); // Store pos before expand
|
| 32 |
|
| 33 |
+
// Audio Refs
|
| 34 |
const audioContextRef = useRef<AudioContext | null>(null); // Output Context
|
| 35 |
+
const inputAudioContextRef = useRef<AudioContext | null>(null); // Input Context
|
| 36 |
|
| 37 |
const mediaStreamRef = useRef<MediaStream | null>(null);
|
| 38 |
const processorRef = useRef<ScriptProcessorNode | null>(null);
|
| 39 |
const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
|
|
|
|
| 40 |
|
| 41 |
const wsRef = useRef<WebSocket | null>(null);
|
| 42 |
const nextPlayTimeRef = useRef<number>(0);
|
| 43 |
const analyserRef = useRef<AnalyserNode | null>(null);
|
| 44 |
const volumeIntervalRef = useRef<any>(null);
|
| 45 |
|
| 46 |
+
// State Refs for async safety
|
| 47 |
+
const isRecordingIntentRef = useRef(false);
|
| 48 |
|
| 49 |
useEffect(() => {
|
| 50 |
if (!isOpen) {
|
| 51 |
handleDisconnect();
|
| 52 |
}
|
| 53 |
+
// Boundary check on open
|
| 54 |
if (position && containerRef.current) {
|
| 55 |
const { innerWidth, innerHeight } = window;
|
| 56 |
const rect = containerRef.current.getBoundingClientRect();
|
|
|
|
| 65 |
};
|
| 66 |
}, [isOpen]);
|
| 67 |
|
| 68 |
+
// Drag Logic
|
| 69 |
useEffect(() => {
|
| 70 |
const handleMove = (e: MouseEvent | TouchEvent) => {
|
| 71 |
if (!dragRef.current.isDragging) return;
|
|
|
|
| 112 |
};
|
| 113 |
}, []);
|
| 114 |
|
| 115 |
+
// Volume Visualizer
|
| 116 |
useEffect(() => {
|
| 117 |
if (status === 'DISCONNECTED') {
|
| 118 |
setVolumeLevel(0);
|
|
|
|
| 131 |
|
| 132 |
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
|
| 133 |
if (!containerRef.current) return;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
const clientX = 'touches' in e ? e.touches[0].clientX : (e as React.MouseEvent).clientX;
|
| 135 |
const clientY = 'touches' in e ? e.touches[0].clientY : (e as React.MouseEvent).clientY;
|
|
|
|
| 136 |
const rect = containerRef.current.getBoundingClientRect();
|
| 137 |
|
|
|
|
| 138 |
if (!position) {
|
| 139 |
setPosition({ x: rect.left, y: rect.top });
|
| 140 |
+
dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: rect.left, initialTop: rect.top };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
} else {
|
| 142 |
+
dragRef.current = { isDragging: true, startX: clientX, startY: clientY, initialLeft: position.x, initialTop: position.y };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
}
|
|
|
|
| 144 |
hasMovedRef.current = false;
|
| 145 |
document.body.style.userSelect = 'none';
|
| 146 |
};
|
| 147 |
|
| 148 |
const handleToggleOpen = () => {
|
| 149 |
if (!hasMovedRef.current) {
|
| 150 |
+
if (!isOpen) {
|
| 151 |
+
// Opening: Save current position as "button position"
|
| 152 |
+
if (position) prevButtonPos.current = position;
|
| 153 |
+
setIsOpen(true);
|
| 154 |
+
} else {
|
| 155 |
+
// Closing (via button click, though typically via minimize button)
|
| 156 |
+
setIsOpen(false);
|
| 157 |
+
}
|
| 158 |
+
}
|
| 159 |
+
};
|
| 160 |
+
|
| 161 |
+
const handleMinimize = () => {
|
| 162 |
+
setIsOpen(false);
|
| 163 |
+
// Restore previous button position if it exists, otherwise leave it where it is (or reset)
|
| 164 |
+
if (prevButtonPos.current) {
|
| 165 |
+
setPosition(prevButtonPos.current);
|
| 166 |
}
|
| 167 |
};
|
| 168 |
|
|
|
|
| 171 |
// @ts-ignore
|
| 172 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 173 |
const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
|
|
|
|
| 174 |
const analyser = ctx.createAnalyser();
|
| 175 |
analyser.fftSize = 64;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
audioContextRef.current = ctx;
|
| 177 |
analyserRef.current = analyser;
|
|
|
|
| 178 |
}
|
| 179 |
if (audioContextRef.current.state === 'suspended') {
|
| 180 |
audioContextRef.current.resume();
|
|
|
|
| 190 |
|
| 191 |
try {
|
| 192 |
initOutputAudioContext();
|
|
|
|
| 193 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
|
|
|
| 194 |
const wsUrl = `${protocol}//${window.location.host}/ws/live?userId=${user._id}&username=${encodeURIComponent(user.username)}`;
|
| 195 |
|
| 196 |
console.log("Connecting to", wsUrl);
|
|
|
|
| 246 |
const source = ctx.createBufferSource();
|
| 247 |
source.buffer = buffer;
|
| 248 |
|
| 249 |
+
if (analyserRef.current) {
|
| 250 |
source.connect(analyserRef.current);
|
| 251 |
+
analyserRef.current.connect(ctx.destination);
|
| 252 |
} else {
|
| 253 |
source.connect(ctx.destination);
|
| 254 |
}
|
|
|
|
| 281 |
if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
|
| 282 |
|
| 283 |
try {
|
| 284 |
+
isRecordingIntentRef.current = true;
|
| 285 |
+
if (status === 'SPEAKING') setStatus('CONNECTED');
|
| 286 |
+
|
| 287 |
+
const stream = await navigator.mediaDevices.getUserMedia({
|
| 288 |
+
audio: {
|
| 289 |
+
sampleRate: INPUT_SAMPLE_RATE,
|
| 290 |
+
channelCount: 1,
|
| 291 |
+
echoCancellation: true,
|
| 292 |
+
autoGainControl: true,
|
| 293 |
+
noiseSuppression: true
|
| 294 |
+
}
|
| 295 |
+
});
|
| 296 |
+
|
| 297 |
+
// Safety check: if user released mouse while getting stream
|
| 298 |
+
if (!isRecordingIntentRef.current) {
|
| 299 |
+
stream.getTracks().forEach(t => t.stop());
|
| 300 |
+
return;
|
| 301 |
}
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
mediaStreamRef.current = stream;
|
| 304 |
|
|
|
|
| 305 |
// @ts-ignore
|
| 306 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
| 307 |
const ctx = new AudioCtor({ sampleRate: INPUT_SAMPLE_RATE });
|
| 308 |
inputAudioContextRef.current = ctx;
|
| 309 |
+
await ctx.resume();
|
| 310 |
|
| 311 |
const source = ctx.createMediaStreamSource(stream);
|
| 312 |
const processor = ctx.createScriptProcessor(4096, 1, 1);
|
| 313 |
|
| 314 |
+
// Create a mute gain node to prevent feedback while keeping the processor alive
|
| 315 |
+
const muteGain = ctx.createGain();
|
| 316 |
+
muteGain.gain.value = 0;
|
| 317 |
+
|
| 318 |
+
source.connect(processor);
|
| 319 |
+
processor.connect(muteGain);
|
| 320 |
+
muteGain.connect(ctx.destination);
|
| 321 |
+
|
| 322 |
processor.onaudioprocess = (e) => {
|
| 323 |
+
if (!isRecordingIntentRef.current) return;
|
|
|
|
| 324 |
|
| 325 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 326 |
const l = inputData.length;
|
| 327 |
const int16Data = new Int16Array(l);
|
| 328 |
for (let i = 0; i < l; i++) {
|
| 329 |
+
int16Data[i] = Math.max(-1, Math.min(1, inputData[i])) * 32767; // Clamp and convert
|
| 330 |
}
|
| 331 |
|
| 332 |
+
// Raw PCM -> Base64
|
| 333 |
let binary = '';
|
| 334 |
const bytes = new Uint8Array(int16Data.buffer);
|
| 335 |
const len = bytes.byteLength;
|
|
|
|
| 345 |
}));
|
| 346 |
}
|
| 347 |
};
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
sourceNodeRef.current = source;
|
| 350 |
processorRef.current = processor;
|
| 351 |
+
|
| 352 |
setStatus('LISTENING');
|
| 353 |
setTranscript('正在聆听...');
|
| 354 |
|
| 355 |
} catch (e) {
|
| 356 |
console.error(e);
|
| 357 |
+
isRecordingIntentRef.current = false;
|
| 358 |
setTranscript('无法访问麦克风');
|
| 359 |
}
|
| 360 |
};
|
| 361 |
|
| 362 |
const stopRecording = () => {
|
| 363 |
+
isRecordingIntentRef.current = false;
|
|
|
|
| 364 |
|
| 365 |
// Cleanup Mic Processing
|
| 366 |
if (processorRef.current) {
|
|
|
|
| 367 |
processorRef.current.disconnect();
|
| 368 |
processorRef.current = null;
|
| 369 |
}
|
|
|
|
| 375 |
mediaStreamRef.current.getTracks().forEach(t => t.stop());
|
| 376 |
mediaStreamRef.current = null;
|
| 377 |
}
|
|
|
|
| 378 |
if (inputAudioContextRef.current) {
|
| 379 |
inputAudioContextRef.current.close().catch(()=>{});
|
| 380 |
inputAudioContextRef.current = null;
|
| 381 |
}
|
| 382 |
|
| 383 |
+
if (status === 'LISTENING') {
|
| 384 |
+
setStatus('THINKING');
|
| 385 |
+
setTranscript('思考中...');
|
| 386 |
+
}
|
| 387 |
};
|
| 388 |
|
| 389 |
const handleDisconnect = () => {
|
|
|
|
| 438 |
</div>
|
| 439 |
<div className="flex gap-2">
|
| 440 |
<button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><RefreshCw size={16}/></button>
|
| 441 |
+
<button onClick={handleMinimize} title="最小化" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors cursor-pointer" onMouseDown={e=>e.stopPropagation()}><ChevronDown size={20}/></button>
|
| 442 |
</div>
|
| 443 |
</div>
|
| 444 |
|