dvc890 committed on
Commit
c3a4bf0
·
verified ·
1 Parent(s): 8eb1b37

Delete components/LiveVoiceFab.tsx

Browse files
Files changed (1) hide show
  1. components/LiveVoiceFab.tsx +0 -372
components/LiveVoiceFab.tsx DELETED
@@ -1,372 +0,0 @@
1
-
2
- import React, { useState, useRef, useEffect } from 'react';
3
- import { Bot, Mic, X, MessageSquare, Loader2, Volume2, Power, Minimize2 } from 'lucide-react';
4
- import { GoogleGenAI, LiveServerMessage, Modality } from '@google/genai';
5
- import { api } from '../services/api';
6
- import { Toast } from './Toast';
7
-
8
- // --- Audio Utils (Specific for Live API PCM) ---
9
- const decodeAudioData = async (
10
- base64String: string,
11
- ctx: AudioContext,
12
- sampleRate: number = 24000
13
- ): Promise<AudioBuffer> => {
14
- const binaryString = atob(base64String);
15
- const len = binaryString.length;
16
- const bytes = new Uint8Array(len);
17
- for (let i = 0; i < len; i++) {
18
- bytes[i] = binaryString.charCodeAt(i);
19
- }
20
-
21
- // Convert Int16 PCM to Float32
22
- const int16Data = new Int16Array(bytes.buffer);
23
- const float32Data = new Float32Array(int16Data.length);
24
- for (let i = 0; i < int16Data.length; i++) {
25
- float32Data[i] = int16Data[i] / 32768.0;
26
- }
27
-
28
- const buffer = ctx.createBuffer(1, float32Data.length, sampleRate);
29
- buffer.copyToChannel(float32Data, 0);
30
- return buffer;
31
- };
32
-
33
- const createPCMBlob = (data: Float32Array): { data: string, mimeType: string } => {
34
- const l = data.length;
35
- const int16 = new Int16Array(l);
36
- for (let i = 0; i < l; i++) {
37
- int16[i] = Math.max(-1, Math.min(1, data[i])) * 32768;
38
- }
39
- const bytes = new Uint8Array(int16.buffer);
40
- let binary = '';
41
- const len = bytes.byteLength;
42
- for (let i = 0; i < len; i++) {
43
- binary += String.fromCharCode(bytes[i]);
44
- }
45
- return {
46
- data: btoa(binary),
47
- mimeType: 'audio/pcm;rate=16000', // Client input is usually 16k
48
- };
49
- };
50
-
51
// One entry in the floating-window transcript. Model replies stream in:
// the newest model message is replaced in place as transcript chunks arrive.
interface ChatMessage {
  // 'sys-start' for the connection greeting; a Date.now() string otherwise.
  id: string;
  // Message author; decides bubble side/styling in the chat body.
  role: 'user' | 'model';
  text: string;
}
56
-
57
/**
 * Floating action button + chat window for realtime voice conversations with
 * Gemini's native-audio Live API.
 *
 * Flow: opening the window lazily creates two AudioContexts (24 kHz playback,
 * 16 kHz capture) and connects a Live session. Holding the mic button streams
 * raw PCM chunks via `sendRealtimeInput`; model audio replies are scheduled
 * gaplessly on the playback context while transcript text streams into the
 * message list.
 *
 * Fix in this revision: the `getUserMedia` stream is now kept in a ref and its
 * tracks are stopped on stopRecording/disconnect — previously only the graph
 * nodes were disconnected, so the microphone capture (and the browser's
 * recording indicator) stayed live after the user released the button.
 *
 * Rendered only for users with `aiAccess` or the ADMIN role.
 */
export const LiveVoiceFab: React.FC = () => {
  const [isOpen, setIsOpen] = useState(false);
  const [isConnected, setIsConnected] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [isAiSpeaking, setIsAiSpeaking] = useState(false);
  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [error, setError] = useState<string | null>(null);

  // Refs for Audio & Session — kept out of state; none should trigger renders.
  const sessionRef = useRef<any>(null);
  const audioContextRef = useRef<AudioContext | null>(null);  // 24 kHz output
  const inputContextRef = useRef<AudioContext | null>(null);  // 16 kHz input
  const nextStartTimeRef = useRef<number>(0);                 // playback scheduling cursor
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);    // mic stream, so tracks can be stopped
  const activeSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
  const messagesEndRef = useRef<HTMLDivElement>(null);

  const currentUser = api.auth.getCurrentUser();
  const hasAccess = currentUser?.aiAccess || currentUser?.role === 'ADMIN';

  useEffect(() => {
    if (isOpen && !audioContextRef.current) {
      // @ts-ignore — webkitAudioContext fallback for older Safari.
      const AudioCtor = window.AudioContext || window.webkitAudioContext;
      audioContextRef.current = new AudioCtor({ sampleRate: 24000 });
      inputContextRef.current = new AudioCtor({ sampleRate: 16000 });
    }
    // Scroll to bottom whenever a message arrives or the window opens.
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [isOpen, messages]);

  // Clean up session + audio graph on unmount.
  useEffect(() => {
    return () => {
      disconnectSession();
    };
  }, []);

  /** Opens a Live API session. Safe to call again after an error (retry button). */
  const connectSession = async () => {
    setError(null);
    try {
      // NOTE(review): the Gemini API key is fetched into the browser and used
      // directly, so it is exposed to any user with AI access — confirm this
      // is intentional vs. proxying Live traffic through the backend.
      const { key } = await api.ai.getKey();
      if (!key) throw new Error("无法获取 AI 配置");

      const ai = new GoogleGenAI({ apiKey: key });
      const model = 'gemini-2.5-flash-native-audio-preview-09-2025';

      const session = await ai.live.connect({
        model,
        config: {
          responseModalities: [Modality.AUDIO],
          speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
          systemInstruction: { parts: [{ text: "你是一位友善的校园助手。请用简短、口语化的中文回答。请在回答中包含文字转录。" }] },
          outputAudioTranscription: { } // Enable text output
        },
        callbacks: {
          onopen: () => {
            setIsConnected(true);
            setMessages(prev => [...prev, { id: 'sys-start', role: 'model', text: '已连接!按住按钮说话。' }]);
          },
          onmessage: async (message: LiveServerMessage) => {
            // 1. Handle Text (Transcription): append chunks to the current
            //    streaming model message, or start a new one.
            const transcript = message.serverContent?.modelTurn?.parts?.find(p => p.text)?.text;
            if (transcript) {
              setMessages(prev => {
                const last = prev[prev.length - 1];
                if (last && last.role === 'model' && !last.text.endsWith('\n') && last.id !== 'sys-start') {
                  // Append to streaming message
                  return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
                }
                return [...prev, { id: Date.now().toString(), role: 'model', text: transcript }];
              });
            }

            // 2. Handle interruption: flush any queued playback immediately.
            if (message.serverContent?.interrupted) {
              activeSourcesRef.current.forEach(s => s.stop());
              activeSourcesRef.current.clear();
              nextStartTimeRef.current = 0;
              setIsAiSpeaking(false);
            }

            // 3. Handle Audio Output
            const audioData = message.serverContent?.modelTurn?.parts?.find(p => p.inlineData)?.inlineData?.data;
            if (audioData && audioContextRef.current) {
              setIsAiSpeaking(true);
              const ctx = audioContextRef.current;
              const buffer = await decodeAudioData(audioData, ctx);

              const source = ctx.createBufferSource();
              source.buffer = buffer;
              source.connect(ctx.destination);

              // Scheduling: queue each chunk gaplessly after the previous one.
              const currentTime = ctx.currentTime;
              if (nextStartTimeRef.current < currentTime) {
                nextStartTimeRef.current = currentTime;
              }
              source.start(nextStartTimeRef.current);
              nextStartTimeRef.current += buffer.duration;

              activeSourcesRef.current.add(source);
              source.onended = () => {
                activeSourcesRef.current.delete(source);
                if (activeSourcesRef.current.size === 0) setIsAiSpeaking(false);
              };
            }
          },
          onclose: () => {
            setIsConnected(false);
            setIsRecording(false);
          },
          onerror: (e) => {
            console.error("Live API Error:", e);
            setError("连接断开,请重试");
            setIsConnected(false);
          }
        }
      });

      sessionRef.current = session;

    } catch (e: any) {
      console.error(e);
      setError(e.message || "连接失败");
      setIsConnected(false);
    }
  };

  /** Tears down the session, the input graph, and any queued playback. */
  const disconnectSession = async () => {
    if (sessionRef.current) {
      // Typically session closure is handled by the server or object cleanup
      // But we can try to close if method exists
      try {
        // @ts-ignore
        if(typeof sessionRef.current.close === 'function') sessionRef.current.close();
      } catch(e) {}
      sessionRef.current = null;
    }
    setIsConnected(false);
    setIsRecording(false);

    // Stop audio input
    if (sourceNodeRef.current) sourceNodeRef.current.disconnect();
    if (processorRef.current) processorRef.current.disconnect();
    sourceNodeRef.current = null;
    processorRef.current = null;

    // Release the microphone: disconnecting nodes alone leaves the capture
    // tracks (and the browser's recording indicator) running.
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(t => t.stop());
      mediaStreamRef.current = null;
    }

    // Stop audio output
    activeSourcesRef.current.forEach(s => s.stop());
    activeSourcesRef.current.clear();
  };

  /** Push-to-talk start: opens the mic and streams 16 kHz PCM to the session. */
  const startRecording = async () => {
    if (!isConnected || !sessionRef.current || !inputContextRef.current) return;
    setIsRecording(true);

    // Interrupt AI if speaking
    // We can send an empty text to interrupt or rely on VAD, but explicit clear is safer for UI
    activeSourcesRef.current.forEach(s => s.stop());
    activeSourcesRef.current.clear();
    nextStartTimeRef.current = 0;
    setIsAiSpeaking(false);

    try {
      const ctx = inputContextRef.current;
      if (ctx.state === 'suspended') await ctx.resume();

      const stream = await navigator.mediaDevices.getUserMedia({ audio: {
        sampleRate: 16000,
        channelCount: 1,
        echoCancellation: true
      }});
      mediaStreamRef.current = stream; // kept so stopRecording can stop the tracks

      const source = ctx.createMediaStreamSource(stream);
      // Using ScriptProcessor for raw PCM access (AudioWorklet is better but more complex to setup in a single file)
      const processor = ctx.createScriptProcessor(4096, 1, 1);

      processor.onaudioprocess = (e) => {
        if (!sessionRef.current) return;
        const inputData = e.inputBuffer.getChannelData(0);
        const pcmBlob = createPCMBlob(inputData);
        sessionRef.current.sendRealtimeInput({ media: pcmBlob });
      };

      source.connect(processor);
      processor.connect(ctx.destination); // Required for script processor to run

      sourceNodeRef.current = source;
      processorRef.current = processor;

    } catch (e) {
      console.error("Mic error", e);
      setError("无法访问麦克风");
      setIsRecording(false);
    }
  };

  /** Push-to-talk end: tears down the input graph and releases the mic. */
  const stopRecording = () => {
    setIsRecording(false);
    if (sourceNodeRef.current) {
      sourceNodeRef.current.disconnect();
      sourceNodeRef.current = null;
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    // Stop the capture tracks so the browser's mic indicator turns off.
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(t => t.stop());
      mediaStreamRef.current = null;
    }
    // Note: We don't explicitly send "End of Turn", the model infers it from silence/VAD in Live API usually,
    // but stopping the stream is sufficient.
  };

  if (!hasAccess) return null;

  return (
    <>
      {/* Floating Button */}
      <button
        onClick={() => {
          setIsOpen(!isOpen);
          if (!isOpen && !isConnected) connectSession();
        }}
        className={`fixed bottom-6 right-6 z-[9990] w-14 h-14 rounded-full shadow-2xl flex items-center justify-center transition-all hover:scale-110 active:scale-95 ${isOpen ? 'bg-red-500 rotate-45' : 'bg-gradient-to-tr from-blue-600 to-indigo-600'}`}
      >
        {isOpen ? <X color="white" size={24}/> : <Bot color="white" size={28}/>}
      </button>

      {/* Floating Window */}
      {isOpen && (
        <div className="fixed bottom-24 right-6 z-[9990] w-80 md:w-96 bg-white rounded-2xl shadow-2xl border border-gray-100 flex flex-col overflow-hidden animate-in slide-in-from-bottom-10 fade-in duration-200" style={{height: '500px'}}>

          {/* Header */}
          <div className="p-4 bg-gradient-to-r from-blue-600 to-indigo-600 flex justify-between items-center shrink-0">
            <div className="flex items-center gap-2 text-white">
              <div className={`w-2 h-2 rounded-full ${isConnected ? 'bg-green-400 animate-pulse' : 'bg-red-400'}`}></div>
              <span className="font-bold text-sm">AI 实时语音 (Live)</span>
            </div>
            <div className="flex gap-2">
              <button onClick={disconnectSession} className="text-white/80 hover:text-white" title="重连/刷新">
                <Power size={16}/>
              </button>
              <button onClick={() => setIsOpen(false)} className="text-white/80 hover:text-white">
                <Minimize2 size={16}/>
              </button>
            </div>
          </div>

          {/* Chat Body */}
          <div className="flex-1 bg-gray-50 overflow-y-auto p-4 space-y-3 custom-scrollbar">
            {messages.length === 0 && isConnected && (
              <div className="text-center text-gray-400 text-xs mt-10">
                <p>已连接 Gemini Native Audio Dialog</p>
                <p>按住下方按钮开始对话</p>
              </div>
            )}
            {!isConnected && !error && (
              <div className="flex flex-col items-center justify-center h-full text-gray-400 gap-2">
                <Loader2 className="animate-spin text-blue-500" size={24}/>
                <span className="text-xs">正在建立加密连接...</span>
              </div>
            )}
            {error && (
              <div className="bg-red-50 text-red-600 p-3 rounded-lg text-xs text-center border border-red-100">
                {error}
                <button onClick={connectSession} className="block mx-auto mt-2 text-blue-600 underline">重试</button>
              </div>
            )}
            {messages.map((msg, idx) => (
              <div key={idx} className={`flex ${msg.role === 'user' ? 'justify-end' : 'justify-start'}`}>
                <div className={`max-w-[85%] px-3 py-2 rounded-xl text-sm ${msg.role === 'user' ? 'bg-blue-600 text-white rounded-tr-none' : 'bg-white border border-gray-200 text-gray-800 rounded-tl-none shadow-sm'}`}>
                  {msg.text}
                </div>
              </div>
            ))}
            {isAiSpeaking && (
              <div className="flex justify-start">
                <div className="bg-white border border-gray-200 px-3 py-2 rounded-xl rounded-tl-none shadow-sm flex items-center gap-1 text-blue-600">
                  <Volume2 size={14} className="animate-pulse"/>
                  <div className="flex gap-0.5 items-end h-3">
                    <div className="w-0.5 bg-blue-500 h-1 animate-[bounce_1s_infinite]"></div>
                    <div className="w-0.5 bg-blue-500 h-2 animate-[bounce_1.2s_infinite]"></div>
                    <div className="w-0.5 bg-blue-500 h-3 animate-[bounce_0.8s_infinite]"></div>
                  </div>
                </div>
              </div>
            )}
            <div ref={messagesEndRef}></div>
          </div>

          {/* Controls */}
          <div className="p-4 bg-white border-t border-gray-100 shrink-0 flex flex-col items-center gap-2">
            <button
              disabled={!isConnected}
              onMouseDown={startRecording}
              onMouseUp={stopRecording}
              onMouseLeave={stopRecording}
              onTouchStart={startRecording}
              onTouchEnd={stopRecording}
              className={`w-full py-3 rounded-full font-bold text-white shadow-lg transition-all transform active:scale-95 flex items-center justify-center gap-2 ${
                isRecording
                  ? 'bg-red-500 scale-105 ring-4 ring-red-100'
                  : isConnected ? 'bg-blue-600 hover:bg-blue-700' : 'bg-gray-300 cursor-not-allowed'
              }`}
            >
              {isRecording ? <><div className="w-3 h-3 bg-white rounded-full animate-ping"></div> 松开发送</> : <><Mic size={18}/> 按住说话</>}
            </button>
            <div className="text-[10px] text-gray-400">
              Model: gemini-2.5-flash-native-audio
            </div>
          </div>
        </div>
      )}
    </>
  );
};