dvc890 committed on
Commit
5320eb5
·
verified ·
1 Parent(s): 7d2c997

Upload 54 files

Browse files
Files changed (1) hide show
  1. components/LiveVoiceFab.tsx +72 -70
components/LiveVoiceFab.tsx CHANGED
@@ -100,8 +100,6 @@ export const LiveVoiceFab: React.FC = () => {
100
  if (!key) throw new Error("无法获取 AI 配置");
101
 
102
  const ai = new GoogleGenAI({ apiKey: key });
103
-
104
- // Use the model mentioned (mapped to the standard preview)
105
  const model = 'gemini-2.5-flash-native-audio-preview-09-2025';
106
 
107
  const session = await ai.live.connect({
@@ -112,16 +110,72 @@ export const LiveVoiceFab: React.FC = () => {
112
  systemInstruction: { parts: [{ text: "你是一位友善的校园助手。请用简短、口语化的中文回答。请在回答中包含文字转录。" }] },
113
  outputAudioTranscription: { } // Enable text output
114
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  });
116
 
117
  sessionRef.current = session;
118
- setIsConnected(true);
119
-
120
- // Welcome message
121
- setMessages(prev => [...prev, { id: 'sys-start', role: 'model', text: '已连接!按住按钮说话。' }]);
122
-
123
- // Listen for messages
124
- receiveLoop(session);
125
 
126
  } catch (e: any) {
127
  console.error(e);
@@ -132,8 +186,13 @@ export const LiveVoiceFab: React.FC = () => {
132
 
133
  const disconnectSession = async () => {
134
  if (sessionRef.current) {
135
- // Typically session closure is handled by the server or object cleanup
136
- sessionRef.current = null;
 
 
 
 
 
137
  }
138
  setIsConnected(false);
139
  setIsRecording(false);
@@ -141,71 +200,14 @@ export const LiveVoiceFab: React.FC = () => {
141
  // Stop audio input
142
  if (sourceNodeRef.current) sourceNodeRef.current.disconnect();
143
  if (processorRef.current) processorRef.current.disconnect();
 
 
144
 
145
  // Stop audio output
146
  activeSourcesRef.current.forEach(s => s.stop());
147
  activeSourcesRef.current.clear();
148
  };
149
 
150
- const receiveLoop = async (session: any) => {
151
- try {
152
- for await (const msg of session.receive()) {
153
- const message = msg as LiveServerMessage;
154
-
155
- // 1. Handle Text (Transcription)
156
- const transcript = message.serverContent?.modelTurn?.parts?.find(p => p.text)?.text;
157
- if (transcript) {
158
- setMessages(prev => {
159
- const last = prev[prev.length - 1];
160
- if (last && last.role === 'model' && !last.text.endsWith('\n')) {
161
- // Append to streaming message
162
- return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
163
- }
164
- return [...prev, { id: Date.now().toString(), role: 'model', text: transcript }];
165
- });
166
- }
167
-
168
- // 2. Handle User Transcription (Echo)
169
- if (message.serverContent?.interrupted) {
170
- // Clear queue if interrupted
171
- activeSourcesRef.current.forEach(s => s.stop());
172
- activeSourcesRef.current.clear();
173
- nextStartTimeRef.current = 0;
174
- setIsAiSpeaking(false);
175
- }
176
-
177
- // 3. Handle Audio Output
178
- const audioData = message.serverContent?.modelTurn?.parts?.find(p => p.inlineData)?.inlineData?.data;
179
- if (audioData && audioContextRef.current) {
180
- setIsAiSpeaking(true);
181
- const ctx = audioContextRef.current;
182
- const buffer = await decodeAudioData(audioData, ctx);
183
-
184
- const source = ctx.createBufferSource();
185
- source.buffer = buffer;
186
- source.connect(ctx.destination);
187
-
188
- // Scheduling
189
- const currentTime = ctx.currentTime;
190
- if (nextStartTimeRef.current < currentTime) {
191
- nextStartTimeRef.current = currentTime;
192
- }
193
- source.start(nextStartTimeRef.current);
194
- nextStartTimeRef.current += buffer.duration;
195
-
196
- activeSourcesRef.current.add(source);
197
- source.onended = () => {
198
- activeSourcesRef.current.delete(source);
199
- if (activeSourcesRef.current.size === 0) setIsAiSpeaking(false);
200
- };
201
- }
202
- }
203
- } catch (e) {
204
- console.log("Session ended or error", e);
205
- setIsConnected(false);
206
- }
207
- };
208
-
209
  const startRecording = async () => {
210
  if (!isConnected || !sessionRef.current || !inputContextRef.current) return;
211
  setIsRecording(true);
 
100
  if (!key) throw new Error("无法获取 AI 配置");
101
 
102
  const ai = new GoogleGenAI({ apiKey: key });
 
 
103
  const model = 'gemini-2.5-flash-native-audio-preview-09-2025';
104
 
105
  const session = await ai.live.connect({
 
110
  systemInstruction: { parts: [{ text: "你是一位友善的校园助手。请用简短、口语化的中文回答。请在回答中包含文字转录。" }] },
111
  outputAudioTranscription: { } // Enable text output
112
  },
113
+ callbacks: {
114
+ onopen: () => {
115
+ setIsConnected(true);
116
+ setMessages(prev => [...prev, { id: 'sys-start', role: 'model', text: '已连接!按住按钮说话。' }]);
117
+ },
118
+ onmessage: async (message: LiveServerMessage) => {
119
+ // 1. Handle Text (Transcription)
120
+ const transcript = message.serverContent?.modelTurn?.parts?.find(p => p.text)?.text;
121
+ if (transcript) {
122
+ setMessages(prev => {
123
+ const last = prev[prev.length - 1];
124
+ if (last && last.role === 'model' && !last.text.endsWith('\n') && last.id !== 'sys-start') {
125
+ // Append to streaming message
126
+ return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
127
+ }
128
+ return [...prev, { id: Date.now().toString(), role: 'model', text: transcript }];
129
+ });
130
+ }
131
+
132
+ // 2. Handle User Transcription (Echo) & Interruption
133
+ if (message.serverContent?.interrupted) {
134
+ activeSourcesRef.current.forEach(s => s.stop());
135
+ activeSourcesRef.current.clear();
136
+ nextStartTimeRef.current = 0;
137
+ setIsAiSpeaking(false);
138
+ }
139
+
140
+ // 3. Handle Audio Output
141
+ const audioData = message.serverContent?.modelTurn?.parts?.find(p => p.inlineData)?.inlineData?.data;
142
+ if (audioData && audioContextRef.current) {
143
+ setIsAiSpeaking(true);
144
+ const ctx = audioContextRef.current;
145
+ const buffer = await decodeAudioData(audioData, ctx);
146
+
147
+ const source = ctx.createBufferSource();
148
+ source.buffer = buffer;
149
+ source.connect(ctx.destination);
150
+
151
+ // Scheduling
152
+ const currentTime = ctx.currentTime;
153
+ if (nextStartTimeRef.current < currentTime) {
154
+ nextStartTimeRef.current = currentTime;
155
+ }
156
+ source.start(nextStartTimeRef.current);
157
+ nextStartTimeRef.current += buffer.duration;
158
+
159
+ activeSourcesRef.current.add(source);
160
+ source.onended = () => {
161
+ activeSourcesRef.current.delete(source);
162
+ if (activeSourcesRef.current.size === 0) setIsAiSpeaking(false);
163
+ };
164
+ }
165
+ },
166
+ onclose: () => {
167
+ setIsConnected(false);
168
+ setIsRecording(false);
169
+ },
170
+ onerror: (e) => {
171
+ console.error("Live API Error:", e);
172
+ setError("连接断开,请重试");
173
+ setIsConnected(false);
174
+ }
175
+ }
176
  });
177
 
178
  sessionRef.current = session;
 
 
 
 
 
 
 
179
 
180
  } catch (e: any) {
181
  console.error(e);
 
186
 
187
  const disconnectSession = async () => {
188
  if (sessionRef.current) {
189
+ // Typically session closure is handled by the server or object cleanup
190
+ // But we can try to close if method exists
191
+ try {
192
+ // @ts-ignore
193
+ if(typeof sessionRef.current.close === 'function') sessionRef.current.close();
194
+ } catch(e) {}
195
+ sessionRef.current = null;
196
  }
197
  setIsConnected(false);
198
  setIsRecording(false);
 
200
  // Stop audio input
201
  if (sourceNodeRef.current) sourceNodeRef.current.disconnect();
202
  if (processorRef.current) processorRef.current.disconnect();
203
+ sourceNodeRef.current = null;
204
+ processorRef.current = null;
205
 
206
  // Stop audio output
207
  activeSourcesRef.current.forEach(s => s.stop());
208
  activeSourcesRef.current.clear();
209
  };
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  const startRecording = async () => {
212
  if (!isConnected || !sessionRef.current || !inputContextRef.current) return;
213
  setIsRecording(true);