Spaces:
Sleeping
Sleeping
Upload 54 files
Browse files- components/LiveVoiceFab.tsx +72 -70
components/LiveVoiceFab.tsx
CHANGED
|
@@ -100,8 +100,6 @@ export const LiveVoiceFab: React.FC = () => {
|
|
| 100 |
if (!key) throw new Error("无法获取 AI 配置");
|
| 101 |
|
| 102 |
const ai = new GoogleGenAI({ apiKey: key });
|
| 103 |
-
|
| 104 |
-
// Use the model mentioned (mapped to the standard preview)
|
| 105 |
const model = 'gemini-2.5-flash-native-audio-preview-09-2025';
|
| 106 |
|
| 107 |
const session = await ai.live.connect({
|
|
@@ -112,16 +110,72 @@ export const LiveVoiceFab: React.FC = () => {
|
|
| 112 |
systemInstruction: { parts: [{ text: "你是一位友善的校园助手。请用简短、口语化的中文回答。请在回答中包含文字转录。" }] },
|
| 113 |
outputAudioTranscription: { } // Enable text output
|
| 114 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
});
|
| 116 |
|
| 117 |
sessionRef.current = session;
|
| 118 |
-
setIsConnected(true);
|
| 119 |
-
|
| 120 |
-
// Welcome message
|
| 121 |
-
setMessages(prev => [...prev, { id: 'sys-start', role: 'model', text: '已连接!按住按钮说话。' }]);
|
| 122 |
-
|
| 123 |
-
// Listen for messages
|
| 124 |
-
receiveLoop(session);
|
| 125 |
|
| 126 |
} catch (e: any) {
|
| 127 |
console.error(e);
|
|
@@ -132,8 +186,13 @@ export const LiveVoiceFab: React.FC = () => {
|
|
| 132 |
|
| 133 |
const disconnectSession = async () => {
|
| 134 |
if (sessionRef.current) {
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
}
|
| 138 |
setIsConnected(false);
|
| 139 |
setIsRecording(false);
|
|
@@ -141,71 +200,14 @@ export const LiveVoiceFab: React.FC = () => {
|
|
| 141 |
// Stop audio input
|
| 142 |
if (sourceNodeRef.current) sourceNodeRef.current.disconnect();
|
| 143 |
if (processorRef.current) processorRef.current.disconnect();
|
|
|
|
|
|
|
| 144 |
|
| 145 |
// Stop audio output
|
| 146 |
activeSourcesRef.current.forEach(s => s.stop());
|
| 147 |
activeSourcesRef.current.clear();
|
| 148 |
};
|
| 149 |
|
| 150 |
-
const receiveLoop = async (session: any) => {
|
| 151 |
-
try {
|
| 152 |
-
for await (const msg of session.receive()) {
|
| 153 |
-
const message = msg as LiveServerMessage;
|
| 154 |
-
|
| 155 |
-
// 1. Handle Text (Transcription)
|
| 156 |
-
const transcript = message.serverContent?.modelTurn?.parts?.find(p => p.text)?.text;
|
| 157 |
-
if (transcript) {
|
| 158 |
-
setMessages(prev => {
|
| 159 |
-
const last = prev[prev.length - 1];
|
| 160 |
-
if (last && last.role === 'model' && !last.text.endsWith('\n')) {
|
| 161 |
-
// Append to streaming message
|
| 162 |
-
return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
|
| 163 |
-
}
|
| 164 |
-
return [...prev, { id: Date.now().toString(), role: 'model', text: transcript }];
|
| 165 |
-
});
|
| 166 |
-
}
|
| 167 |
-
|
| 168 |
-
// 2. Handle User Transcription (Echo)
|
| 169 |
-
if (message.serverContent?.interrupted) {
|
| 170 |
-
// Clear queue if interrupted
|
| 171 |
-
activeSourcesRef.current.forEach(s => s.stop());
|
| 172 |
-
activeSourcesRef.current.clear();
|
| 173 |
-
nextStartTimeRef.current = 0;
|
| 174 |
-
setIsAiSpeaking(false);
|
| 175 |
-
}
|
| 176 |
-
|
| 177 |
-
// 3. Handle Audio Output
|
| 178 |
-
const audioData = message.serverContent?.modelTurn?.parts?.find(p => p.inlineData)?.inlineData?.data;
|
| 179 |
-
if (audioData && audioContextRef.current) {
|
| 180 |
-
setIsAiSpeaking(true);
|
| 181 |
-
const ctx = audioContextRef.current;
|
| 182 |
-
const buffer = await decodeAudioData(audioData, ctx);
|
| 183 |
-
|
| 184 |
-
const source = ctx.createBufferSource();
|
| 185 |
-
source.buffer = buffer;
|
| 186 |
-
source.connect(ctx.destination);
|
| 187 |
-
|
| 188 |
-
// Scheduling
|
| 189 |
-
const currentTime = ctx.currentTime;
|
| 190 |
-
if (nextStartTimeRef.current < currentTime) {
|
| 191 |
-
nextStartTimeRef.current = currentTime;
|
| 192 |
-
}
|
| 193 |
-
source.start(nextStartTimeRef.current);
|
| 194 |
-
nextStartTimeRef.current += buffer.duration;
|
| 195 |
-
|
| 196 |
-
activeSourcesRef.current.add(source);
|
| 197 |
-
source.onended = () => {
|
| 198 |
-
activeSourcesRef.current.delete(source);
|
| 199 |
-
if (activeSourcesRef.current.size === 0) setIsAiSpeaking(false);
|
| 200 |
-
};
|
| 201 |
-
}
|
| 202 |
-
}
|
| 203 |
-
} catch (e) {
|
| 204 |
-
console.log("Session ended or error", e);
|
| 205 |
-
setIsConnected(false);
|
| 206 |
-
}
|
| 207 |
-
};
|
| 208 |
-
|
| 209 |
const startRecording = async () => {
|
| 210 |
if (!isConnected || !sessionRef.current || !inputContextRef.current) return;
|
| 211 |
setIsRecording(true);
|
|
|
|
| 100 |
if (!key) throw new Error("无法获取 AI 配置");
|
| 101 |
|
| 102 |
const ai = new GoogleGenAI({ apiKey: key });
|
|
|
|
|
|
|
| 103 |
const model = 'gemini-2.5-flash-native-audio-preview-09-2025';
|
| 104 |
|
| 105 |
const session = await ai.live.connect({
|
|
|
|
| 110 |
systemInstruction: { parts: [{ text: "你是一位友善的校园助手。请用简短、口语化的中文回答。请在回答中包含文字转录。" }] },
|
| 111 |
outputAudioTranscription: { } // Enable text output
|
| 112 |
},
|
| 113 |
+
callbacks: {
|
| 114 |
+
onopen: () => {
|
| 115 |
+
setIsConnected(true);
|
| 116 |
+
setMessages(prev => [...prev, { id: 'sys-start', role: 'model', text: '已连接!按住按钮说话。' }]);
|
| 117 |
+
},
|
| 118 |
+
onmessage: async (message: LiveServerMessage) => {
|
| 119 |
+
// 1. Handle Text (Transcription)
|
| 120 |
+
const transcript = message.serverContent?.modelTurn?.parts?.find(p => p.text)?.text;
|
| 121 |
+
if (transcript) {
|
| 122 |
+
setMessages(prev => {
|
| 123 |
+
const last = prev[prev.length - 1];
|
| 124 |
+
if (last && last.role === 'model' && !last.text.endsWith('\n') && last.id !== 'sys-start') {
|
| 125 |
+
// Append to streaming message
|
| 126 |
+
return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
|
| 127 |
+
}
|
| 128 |
+
return [...prev, { id: Date.now().toString(), role: 'model', text: transcript }];
|
| 129 |
+
});
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
// 2. Handle User Transcription (Echo) & Interruption
|
| 133 |
+
if (message.serverContent?.interrupted) {
|
| 134 |
+
activeSourcesRef.current.forEach(s => s.stop());
|
| 135 |
+
activeSourcesRef.current.clear();
|
| 136 |
+
nextStartTimeRef.current = 0;
|
| 137 |
+
setIsAiSpeaking(false);
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
// 3. Handle Audio Output
|
| 141 |
+
const audioData = message.serverContent?.modelTurn?.parts?.find(p => p.inlineData)?.inlineData?.data;
|
| 142 |
+
if (audioData && audioContextRef.current) {
|
| 143 |
+
setIsAiSpeaking(true);
|
| 144 |
+
const ctx = audioContextRef.current;
|
| 145 |
+
const buffer = await decodeAudioData(audioData, ctx);
|
| 146 |
+
|
| 147 |
+
const source = ctx.createBufferSource();
|
| 148 |
+
source.buffer = buffer;
|
| 149 |
+
source.connect(ctx.destination);
|
| 150 |
+
|
| 151 |
+
// Scheduling
|
| 152 |
+
const currentTime = ctx.currentTime;
|
| 153 |
+
if (nextStartTimeRef.current < currentTime) {
|
| 154 |
+
nextStartTimeRef.current = currentTime;
|
| 155 |
+
}
|
| 156 |
+
source.start(nextStartTimeRef.current);
|
| 157 |
+
nextStartTimeRef.current += buffer.duration;
|
| 158 |
+
|
| 159 |
+
activeSourcesRef.current.add(source);
|
| 160 |
+
source.onended = () => {
|
| 161 |
+
activeSourcesRef.current.delete(source);
|
| 162 |
+
if (activeSourcesRef.current.size === 0) setIsAiSpeaking(false);
|
| 163 |
+
};
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
onclose: () => {
|
| 167 |
+
setIsConnected(false);
|
| 168 |
+
setIsRecording(false);
|
| 169 |
+
},
|
| 170 |
+
onerror: (e) => {
|
| 171 |
+
console.error("Live API Error:", e);
|
| 172 |
+
setError("连接断开,请重试");
|
| 173 |
+
setIsConnected(false);
|
| 174 |
+
}
|
| 175 |
+
}
|
| 176 |
});
|
| 177 |
|
| 178 |
sessionRef.current = session;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
} catch (e: any) {
|
| 181 |
console.error(e);
|
|
|
|
| 186 |
|
| 187 |
const disconnectSession = async () => {
|
| 188 |
if (sessionRef.current) {
|
| 189 |
+
// Typically session closure is handled by the server or object cleanup
|
| 190 |
+
// But we can try to close if method exists
|
| 191 |
+
try {
|
| 192 |
+
// @ts-ignore
|
| 193 |
+
if(typeof sessionRef.current.close === 'function') sessionRef.current.close();
|
| 194 |
+
} catch(e) {}
|
| 195 |
+
sessionRef.current = null;
|
| 196 |
}
|
| 197 |
setIsConnected(false);
|
| 198 |
setIsRecording(false);
|
|
|
|
| 200 |
// Stop audio input
|
| 201 |
if (sourceNodeRef.current) sourceNodeRef.current.disconnect();
|
| 202 |
if (processorRef.current) processorRef.current.disconnect();
|
| 203 |
+
sourceNodeRef.current = null;
|
| 204 |
+
processorRef.current = null;
|
| 205 |
|
| 206 |
// Stop audio output
|
| 207 |
activeSourcesRef.current.forEach(s => s.stop());
|
| 208 |
activeSourcesRef.current.clear();
|
| 209 |
};
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
const startRecording = async () => {
|
| 212 |
if (!isConnected || !sessionRef.current || !inputContextRef.current) return;
|
| 213 |
setIsRecording(true);
|