Spaces:
Sleeping
Sleeping
Upload 54 files
Browse files - components/LiveAssistant.tsx +22 -4
components/LiveAssistant.tsx
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import React, { useState, useRef, useEffect } from 'react';
|
| 2 |
import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
|
| 3 |
import { Mic, X, MessageCircle, Volume2, Power, Play, Square, Loader2, Bot, ChevronDown, RefreshCw } from 'lucide-react';
|
|
@@ -101,6 +102,8 @@ export const LiveAssistant: React.FC = () => {
|
|
| 101 |
|
| 102 |
try {
|
| 103 |
setIsInitializing(true);
|
|
|
|
|
|
|
| 104 |
// Setup Audio Context
|
| 105 |
// @ts-ignore
|
| 106 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
|
@@ -113,9 +116,11 @@ export const LiveAssistant: React.FC = () => {
|
|
| 113 |
const stream = await navigator.mediaDevices.getUserMedia({ audio: {
|
| 114 |
sampleRate: 16000,
|
| 115 |
channelCount: 1,
|
| 116 |
-
echoCancellation: true
|
|
|
|
| 117 |
}});
|
| 118 |
audioStreamRef.current = stream;
|
|
|
|
| 119 |
|
| 120 |
// Initialize Gemini Client
|
| 121 |
const client = new GoogleGenAI({ apiKey });
|
|
@@ -125,11 +130,12 @@ export const LiveAssistant: React.FC = () => {
|
|
| 125 |
config: {
|
| 126 |
responseModalities: [Modality.AUDIO],
|
| 127 |
speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
|
| 128 |
-
systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI
|
| 129 |
outputAudioTranscription: { model: true } // Enable transcription to show text
|
| 130 |
},
|
| 131 |
callbacks: {
|
| 132 |
onopen: () => {
|
|
|
|
| 133 |
setIsConnected(true);
|
| 134 |
setIsInitializing(false);
|
| 135 |
setLogs(prev => [...prev, {role: 'model', text: '已连接,请点击麦克风说话。'}]);
|
|
@@ -165,7 +171,10 @@ export const LiveAssistant: React.FC = () => {
|
|
| 165 |
// Update last model log or add new
|
| 166 |
setLogs(prev => {
|
| 167 |
const last = prev[prev.length - 1];
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
| 169 |
// Append to existing turn (simplified logic)
|
| 170 |
return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
|
| 171 |
}
|
|
@@ -179,6 +188,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 179 |
// Note: Standard API usually doesn't echo user transcript in serverContent easily without config, relying on model turn.
|
| 180 |
},
|
| 181 |
onclose: () => {
|
|
|
|
| 182 |
setIsConnected(false);
|
| 183 |
setLogs(prev => [...prev, {role: 'model', text: '连接已断开'}]);
|
| 184 |
},
|
|
@@ -220,6 +230,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 220 |
|
| 221 |
const newMicState = !isMicOn;
|
| 222 |
setIsMicOn(newMicState);
|
|
|
|
| 223 |
|
| 224 |
if (newMicState) {
|
| 225 |
// START SENDING
|
|
@@ -233,10 +244,16 @@ export const LiveAssistant: React.FC = () => {
|
|
| 233 |
const source = inputCtx.createMediaStreamSource(audioStreamRef.current);
|
| 234 |
const processor = inputCtx.createScriptProcessor(4096, 1, 1);
|
| 235 |
|
|
|
|
| 236 |
processor.onaudioprocess = (e) => {
|
| 237 |
if (!newMicState) return; // Guard
|
| 238 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 239 |
const blob = createBlob(inputData);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
sessionPromiseRef.current?.then(session => {
|
| 241 |
session.sendRealtimeInput({ media: { mimeType: 'audio/pcm;rate=16000', data: blob.data } });
|
| 242 |
});
|
|
@@ -254,6 +271,7 @@ export const LiveAssistant: React.FC = () => {
|
|
| 254 |
|
| 255 |
} else {
|
| 256 |
// STOP SENDING
|
|
|
|
| 257 |
if (inputProcessorRef.current) {
|
| 258 |
inputProcessorRef.current.disconnect();
|
| 259 |
inputProcessorRef.current = null;
|
|
@@ -359,4 +377,4 @@ export const LiveAssistant: React.FC = () => {
|
|
| 359 |
)}
|
| 360 |
</div>
|
| 361 |
);
|
| 362 |
-
};
|
|
|
|
| 1 |
+
|
| 2 |
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
|
| 4 |
import { Mic, X, MessageCircle, Volume2, Power, Play, Square, Loader2, Bot, ChevronDown, RefreshCw } from 'lucide-react';
|
|
|
|
| 102 |
|
| 103 |
try {
|
| 104 |
setIsInitializing(true);
|
| 105 |
+
console.log("Starting Live Connection...");
|
| 106 |
+
|
| 107 |
// Setup Audio Context
|
| 108 |
// @ts-ignore
|
| 109 |
const AudioCtor = window.AudioContext || window.webkitAudioContext;
|
|
|
|
| 116 |
const stream = await navigator.mediaDevices.getUserMedia({ audio: {
|
| 117 |
sampleRate: 16000,
|
| 118 |
channelCount: 1,
|
| 119 |
+
echoCancellation: true,
|
| 120 |
+
noiseSuppression: true
|
| 121 |
}});
|
| 122 |
audioStreamRef.current = stream;
|
| 123 |
+
console.log("Microphone access granted");
|
| 124 |
|
| 125 |
// Initialize Gemini Client
|
| 126 |
const client = new GoogleGenAI({ apiKey });
|
|
|
|
| 130 |
config: {
|
| 131 |
responseModalities: [Modality.AUDIO],
|
| 132 |
speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
|
| 133 |
+
systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请始终使用中文回答。请简短、自然地进行对话,不要使用 Markdown 格式,不要进行搜索。" }] },
|
| 134 |
outputAudioTranscription: { model: true } // Enable transcription to show text
|
| 135 |
},
|
| 136 |
callbacks: {
|
| 137 |
onopen: () => {
|
| 138 |
+
console.log("Live Session Opened");
|
| 139 |
setIsConnected(true);
|
| 140 |
setIsInitializing(false);
|
| 141 |
setLogs(prev => [...prev, {role: 'model', text: '已连接,请点击麦克风说话。'}]);
|
|
|
|
| 171 |
// Update last model log or add new
|
| 172 |
setLogs(prev => {
|
| 173 |
const last = prev[prev.length - 1];
|
| 174 |
+
const isInitialMessage = last && last.text === '已连接,请点击麦克风说话。';
|
| 175 |
+
|
| 176 |
+
// IMPORTANT: Do not append to the initial system message
|
| 177 |
+
if (last && last.role === 'model' && !isInitialMessage && !last.text.endsWith('\n')) {
|
| 178 |
// Append to existing turn (simplified logic)
|
| 179 |
return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
|
| 180 |
}
|
|
|
|
| 188 |
// Note: Standard API usually doesn't echo user transcript in serverContent easily without config, relying on model turn.
|
| 189 |
},
|
| 190 |
onclose: () => {
|
| 191 |
+
console.log("Live Session Closed");
|
| 192 |
setIsConnected(false);
|
| 193 |
setLogs(prev => [...prev, {role: 'model', text: '连接已断开'}]);
|
| 194 |
},
|
|
|
|
| 230 |
|
| 231 |
const newMicState = !isMicOn;
|
| 232 |
setIsMicOn(newMicState);
|
| 233 |
+
console.log("Toggling Mic:", newMicState ? "ON" : "OFF");
|
| 234 |
|
| 235 |
if (newMicState) {
|
| 236 |
// START SENDING
|
|
|
|
| 244 |
const source = inputCtx.createMediaStreamSource(audioStreamRef.current);
|
| 245 |
const processor = inputCtx.createScriptProcessor(4096, 1, 1);
|
| 246 |
|
| 247 |
+
let chunkCount = 0;
|
| 248 |
processor.onaudioprocess = (e) => {
|
| 249 |
if (!newMicState) return; // Guard
|
| 250 |
const inputData = e.inputBuffer.getChannelData(0);
|
| 251 |
const blob = createBlob(inputData);
|
| 252 |
+
|
| 253 |
+
// Debug log every 20 chunks (4096 frames each: ~5s at 16 kHz, ~1.7s at 48 kHz) to avoid spam but confirm data flow
|
| 254 |
+
chunkCount++;
|
| 255 |
+
if (chunkCount % 20 === 0) console.log("Sending audio chunk...", chunkCount);
|
| 256 |
+
|
| 257 |
sessionPromiseRef.current?.then(session => {
|
| 258 |
session.sendRealtimeInput({ media: { mimeType: 'audio/pcm;rate=16000', data: blob.data } });
|
| 259 |
});
|
|
|
|
| 271 |
|
| 272 |
} else {
|
| 273 |
// STOP SENDING
|
| 274 |
+
console.log("Stopping audio stream...");
|
| 275 |
if (inputProcessorRef.current) {
|
| 276 |
inputProcessorRef.current.disconnect();
|
| 277 |
inputProcessorRef.current = null;
|
|
|
|
| 377 |
)}
|
| 378 |
</div>
|
| 379 |
);
|
| 380 |
+
};
|