dvc890 committed on
Commit
0e885ae
·
verified ·
1 Parent(s): c3a4bf0

Upload 54 files

Browse files
Files changed (1) hide show
  1. components/LiveAssistant.tsx +22 -4
components/LiveAssistant.tsx CHANGED
@@ -1,3 +1,4 @@
 
1
  import React, { useState, useRef, useEffect } from 'react';
2
  import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
3
  import { Mic, X, MessageCircle, Volume2, Power, Play, Square, Loader2, Bot, ChevronDown, RefreshCw } from 'lucide-react';
@@ -101,6 +102,8 @@ export const LiveAssistant: React.FC = () => {
101
 
102
  try {
103
  setIsInitializing(true);
 
 
104
  // Setup Audio Context
105
  // @ts-ignore
106
  const AudioCtor = window.AudioContext || window.webkitAudioContext;
@@ -113,9 +116,11 @@ export const LiveAssistant: React.FC = () => {
113
  const stream = await navigator.mediaDevices.getUserMedia({ audio: {
114
  sampleRate: 16000,
115
  channelCount: 1,
116
- echoCancellation: true
 
117
  }});
118
  audioStreamRef.current = stream;
 
119
 
120
  // Initialize Gemini Client
121
  const client = new GoogleGenAI({ apiKey });
@@ -125,11 +130,12 @@ export const LiveAssistant: React.FC = () => {
125
  config: {
126
  responseModalities: [Modality.AUDIO],
127
  speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
128
- systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请用简短、自然的中文进行语音对话。" }] },
129
  outputAudioTranscription: { model: true } // Enable transcription to show text
130
  },
131
  callbacks: {
132
  onopen: () => {
 
133
  setIsConnected(true);
134
  setIsInitializing(false);
135
  setLogs(prev => [...prev, {role: 'model', text: '已连接,请点击麦克风说话。'}]);
@@ -165,7 +171,10 @@ export const LiveAssistant: React.FC = () => {
165
  // Update last model log or add new
166
  setLogs(prev => {
167
  const last = prev[prev.length - 1];
168
- if (last && last.role === 'model' && !last.text.endsWith('\n')) {
 
 
 
169
  // Append to existing turn (simplified logic)
170
  return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
171
  }
@@ -179,6 +188,7 @@ export const LiveAssistant: React.FC = () => {
179
  // Note: Standard API usually doesn't echo user transcript in serverContent easily without config, relying on model turn.
180
  },
181
  onclose: () => {
 
182
  setIsConnected(false);
183
  setLogs(prev => [...prev, {role: 'model', text: '连接已断开'}]);
184
  },
@@ -220,6 +230,7 @@ export const LiveAssistant: React.FC = () => {
220
 
221
  const newMicState = !isMicOn;
222
  setIsMicOn(newMicState);
 
223
 
224
  if (newMicState) {
225
  // START SENDING
@@ -233,10 +244,16 @@ export const LiveAssistant: React.FC = () => {
233
  const source = inputCtx.createMediaStreamSource(audioStreamRef.current);
234
  const processor = inputCtx.createScriptProcessor(4096, 1, 1);
235
 
 
236
  processor.onaudioprocess = (e) => {
237
  if (!newMicState) return; // Guard
238
  const inputData = e.inputBuffer.getChannelData(0);
239
  const blob = createBlob(inputData);
 
 
 
 
 
240
  sessionPromiseRef.current?.then(session => {
241
  session.sendRealtimeInput({ media: { mimeType: 'audio/pcm;rate=16000', data: blob.data } });
242
  });
@@ -254,6 +271,7 @@ export const LiveAssistant: React.FC = () => {
254
 
255
  } else {
256
  // STOP SENDING
 
257
  if (inputProcessorRef.current) {
258
  inputProcessorRef.current.disconnect();
259
  inputProcessorRef.current = null;
@@ -359,4 +377,4 @@ export const LiveAssistant: React.FC = () => {
359
  )}
360
  </div>
361
  );
362
- };
 
1
+
2
  import React, { useState, useRef, useEffect } from 'react';
3
  import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
4
  import { Mic, X, MessageCircle, Volume2, Power, Play, Square, Loader2, Bot, ChevronDown, RefreshCw } from 'lucide-react';
 
102
 
103
  try {
104
  setIsInitializing(true);
105
+ console.log("Starting Live Connection...");
106
+
107
  // Setup Audio Context
108
  // @ts-ignore
109
  const AudioCtor = window.AudioContext || window.webkitAudioContext;
 
116
  const stream = await navigator.mediaDevices.getUserMedia({ audio: {
117
  sampleRate: 16000,
118
  channelCount: 1,
119
+ echoCancellation: true,
120
+ noiseSuppression: true
121
  }});
122
  audioStreamRef.current = stream;
123
+ console.log("Microphone access granted");
124
 
125
  // Initialize Gemini Client
126
  const client = new GoogleGenAI({ apiKey });
 
130
  config: {
131
  responseModalities: [Modality.AUDIO],
132
  speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
133
+ systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请始终使用中文回答。请简短、自然地进行对话,不要使用 Markdown 格式,不要进行搜索。" }] },
134
  outputAudioTranscription: { model: true } // Enable transcription to show text
135
  },
136
  callbacks: {
137
  onopen: () => {
138
+ console.log("Live Session Opened");
139
  setIsConnected(true);
140
  setIsInitializing(false);
141
  setLogs(prev => [...prev, {role: 'model', text: '已连接,请点击麦克风说话。'}]);
 
171
  // Update last model log or add new
172
  setLogs(prev => {
173
  const last = prev[prev.length - 1];
174
+ const isInitialMessage = last && last.text === '已连接,请点击麦克风说话。';
175
+
176
+ // IMPORTANT: Do not append to the initial system message
177
+ if (last && last.role === 'model' && !isInitialMessage && !last.text.endsWith('\n')) {
178
  // Append to existing turn (simplified logic)
179
  return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
180
  }
 
188
  // Note: Standard API usually doesn't echo user transcript in serverContent easily without config, relying on model turn.
189
  },
190
  onclose: () => {
191
+ console.log("Live Session Closed");
192
  setIsConnected(false);
193
  setLogs(prev => [...prev, {role: 'model', text: '连接已断开'}]);
194
  },
 
230
 
231
  const newMicState = !isMicOn;
232
  setIsMicOn(newMicState);
233
+ console.log("Toggling Mic:", newMicState ? "ON" : "OFF");
234
 
235
  if (newMicState) {
236
  // START SENDING
 
244
  const source = inputCtx.createMediaStreamSource(audioStreamRef.current);
245
  const processor = inputCtx.createScriptProcessor(4096, 1, 1);
246
 
247
+ let chunkCount = 0;
248
  processor.onaudioprocess = (e) => {
249
  if (!newMicState) return; // Guard
250
  const inputData = e.inputBuffer.getChannelData(0);
251
  const blob = createBlob(inputData);
252
+
253
+ // Debug log every 20 chunks (~0.5s) to avoid spam but confirm data flow
254
+ chunkCount++;
255
+ if (chunkCount % 20 === 0) console.log("Sending audio chunk...", chunkCount);
256
+
257
  sessionPromiseRef.current?.then(session => {
258
  session.sendRealtimeInput({ media: { mimeType: 'audio/pcm;rate=16000', data: blob.data } });
259
  });
 
271
 
272
  } else {
273
  // STOP SENDING
274
+ console.log("Stopping audio stream...");
275
  if (inputProcessorRef.current) {
276
  inputProcessorRef.current.disconnect();
277
  inputProcessorRef.current = null;
 
377
  )}
378
  </div>
379
  );
380
+ };