dvc890 committed on
Commit
40883e7
·
verified ·
1 Parent(s): 9df572c

Upload 54 files

Browse files
Files changed (3) hide show
  1. components/LiveAssistant.tsx +67 -90
  2. package.json +2 -1
  3. server.js +154 -14
components/LiveAssistant.tsx CHANGED
@@ -1,13 +1,12 @@
1
 
2
  import React, { useState, useRef, useEffect } from 'react';
3
- import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
4
  import { Mic, X, Power, Loader2, Bot, Volume2, Radio, Activity, RefreshCw, ChevronDown } from 'lucide-react';
5
  import { api } from '../services/api';
6
 
7
  // --- Audio Types & Helpers ---
8
  // 16kHz for Gemini Input
9
  const INPUT_SAMPLE_RATE = 16000;
10
- // 24kHz for Gemini Output
11
  const OUTPUT_SAMPLE_RATE = 24000;
12
 
13
  function base64ToUint8Array(base64: string) {
@@ -23,7 +22,6 @@ function base64ToUint8Array(base64: string) {
23
  export const LiveAssistant: React.FC = () => {
24
  const [isOpen, setIsOpen] = useState(false);
25
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
26
- const [apiKey, setApiKey] = useState('');
27
  const [transcript, setTranscript] = useState(''); // Current subtitle
28
  const [volumeLevel, setVolumeLevel] = useState(0);
29
 
@@ -34,30 +32,12 @@ export const LiveAssistant: React.FC = () => {
34
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
35
  const gainNodeRef = useRef<GainNode | null>(null);
36
 
37
- const sessionRef = useRef<any>(null); // The GenAI Session
38
  const nextPlayTimeRef = useRef<number>(0);
39
  const analyserRef = useRef<AnalyserNode | null>(null);
40
  const volumeIntervalRef = useRef<any>(null);
41
 
42
- // 1. Fetch Key on Open
43
- useEffect(() => {
44
- if (isOpen && !apiKey) {
45
- api.ai.getStats().catch(() => {}); // Warm up
46
- fetch('/api/ai/live-access', {
47
- headers: {
48
- 'x-user-username': api.auth.getCurrentUser()?.username || '',
49
- 'x-user-role': api.auth.getCurrentUser()?.role || ''
50
- }
51
- })
52
- .then(res => res.json())
53
- .then(data => {
54
- if (data.key) setApiKey(data.key);
55
- })
56
- .catch(err => console.error("Failed to get live key", err));
57
- }
58
- }, [isOpen]);
59
-
60
- // 2. Clean up on unmount or close
61
  useEffect(() => {
62
  if (!isOpen) {
63
  handleDisconnect();
@@ -106,44 +86,45 @@ export const LiveAssistant: React.FC = () => {
106
  };
107
 
108
  const handleConnect = async () => {
109
- if (!apiKey) return;
110
  setStatus('CONNECTING');
111
- setTranscript('正在建立连接...');
112
 
113
  try {
114
  initAudioContext();
115
- const client = new GoogleGenAI({ apiKey });
116
 
117
- const session = await client.live.connect({
118
- model: 'gemini-2.5-flash-native-audio-preview-09-2025',
119
- callbacks: {
120
- onopen: () => {
121
- console.log('Session Open');
122
- },
123
- onmessage: (msg: LiveServerMessage) => {
124
- handleServerMessage(msg);
125
- },
126
- onclose: () => {
127
- console.log('Session Close');
128
- handleDisconnect();
129
- },
130
- onerror: (e) => {
131
- console.error('Session Error', e);
132
- setTranscript('连接发生错误,请重试');
133
- handleDisconnect();
134
- }
135
- },
136
- config: {
137
- responseModalities: [Modality.AUDIO],
138
- speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
139
- // Strong instruction to force Chinese
140
- systemInstruction: "You are a helpful school assistant. You MUST reply in spoken Chinese (Mandarin). Keep answers concise and friendly. Do not use markdown.",
141
  }
142
- });
 
 
 
 
 
143
 
144
- sessionRef.current = session;
145
- setStatus('CONNECTED');
146
- setTranscript('连接成功,请按住麦克风说话');
 
 
147
 
148
  } catch (e) {
149
  console.error("Connect failed", e);
@@ -152,17 +133,14 @@ export const LiveAssistant: React.FC = () => {
152
  }
153
  };
154
 
155
- const handleServerMessage = async (msg: LiveServerMessage) => {
156
- const serverContent = msg.serverContent;
157
-
158
  // 1. Audio Output
159
- const audioData = serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
160
- if (audioData && audioContextRef.current) {
161
- setStatus('SPEAKING'); // Receiving audio means speaking
162
  const ctx = audioContextRef.current;
163
- const bytes = base64ToUint8Array(audioData);
164
 
165
- // Decode Raw PCM (16-bit, 24kHz, Mono)
166
  const int16 = new Int16Array(bytes.buffer);
167
  const float32 = new Float32Array(int16.length);
168
  for(let i=0; i<int16.length; i++) float32[i] = int16[i] / 32768.0;
@@ -195,20 +173,18 @@ export const LiveAssistant: React.FC = () => {
195
  };
196
  }
197
 
198
- // 2. Text Transcription (Subtitle)
199
- // Note: The model sometimes returns 'thought' or 'search' logs here.
200
- // We rely on audio mostly, but show text if it looks like a response.
201
- const text = serverContent?.modelTurn?.parts?.[0]?.text;
202
- if (text) {
203
- if (!text.startsWith('**') && !text.includes('Finding')) {
204
- setTranscript(text);
205
- }
206
  }
207
 
208
- // 3. User Turn Finished (Model starts thinking)
209
- if (serverContent?.turnComplete) {
210
  setStatus('THINKING');
211
  }
 
 
 
212
  };
213
 
214
  const startRecording = async () => {
@@ -217,13 +193,17 @@ export const LiveAssistant: React.FC = () => {
217
  try {
218
  // Interrupt model if speaking
219
  if (status === 'SPEAKING') {
220
- // We can send an interruption message or just stop playing, but API handles new input as interrupt usually
221
  setStatus('CONNECTED');
222
  }
223
 
224
- const stream = await navigator.mediaDevices.getUserMedia({ audio: { sampleRate: INPUT_SAMPLE_RATE } });
 
 
 
 
225
  mediaStreamRef.current = stream;
226
 
 
227
  const ctx = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: INPUT_SAMPLE_RATE });
228
  const source = ctx.createMediaStreamSource(stream);
229
  const processor = ctx.createScriptProcessor(4096, 1, 1);
@@ -231,14 +211,13 @@ export const LiveAssistant: React.FC = () => {
231
  processor.onaudioprocess = (e) => {
232
  const inputData = e.inputBuffer.getChannelData(0);
233
 
234
- // Downconvert Float32 to Int16 for Gemini
235
  const l = inputData.length;
236
  const int16Data = new Int16Array(l);
237
  for (let i = 0; i < l; i++) {
238
  int16Data[i] = inputData[i] * 32768;
239
  }
240
 
241
- // Convert to Base64 manually to avoid large lib dependency
242
  let binary = '';
243
  const bytes = new Uint8Array(int16Data.buffer);
244
  const len = bytes.byteLength;
@@ -247,15 +226,16 @@ export const LiveAssistant: React.FC = () => {
247
  }
248
  const b64 = btoa(binary);
249
 
250
- if (sessionRef.current) {
251
- sessionRef.current.sendRealtimeInput({
252
- media: { mimeType: `audio/pcm;rate=${INPUT_SAMPLE_RATE}`, data: b64 }
253
- });
 
254
  }
255
  };
256
 
257
  source.connect(processor);
258
- processor.connect(ctx.destination);
259
 
260
  sourceNodeRef.current = source;
261
  processorRef.current = processor;
@@ -290,19 +270,17 @@ export const LiveAssistant: React.FC = () => {
290
  };
291
 
292
  const handleDisconnect = () => {
293
- if (sessionRef.current) {
294
- // sessionRef.current.close(); // SDK might not have close method exposed directly depending on version, but usually does
295
- sessionRef.current = null;
296
  }
297
  // Cleanup Audio
298
  if (audioContextRef.current) {
299
- audioContextRef.current.suspend(); // Suspend instead of close to reuse? Or close.
300
- // For robustness, let's just close and nullify.
301
  audioContextRef.current.close().catch(()=>{});
302
  audioContextRef.current = null;
303
  }
304
 
305
- stopRecording(); // Ensure mic is off
306
 
307
  setStatus('DISCONNECTED');
308
  setTranscript('');
@@ -330,7 +308,7 @@ export const LiveAssistant: React.FC = () => {
330
  <div className="bg-slate-800/50 p-4 flex justify-between items-center text-white shrink-0 backdrop-blur-md">
331
  <div className="flex items-center gap-2">
332
  <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
333
- <span className="font-bold text-sm">AI 实时通话</span>
334
  </div>
335
  <div className="flex gap-2">
336
  <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors"><RefreshCw size={16}/></button>
@@ -400,8 +378,7 @@ export const LiveAssistant: React.FC = () => {
400
  {status === 'DISCONNECTED' ? (
401
  <button
402
  onClick={handleConnect}
403
- disabled={!apiKey}
404
- className="w-full py-4 bg-blue-600 hover:bg-blue-500 text-white rounded-2xl font-bold flex items-center justify-center gap-2 transition-all hover:scale-[1.02] active:scale-95 disabled:opacity-50 disabled:cursor-not-allowed"
405
  >
406
  <Power size={20}/> 开启 AI 语音
407
  </button>
 
1
 
2
  import React, { useState, useRef, useEffect } from 'react';
 
3
  import { Mic, X, Power, Loader2, Bot, Volume2, Radio, Activity, RefreshCw, ChevronDown } from 'lucide-react';
4
  import { api } from '../services/api';
5
 
6
  // --- Audio Types & Helpers ---
7
  // 16kHz for Gemini Input
8
  const INPUT_SAMPLE_RATE = 16000;
9
+ // 24kHz for Gemini Output (Typically)
10
  const OUTPUT_SAMPLE_RATE = 24000;
11
 
12
  function base64ToUint8Array(base64: string) {
 
22
  export const LiveAssistant: React.FC = () => {
23
  const [isOpen, setIsOpen] = useState(false);
24
  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
 
25
  const [transcript, setTranscript] = useState(''); // Current subtitle
26
  const [volumeLevel, setVolumeLevel] = useState(0);
27
 
 
32
  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
33
  const gainNodeRef = useRef<GainNode | null>(null);
34
 
35
+ const wsRef = useRef<WebSocket | null>(null);
36
  const nextPlayTimeRef = useRef<number>(0);
37
  const analyserRef = useRef<AnalyserNode | null>(null);
38
  const volumeIntervalRef = useRef<any>(null);
39
 
40
+ // 1. Clean up on unmount or close
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  useEffect(() => {
42
  if (!isOpen) {
43
  handleDisconnect();
 
86
  };
87
 
88
  const handleConnect = async () => {
 
89
  setStatus('CONNECTING');
90
+ setTranscript('正在连接服务器...');
91
 
92
  try {
93
  initAudioContext();
 
94
 
95
+ // Connect to Backend Proxy via WebSocket
96
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
97
+ const wsUrl = `${protocol}//${window.location.host}/ws/live`;
98
+
99
+ console.log("Connecting to", wsUrl);
100
+ const ws = new WebSocket(wsUrl);
101
+ wsRef.current = ws;
102
+
103
+ ws.onopen = () => {
104
+ console.log('WS Open');
105
+ setStatus('CONNECTED');
106
+ setTranscript('连接成功,请按住麦克风说话');
107
+ };
108
+
109
+ ws.onmessage = async (event) => {
110
+ try {
111
+ const msg = JSON.parse(event.data);
112
+ handleServerMessage(msg);
113
+ } catch (e) {
114
+ console.error("Parse error", e);
 
 
 
 
115
  }
116
+ };
117
+
118
+ ws.onclose = () => {
119
+ console.log('WS Close');
120
+ handleDisconnect();
121
+ };
122
 
123
+ ws.onerror = (e) => {
124
+ console.error('WS Error', e);
125
+ setTranscript('连接服务器失败');
126
+ handleDisconnect();
127
+ };
128
 
129
  } catch (e) {
130
  console.error("Connect failed", e);
 
133
  }
134
  };
135
 
136
+ const handleServerMessage = async (msg: any) => {
 
 
137
  // 1. Audio Output
138
+ if (msg.type === 'audio' && msg.data && audioContextRef.current) {
139
+ setStatus('SPEAKING');
 
140
  const ctx = audioContextRef.current;
141
+ const bytes = base64ToUint8Array(msg.data);
142
 
143
+ // Decode Raw PCM (16-bit, 24kHz, Mono) from Gemini
144
  const int16 = new Int16Array(bytes.buffer);
145
  const float32 = new Float32Array(int16.length);
146
  for(let i=0; i<int16.length; i++) float32[i] = int16[i] / 32768.0;
 
173
  };
174
  }
175
 
176
+ // 2. Text Transcription
177
+ if (msg.type === 'text' && msg.content) {
178
+ setTranscript(msg.content);
 
 
 
 
 
179
  }
180
 
181
+ // 3. Status/Error
182
+ if (msg.type === 'turnComplete') {
183
  setStatus('THINKING');
184
  }
185
+ if (msg.type === 'error') {
186
+ setTranscript(`错误: ${msg.message}`);
187
+ }
188
  };
189
 
190
  const startRecording = async () => {
 
193
  try {
194
  // Interrupt model if speaking
195
  if (status === 'SPEAKING') {
 
196
  setStatus('CONNECTED');
197
  }
198
 
199
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: {
200
+ sampleRate: INPUT_SAMPLE_RATE,
201
+ channelCount: 1,
202
+ echoCancellation: true
203
+ }});
204
  mediaStreamRef.current = stream;
205
 
206
+ // Use a new context for input to ensure 16k rate if browser supports specific ctx rate
207
  const ctx = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: INPUT_SAMPLE_RATE });
208
  const source = ctx.createMediaStreamSource(stream);
209
  const processor = ctx.createScriptProcessor(4096, 1, 1);
 
211
  processor.onaudioprocess = (e) => {
212
  const inputData = e.inputBuffer.getChannelData(0);
213
 
214
+ // Float32 -> Int16 -> Base64
215
  const l = inputData.length;
216
  const int16Data = new Int16Array(l);
217
  for (let i = 0; i < l; i++) {
218
  int16Data[i] = inputData[i] * 32768;
219
  }
220
 
 
221
  let binary = '';
222
  const bytes = new Uint8Array(int16Data.buffer);
223
  const len = bytes.byteLength;
 
226
  }
227
  const b64 = btoa(binary);
228
 
229
+ if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
230
+ wsRef.current.send(JSON.stringify({
231
+ type: 'audio',
232
+ data: b64
233
+ }));
234
  }
235
  };
236
 
237
  source.connect(processor);
238
+ processor.connect(ctx.destination); // Destination is mute, just for processing pipeline
239
 
240
  sourceNodeRef.current = source;
241
  processorRef.current = processor;
 
270
  };
271
 
272
  const handleDisconnect = () => {
273
+ if (wsRef.current) {
274
+ wsRef.current.close();
275
+ wsRef.current = null;
276
  }
277
  // Cleanup Audio
278
  if (audioContextRef.current) {
 
 
279
  audioContextRef.current.close().catch(()=>{});
280
  audioContextRef.current = null;
281
  }
282
 
283
+ stopRecording();
284
 
285
  setStatus('DISCONNECTED');
286
  setTranscript('');
 
308
  <div className="bg-slate-800/50 p-4 flex justify-between items-center text-white shrink-0 backdrop-blur-md">
309
  <div className="flex items-center gap-2">
310
  <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
311
+ <span className="font-bold text-sm">AI 实时通话 (代理模式)</span>
312
  </div>
313
  <div className="flex gap-2">
314
  <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors"><RefreshCw size={16}/></button>
 
378
  {status === 'DISCONNECTED' ? (
379
  <button
380
  onClick={handleConnect}
381
+ className="w-full py-4 bg-blue-600 hover:bg-blue-500 text-white rounded-2xl font-bold flex items-center justify-center gap-2 transition-all hover:scale-[1.02] active:scale-95"
 
382
  >
383
  <Power size={20}/> 开启 AI 语音
384
  </button>
package.json CHANGED
@@ -23,7 +23,8 @@
23
  "openai": "^4.28.0",
24
  "@google/genai": "*",
25
  "react-markdown": "^9.0.1",
26
- "remark-gfm": "^4.0.0"
 
27
  },
28
  "devDependencies": {
29
  "@types/react": "^18.2.43",
 
23
  "openai": "^4.28.0",
24
  "@google/genai": "*",
25
  "react-markdown": "^9.0.1",
26
+ "remark-gfm": "^4.0.0",
27
+ "ws": "^8.16.0"
28
  },
29
  "devDependencies": {
30
  "@types/react": "^18.2.43",
server.js CHANGED
@@ -15,11 +15,14 @@ const cors = require('cors');
15
  const bodyParser = require('body-parser');
16
  const path = require('path');
17
  const compression = require('compression');
 
 
18
 
19
  const PORT = 7860;
20
  const MONGO_URI = 'mongodb+srv://dv890a:db8822723@chatpro.gw3v0v7.mongodb.net/chatpro?retryWrites=true&w=majority&appName=chatpro&authSource=admin';
21
 
22
  const app = express();
 
23
 
24
  app.use(compression({
25
  filter: (req, res) => {
@@ -47,15 +50,10 @@ const connectDB = async () => {
47
  try {
48
  await mongoose.connect(MONGO_URI, { serverSelectionTimeoutMS: 30000 });
49
  console.log('✅ MongoDB 连接成功 (Real Data)');
50
-
51
- // FIX: Drop the restrictive index that prevents multiple schedules per slot
52
  try {
53
  await ScheduleModel.collection.dropIndex('schoolId_1_className_1_dayOfWeek_1_period_1');
54
  console.log('✅ Dropped restrictive schedule index');
55
- } catch (e) {
56
- // Ignore error if index doesn't exist
57
- }
58
-
59
  } catch (err) {
60
  console.error('❌ MongoDB 连接失败:', err.message);
61
  InMemoryDB.isFallback = true;
@@ -63,6 +61,155 @@ const connectDB = async () => {
63
  };
64
  connectDB();
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  const getQueryFilter = (req) => {
67
  const s = req.headers['x-school-id'];
68
  const role = req.headers['x-user-role'];
@@ -113,8 +260,6 @@ const generateStudentNo = async () => {
113
  // MOUNT AI ROUTES
114
  app.use('/api/ai', aiRoutes);
115
 
116
- // ... (Rest of Existing Routes) ...
117
-
118
  // --- TODO LIST ENDPOINTS ---
119
  app.get('/api/todos', async (req, res) => {
120
  const username = req.headers['x-user-username'];
@@ -162,7 +307,6 @@ app.put('/api/schedules/:id', async (req, res) => {
162
  // Create or Update by Logic (Upsert)
163
  app.post('/api/schedules', async (req, res) => {
164
  try {
165
- // Updated Filter: Include weekType to allow separate ODD/EVEN records for same slot
166
  const filter = {
167
  className: req.body.className,
168
  dayOfWeek: req.body.dayOfWeek,
@@ -175,14 +319,12 @@ app.post('/api/schedules', async (req, res) => {
175
  await ScheduleModel.findOneAndUpdate(filter, injectSchoolId(req, req.body), {upsert:true});
176
  res.json({});
177
  } catch (e) {
178
- console.error("Save schedule error:", e);
179
  res.status(500).json({ error: e.message });
180
  }
181
  });
182
 
183
  app.delete('/api/schedules', async (req, res) => {
184
  try {
185
- // Support deleting by ID if provided
186
  if (req.query.id) {
187
  await ScheduleModel.findByIdAndDelete(req.query.id);
188
  } else {
@@ -194,14 +336,12 @@ app.delete('/api/schedules', async (req, res) => {
194
  }
195
  });
196
 
197
- // --- USER MENU ORDER ---
198
  app.put('/api/users/:id/menu-order', async (req, res) => {
199
  const { menuOrder } = req.body;
200
  await User.findByIdAndUpdate(req.params.id, { menuOrder });
201
  res.json({ success: true });
202
  });
203
 
204
- // ... (Rest of existing routes unchanged) ...
205
  app.get('/api/classes/:className/teachers', async (req, res) => {
206
  const { className } = req.params;
207
  const schoolId = req.headers['x-school-id'];
@@ -545,4 +685,4 @@ app.delete('/api/attendance/calendar/:id', async (req, res) => { await SchoolCal
545
  app.post('/api/batch-delete', async (req, res) => { if(req.body.type==='student') await Student.deleteMany({_id:{$in:req.body.ids}}); if(req.body.type==='score') await Score.deleteMany({_id:{$in:req.body.ids}}); res.json({}); });
546
 
547
  app.get('*', (req, res) => { res.sendFile(path.join(__dirname, 'dist', 'index.html')); });
548
- app.listen(PORT, () => console.log(`🚀 Server running on port ${PORT}`));
 
15
  const bodyParser = require('body-parser');
16
  const path = require('path');
17
  const compression = require('compression');
18
+ const WebSocket = require('ws'); // Import WS
19
+ const http = require('http'); // Import HTTP for server integration
20
 
21
  const PORT = 7860;
22
  const MONGO_URI = 'mongodb+srv://dv890a:db8822723@chatpro.gw3v0v7.mongodb.net/chatpro?retryWrites=true&w=majority&appName=chatpro&authSource=admin';
23
 
24
  const app = express();
25
+ const server = http.createServer(app); // Wrap express app in HTTP server
26
 
27
  app.use(compression({
28
  filter: (req, res) => {
 
50
  try {
51
  await mongoose.connect(MONGO_URI, { serverSelectionTimeoutMS: 30000 });
52
  console.log('✅ MongoDB 连接成功 (Real Data)');
 
 
53
  try {
54
  await ScheduleModel.collection.dropIndex('schoolId_1_className_1_dayOfWeek_1_period_1');
55
  console.log('✅ Dropped restrictive schedule index');
56
+ } catch (e) { }
 
 
 
57
  } catch (err) {
58
  console.error('❌ MongoDB 连接失败:', err.message);
59
  InMemoryDB.isFallback = true;
 
61
  };
62
  connectDB();
63
 
64
+ // --- WebSocket Proxy for Gemini Live API ---
65
+ const wss = new WebSocket.Server({ noServer: true });
66
+
67
+ server.on('upgrade', (request, socket, head) => {
68
+ // Basic path check
69
+ if (request.url.startsWith('/ws/live')) {
70
+ wss.handleUpgrade(request, socket, head, (ws) => {
71
+ wss.emit('connection', ws, request);
72
+ });
73
+ } else {
74
+ socket.destroy();
75
+ }
76
+ });
77
+
78
+ wss.on('connection', async (ws, req) => {
79
+ console.log('🔌 Client connected to Live Proxy');
80
+ let geminiSession = null;
81
+ let isGeminiConnected = false;
82
+
83
+ try {
84
+ // 1. Get API Key
85
+ const config = await ConfigModel.findOne({ key: 'main' });
86
+ let apiKey = process.env.API_KEY;
87
+ if (config && config.apiKeys && config.apiKeys.gemini && config.apiKeys.gemini.length > 0) {
88
+ apiKey = config.apiKeys.gemini[0]; // Use first available key
89
+ }
90
+
91
+ if (!apiKey) {
92
+ ws.send(JSON.stringify({ type: 'error', message: 'No Server API Key Configured' }));
93
+ ws.close();
94
+ return;
95
+ }
96
+
97
+ // 2. Initialize Gemini SDK (Dynamic Import)
98
+ const { GoogleGenAI, Modality } = await import("@google/genai");
99
+ const client = new GoogleGenAI({ apiKey });
100
+
101
+ geminiSession = await client.live.connect({
102
+ model: 'gemini-2.5-flash-native-audio-preview-09-2025',
103
+ config: {
104
+ responseModalities: [Modality.AUDIO],
105
+ speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
106
+ systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请必须使用中文(普通话)回答。回答要简短、自然、口语化。不要使用Markdown格式。" }] },
107
+ outputAudioTranscription: { model: true }
108
+ }
109
+ });
110
+
111
+ isGeminiConnected = true;
112
+ console.log('🔗 Backend connected to Gemini Live');
113
+ ws.send(JSON.stringify({ type: 'status', content: 'CONNECTED' }));
114
+
115
+ // 3. Forward Gemini -> Client
116
+ // Note: The SDK's session object relies on callbacks/listeners in newer versions,
117
+ // or uses a stream iterator. Adjusting based on standard SDK usage for proxying.
118
+ // We need to attach listeners to the session manually if the SDK supports it,
119
+ // or loop through the incoming stream.
120
+
121
+ // Since `live.connect` returns a session that is also an event emitter (typically) or we passed callbacks.
122
+ // Let's re-instantiate using callbacks approach for easier piping which matches the frontend logic I wrote earlier,
123
+ // but now applied to backend.
124
+
125
+ // RE-DO Connect with Callbacks to handle stream piping properly
126
+ geminiSession = await client.live.connect({
127
+ model: 'gemini-2.5-flash-native-audio-preview-09-2025',
128
+ config: {
129
+ responseModalities: [Modality.AUDIO],
130
+ speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
131
+ systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请必须使用中文(普通话)回答。回答要简短、自然、口语化。不要使用Markdown格式。" }] },
132
+ outputAudioTranscription: { model: true }
133
+ },
134
+ callbacks: {
135
+ onopen: () => {
136
+ console.log('Gemini Stream Open');
137
+ },
138
+ onmessage: (msg) => {
139
+ // Extract relevant data and send to frontend
140
+ const serverContent = msg.serverContent;
141
+
142
+ // Audio
143
+ const audioData = serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
144
+ if (audioData) {
145
+ if (ws.readyState === WebSocket.OPEN) {
146
+ ws.send(JSON.stringify({ type: 'audio', data: audioData }));
147
+ }
148
+ }
149
+
150
+ // Text
151
+ const text = serverContent?.modelTurn?.parts?.[0]?.text;
152
+ if (text) {
153
+ if (ws.readyState === WebSocket.OPEN) {
154
+ ws.send(JSON.stringify({ type: 'text', content: text }));
155
+ }
156
+ }
157
+
158
+ // Turn Complete
159
+ if (serverContent?.turnComplete) {
160
+ if (ws.readyState === WebSocket.OPEN) {
161
+ ws.send(JSON.stringify({ type: 'turnComplete' }));
162
+ }
163
+ }
164
+ },
165
+ onclose: () => {
166
+ console.log('Gemini Stream Closed');
167
+ if (ws.readyState === WebSocket.OPEN) ws.close();
168
+ },
169
+ onerror: (err) => {
170
+ console.error('Gemini Stream Error', err);
171
+ if (ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify({ type: 'error', message: 'Upstream Error' }));
172
+ }
173
+ }
174
+ });
175
+
176
+ // 4. Forward Client -> Gemini
177
+ ws.on('message', (message) => {
178
+ if (!isGeminiConnected) return;
179
+ try {
180
+ const parsed = JSON.parse(message);
181
+ if (parsed.type === 'audio') {
182
+ // Frontend sends base64, SDK needs base64 string directly in media object
183
+ // data: { mimeType: ..., data: ... }
184
+ geminiSession.sendRealtimeInput({
185
+ media: {
186
+ mimeType: 'audio/pcm;rate=16000',
187
+ data: parsed.data
188
+ }
189
+ });
190
+ }
191
+ } catch (e) {
192
+ console.error('Error parsing client message', e);
193
+ }
194
+ });
195
+
196
+ ws.on('close', () => {
197
+ console.log('Client disconnected');
198
+ if (geminiSession) {
199
+ // Try to close session if method exists, otherwise it might close automatically
200
+ try { geminiSession.close(); } catch(e){}
201
+ }
202
+ });
203
+
204
+ } catch (e) {
205
+ console.error('WebSocket Handshake Error:', e);
206
+ ws.send(JSON.stringify({ type: 'error', message: e.message }));
207
+ ws.close();
208
+ }
209
+ });
210
+
211
+ // ... (Rest of Express logic) ...
212
+
213
  const getQueryFilter = (req) => {
214
  const s = req.headers['x-school-id'];
215
  const role = req.headers['x-user-role'];
 
260
  // MOUNT AI ROUTES
261
  app.use('/api/ai', aiRoutes);
262
 
 
 
263
  // --- TODO LIST ENDPOINTS ---
264
  app.get('/api/todos', async (req, res) => {
265
  const username = req.headers['x-user-username'];
 
307
  // Create or Update by Logic (Upsert)
308
  app.post('/api/schedules', async (req, res) => {
309
  try {
 
310
  const filter = {
311
  className: req.body.className,
312
  dayOfWeek: req.body.dayOfWeek,
 
319
  await ScheduleModel.findOneAndUpdate(filter, injectSchoolId(req, req.body), {upsert:true});
320
  res.json({});
321
  } catch (e) {
 
322
  res.status(500).json({ error: e.message });
323
  }
324
  });
325
 
326
  app.delete('/api/schedules', async (req, res) => {
327
  try {
 
328
  if (req.query.id) {
329
  await ScheduleModel.findByIdAndDelete(req.query.id);
330
  } else {
 
336
  }
337
  });
338
 
 
339
  app.put('/api/users/:id/menu-order', async (req, res) => {
340
  const { menuOrder } = req.body;
341
  await User.findByIdAndUpdate(req.params.id, { menuOrder });
342
  res.json({ success: true });
343
  });
344
 
 
345
  app.get('/api/classes/:className/teachers', async (req, res) => {
346
  const { className } = req.params;
347
  const schoolId = req.headers['x-school-id'];
 
685
  app.post('/api/batch-delete', async (req, res) => { if(req.body.type==='student') await Student.deleteMany({_id:{$in:req.body.ids}}); if(req.body.type==='score') await Score.deleteMany({_id:{$in:req.body.ids}}); res.json({}); });
686
 
687
  app.get('*', (req, res) => { res.sendFile(path.join(__dirname, 'dist', 'index.html')); });
688
+ server.listen(PORT, () => console.log(`🚀 Server running on port ${PORT}`));