Update components/LiveAssistant.tsx

components/LiveAssistant.tsx  +319 -263

CHANGED
@@ -1,11 +1,16 @@
 
 import React, { useState, useRef, useEffect } from 'react';
 import { GoogleGenAI, LiveServerMessage, Modality } from "@google/genai";
-import { Mic, X,
+import { Mic, X, Power, Loader2, Bot, Volume2, Radio, Activity, RefreshCw, ChevronDown } from 'lucide-react';
 import { api } from '../services/api';
 
-// ---
-function decode(base64: string) {
+// --- Audio Types & Helpers ---
+// 16kHz for Gemini Input
+const INPUT_SAMPLE_RATE = 16000;
+// 24kHz for Gemini Output
+const OUTPUT_SAMPLE_RATE = 24000;
+
+function base64ToUint8Array(base64: string) {
   const binaryString = atob(base64);
   const len = binaryString.length;
   const bytes = new Uint8Array(len);
@@ -15,73 +20,29 @@ function decode(base64: string) {
   return bytes;
 }
 
-async function decodeAudioData(
-  data: Uint8Array,
-  ctx: AudioContext,
-  sampleRate: number,
-  numChannels: number,
-): Promise<AudioBuffer> {
-  const dataInt16 = new Int16Array(data.buffer);
-  const frameCount = dataInt16.length / numChannels;
-  const buffer = ctx.createBuffer(numChannels, frameCount, sampleRate);
-
-  for (let channel = 0; channel < numChannels; channel++) {
-    const channelData = buffer.getChannelData(channel);
-    for (let i = 0; i < frameCount; i++) {
-      channelData[i] = dataInt16[i * numChannels + channel] / 32768.0;
-    }
-  }
-  return buffer;
-}
-
-function createBlob(data: Float32Array): { data: string; mimeType: string } {
-  const l = data.length;
-  const int16 = new Int16Array(l);
-  for (let i = 0; i < l; i++) {
-    int16[i] = data[i] * 32768;
-  }
-
-  // Custom encode function instead of js-base64
-  let binary = '';
-  const bytes = new Uint8Array(int16.buffer);
-  const len = bytes.byteLength;
-  for (let i = 0; i < len; i++) {
-    binary += String.fromCharCode(bytes[i]);
-  }
-  const base64 = btoa(binary);
-
-  return {
-    data: base64,
-    mimeType: 'audio/pcm;rate=16000',
-  };
-}
-
 export const LiveAssistant: React.FC = () => {
   const [isOpen, setIsOpen] = useState(false);
-  const [
-  const [isMicOn, setIsMicOn] = useState(false); // Toggle for "Hold to Talk" simulation
-  const [isSpeaking, setIsSpeaking] = useState(false); // Model speaking
-  const [logs, setLogs] = useState<{role: 'user'|'model', text: string}[]>([]);
+  const [status, setStatus] = useState<'DISCONNECTED' | 'CONNECTING' | 'CONNECTED' | 'LISTENING' | 'THINKING' | 'SPEAKING'>('DISCONNECTED');
   const [apiKey, setApiKey] = useState('');
-  const [
+  const [transcript, setTranscript] = useState(''); // Current subtitle
+  const [volumeLevel, setVolumeLevel] = useState(0);
 
-  //
+  // --- Refs for managing Audio & Session Lifecycle ---
   const audioContextRef = useRef<AudioContext | null>(null);
-  const
-  const
-  const
-  const
-  const nextStartTimeRef = useRef<number>(0);
-  const activeSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
+  const mediaStreamRef = useRef<MediaStream | null>(null);
+  const processorRef = useRef<ScriptProcessorNode | null>(null);
+  const sourceNodeRef = useRef<MediaStreamAudioSourceNode | null>(null);
+  const gainNodeRef = useRef<GainNode | null>(null);
 
-  //
-  const
+  const sessionRef = useRef<any>(null); // The GenAI Session
+  const nextPlayTimeRef = useRef<number>(0);
+  const analyserRef = useRef<AnalyserNode | null>(null);
+  const volumeIntervalRef = useRef<any>(null);
 
-  // 1.
+  // 1. Fetch Key on Open
   useEffect(() => {
-
-
-    setIsInitializing(true);
+    if (isOpen && !apiKey) {
+      api.ai.getStats().catch(() => {}); // Warm up
     fetch('/api/ai/live-access', {
       headers: {
         'x-user-username': api.auth.getCurrentUser()?.username || '',
@@ -91,204 +52,264 @@ export const LiveAssistant: React.FC = () => {
       .then(res => res.json())
       .then(data => {
         if (data.key) setApiKey(data.key);
-        setIsInitializing(false);
       })
-      .catch(
+      .catch(err => console.error("Failed to get live key", err));
     }
   }, [isOpen]);
 
-
-
-
-
-
-
+  // 2. Clean up on unmount or close
+  useEffect(() => {
+    if (!isOpen) {
+      handleDisconnect();
+    }
+    return () => {
+      handleDisconnect();
+    };
+  }, [isOpen]);
 
-
+  // Visualizer Loop
+  useEffect(() => {
+    if (status === 'DISCONNECTED') {
+      setVolumeLevel(0);
+      return;
+    }
+    volumeIntervalRef.current = setInterval(() => {
+      if (analyserRef.current) {
+        const array = new Uint8Array(analyserRef.current.frequencyBinCount);
+        analyserRef.current.getByteFrequencyData(array);
+        const avg = array.reduce((a,b)=>a+b) / array.length;
+        setVolumeLevel(Math.min(100, avg * 1.5));
+      }
+    }, 100);
+    return () => clearInterval(volumeIntervalRef.current);
+  }, [status]);
+
+  const initAudioContext = () => {
+    if (!audioContextRef.current) {
       // @ts-ignore
       const AudioCtor = window.AudioContext || window.webkitAudioContext;
-      const ctx = new AudioCtor({sampleRate:
-
-
-
+      const ctx = new AudioCtor({ sampleRate: OUTPUT_SAMPLE_RATE });
+
+      const analyser = ctx.createAnalyser();
+      analyser.fftSize = 64;
+
+      const gain = ctx.createGain();
+      gain.connect(ctx.destination); // For output
 
-
-
-
-
-
-
-
-
-      console.log("Microphone access granted");
+      audioContextRef.current = ctx;
+      analyserRef.current = analyser;
+      gainNodeRef.current = gain;
+    }
+    if (audioContextRef.current.state === 'suspended') {
+      audioContextRef.current.resume();
+    }
+  };
 
-
+  const handleConnect = async () => {
+    if (!apiKey) return;
+    setStatus('CONNECTING');
+    setTranscript('正在建立连接...');
+
+    try {
+      initAudioContext();
       const client = new GoogleGenAI({ apiKey });
 
-      const
+      const session = await client.live.connect({
         model: 'gemini-2.5-flash-native-audio-preview-09-2025',
-        config: {
-          responseModalities: [Modality.AUDIO],
-          speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
-          systemInstruction: { parts: [{ text: "你是一位乐于助人的校园AI助手。请始终使用中文回答。请简短、自然地进行对话,不要使用 Markdown 格式,不要进行搜索。" }] },
-          outputAudioTranscription: { model: true } // Enable transcription to show text
-        },
         callbacks: {
           onopen: () => {
-            console.log(
-            setIsConnected(true);
-            setIsInitializing(false);
-            setLogs(prev => [...prev, {role: 'model', text: '已连接,请点击麦克风说话。'}]);
+            console.log('Session Open');
           },
-          onmessage:
-
-            const audioData = msg.serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
-            if (audioData && audioContextRef.current && outputNodeRef.current) {
-              setIsSpeaking(true);
-              const ctx = audioContextRef.current;
-              const buffer = await decodeAudioData(decode(audioData), ctx, 24000, 1);
-
-              const source = ctx.createBufferSource();
-              source.buffer = buffer;
-              source.connect(outputNodeRef.current);
-
-              // Scheduling
-              const now = ctx.currentTime;
-              const startTime = Math.max(now, nextStartTimeRef.current);
-              source.start(startTime);
-              nextStartTimeRef.current = startTime + buffer.duration;
-
-              activeSourcesRef.current.add(source);
-              source.onended = () => {
-                activeSourcesRef.current.delete(source);
-                if (activeSourcesRef.current.size === 0) setIsSpeaking(false);
-              };
-            }
-
-            // Handle Text Transcription
-            const transcript = msg.serverContent?.modelTurn?.parts?.[0]?.text;
-            if (transcript) {
-              // Update last model log or add new
-              setLogs(prev => {
-                const last = prev[prev.length - 1];
-                const isInitialMessage = last && last.text === '已连接,请点击麦克风说话。';
-
-                // IMPORTANT: Do not append to the initial system message
-                if (last && last.role === 'model' && !isInitialMessage && !last.text.endsWith('\n')) {
-                  // Append to existing turn (simplified logic)
-                  return [...prev.slice(0, -1), { ...last, text: last.text + transcript }];
-                }
-                return [...prev, { role: 'model', text: transcript }];
-              });
-            }
-
-            // Handle Transcription of User Input (Echo)
-            // @ts-ignore - types might be missing in some SDK versions
-            const userTranscript = msg.serverContent?.outputAudioTranscription?.text || msg.serverContent?.turnComplete && "User input processed";
-            // Note: Standard API usually doesn't echo user transcript in serverContent easily without config, relying on model turn.
+          onmessage: (msg: LiveServerMessage) => {
+            handleServerMessage(msg);
           },
           onclose: () => {
-            console.log(
-
-            setLogs(prev => [...prev, {role: 'model', text: '连接已断开'}]);
+            console.log('Session Close');
+            handleDisconnect();
           },
           onerror: (e) => {
-            console.error(
-
+            console.error('Session Error', e);
+            setTranscript('连接发生错误,请重试');
+            handleDisconnect();
           }
+        },
+        config: {
+          responseModalities: [Modality.AUDIO],
+          speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },
+          // Strong instruction to force Chinese
+          systemInstruction: "You are a helpful school assistant. You MUST reply in spoken Chinese (Mandarin). Keep answers concise and friendly. Do not use markdown.",
         }
       });
-
-
+
+      sessionRef.current = session;
+      setStatus('CONNECTED');
+      setTranscript('连接成功,请按住麦克风说话');
 
     } catch (e) {
-      console.error("
-
+      console.error("Connect failed", e);
+      setStatus('DISCONNECTED');
+      setTranscript('连接失败');
     }
   };
 
-  const
-
-    if (sessionPromiseRef.current) {
-      sessionPromiseRef.current.then(s => s.close());
-      sessionPromiseRef.current = null;
-    }
+  const handleServerMessage = async (msg: LiveServerMessage) => {
+    const serverContent = msg.serverContent;
 
-    //
-
-    if (
-
-
+    // 1. Audio Output
+    const audioData = serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
+    if (audioData && audioContextRef.current) {
+      setStatus('SPEAKING'); // Receiving audio means speaking
+      const ctx = audioContextRef.current;
+      const bytes = base64ToUint8Array(audioData);
+
+      // Decode Raw PCM (16-bit, 24kHz, Mono)
+      const int16 = new Int16Array(bytes.buffer);
+      const float32 = new Float32Array(int16.length);
+      for(let i=0; i<int16.length; i++) float32[i] = int16[i] / 32768.0;
+
+      const buffer = ctx.createBuffer(1, float32.length, OUTPUT_SAMPLE_RATE);
+      buffer.copyToChannel(float32, 0);
+
+      const source = ctx.createBufferSource();
+      source.buffer = buffer;
+
+      // Connect to analyser for visuals
+      if (analyserRef.current && gainNodeRef.current) {
+        source.connect(analyserRef.current);
+        analyserRef.current.connect(gainNodeRef.current);
+      } else {
+        source.connect(ctx.destination);
+      }
+
+      // Schedule gapless playback
+      const now = ctx.currentTime;
+      const startTime = Math.max(now, nextPlayTimeRef.current);
+      source.start(startTime);
+      nextPlayTimeRef.current = startTime + buffer.duration;
+
+      source.onended = () => {
+        // If gap is large, we assume finished
+        if (ctx.currentTime >= nextPlayTimeRef.current - 0.1) {
+          setStatus('CONNECTED'); // Back to idle
+        }
+      };
+    }
+
+    // 2. Text Transcription (Subtitle)
+    // Note: The model sometimes returns 'thought' or 'search' logs here.
+    // We rely on audio mostly, but show text if it looks like a response.
+    const text = serverContent?.modelTurn?.parts?.[0]?.text;
+    if (text) {
+      if (!text.startsWith('**') && !text.includes('Finding')) {
+        setTranscript(text);
+      }
+    }
 
-
-
-
+    // 3. User Turn Finished (Model starts thinking)
+    if (serverContent?.turnComplete) {
+      setStatus('THINKING');
+    }
   };
 
-  const
-    if (
+  const startRecording = async () => {
+    if (status !== 'CONNECTED' && status !== 'SPEAKING') return;
 
-
-
-
+    try {
+      // Interrupt model if speaking
+      if (status === 'SPEAKING') {
+        // We can send an interruption message or just stop playing, but API handles new input as interrupt usually
+        setStatus('CONNECTED');
+      }
 
-
-
-    const ctx = audioContextRef.current;
-    // Input context sample rate usually needs to match stream, but we resample manually or rely on createScriptProcessor logic
-    // Simple approach: Use 16k context for input if possible, or downsample.
-    // Here we assume ctx is created at 24k (output), so input might need resampling or just sending as is if API tolerates.
-    // Gemini API expects 16k for input usually.
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: { sampleRate: INPUT_SAMPLE_RATE } });
+      mediaStreamRef.current = stream;
 
-    const
-    const source =
-    const processor =
+      const ctx = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: INPUT_SAMPLE_RATE });
+      const source = ctx.createMediaStreamSource(stream);
+      const processor = ctx.createScriptProcessor(4096, 1, 1);
 
-    let chunkCount = 0;
      processor.onaudioprocess = (e) => {
-      if (!newMicState) return; // Guard
        const inputData = e.inputBuffer.getChannelData(0);
-      const blob = createBlob(inputData);
 
-      //
-
-
+        // Downconvert Float32 to Int16 for Gemini
+        const l = inputData.length;
+        const int16Data = new Int16Array(l);
+        for (let i = 0; i < l; i++) {
+          int16Data[i] = inputData[i] * 32768;
+        }
+
+        // Convert to Base64 manually to avoid large lib dependency
+        let binary = '';
+        const bytes = new Uint8Array(int16Data.buffer);
+        const len = bytes.byteLength;
+        for (let i = 0; i < len; i++) {
+          binary += String.fromCharCode(bytes[i]);
+        }
+        const b64 = btoa(binary);
 
-
-
-
+        if (sessionRef.current) {
+          sessionRef.current.sendRealtimeInput({
+            media: { mimeType: `audio/pcm;rate=${INPUT_SAMPLE_RATE}`, data: b64 }
+          });
+        }
      };
-
+
      source.connect(processor);
-    processor.connect(
+      processor.connect(ctx.destination);
 
-
-
-
-
-
-
-
-
-    // STOP SENDING
-    console.log("Stopping audio stream...");
-    if (inputProcessorRef.current) {
-      inputProcessorRef.current.disconnect();
-      inputProcessorRef.current = null;
-    }
-    if (inputSourceRef.current) {
-      inputSourceRef.current.disconnect();
-      inputSourceRef.current = null;
-    }
+      sourceNodeRef.current = source;
+      processorRef.current = processor;
+      setStatus('LISTENING');
+      setTranscript('正在聆听...');
+
+    } catch (e) {
+      console.error(e);
+      setTranscript('无法访问麦克风');
     }
   };
 
-
-
-
-
+  const stopRecording = () => {
+    if (status !== 'LISTENING') return;
+
+    // Cleanup Mic Processing
+    if (processorRef.current) {
+      processorRef.current.disconnect();
+      processorRef.current = null;
+    }
+    if (sourceNodeRef.current) {
+      sourceNodeRef.current.disconnect();
+      sourceNodeRef.current = null;
+    }
+    if (mediaStreamRef.current) {
+      mediaStreamRef.current.getTracks().forEach(t => t.stop());
+      mediaStreamRef.current = null;
+    }
+
+    setStatus('THINKING');
+    setTranscript('思考中...');
+  };
 
-
+  const handleDisconnect = () => {
+    if (sessionRef.current) {
+      // sessionRef.current.close(); // SDK might not have close method exposed directly depending on version, but usually does
+      sessionRef.current = null;
+    }
+    // Cleanup Audio
+    if (audioContextRef.current) {
+      audioContextRef.current.suspend(); // Suspend instead of close to reuse? Or close.
+      // For robustness, let's just close and nullify.
+      audioContextRef.current.close().catch(()=>{});
+      audioContextRef.current = null;
+    }
+
+    stopRecording(); // Ensure mic is off
+
+    setStatus('DISCONNECTED');
+    setTranscript('');
+    nextPlayTimeRef.current = 0;
+  };
+
+  if (!api.auth.getCurrentUser()) return null;
 
   return (
     <div className="fixed bottom-6 right-6 z-[9999]">
@@ -302,74 +323,109 @@
         </button>
       )}
 
-      {/*
+      {/* Call Interface */}
       {isOpen && (
-        <div className="bg-
+        <div className="bg-slate-900 w-80 md:w-96 rounded-3xl shadow-2xl border border-slate-700 overflow-hidden flex flex-col animate-in slide-in-from-bottom-5 fade-in duration-300 h-[500px]">
           {/* Header */}
-          <div className="bg-
+          <div className="bg-slate-800/50 p-4 flex justify-between items-center text-white shrink-0 backdrop-blur-md">
             <div className="flex items-center gap-2">
-              <
-              <span className="font-bold">AI
+              <div className={`w-2 h-2 rounded-full ${status === 'DISCONNECTED' ? 'bg-red-500' : 'bg-green-500 animate-pulse'}`}></div>
+              <span className="font-bold text-sm">AI 实时通话</span>
             </div>
-            <div className="flex
-              <button onClick={
-              <button onClick={() => setIsOpen(false)} title="最小化" className="hover:bg-white/
+            <div className="flex gap-2">
+              <button onClick={handleDisconnect} title="重置" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors"><RefreshCw size={16}/></button>
+              <button onClick={() => setIsOpen(false)} title="最小化" className="hover:bg-white/10 p-1.5 rounded-full text-gray-400 hover:text-white transition-colors"><ChevronDown size={20}/></button>
             </div>
           </div>
 
-          {/*
-          <div className="flex-1
-            {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+          {/* Main Visual Area */}
+          <div className="flex-1 flex flex-col items-center justify-center p-6 relative">
+            {/* Visualizer Circle */}
+            <div className={`relative w-40 h-40 flex items-center justify-center transition-all duration-500 ${status === 'LISTENING' ? 'scale-110' : 'scale-100'}`}>
+              {/* Outer Glow */}
+              <div
+                className={`absolute inset-0 rounded-full blur-2xl transition-all duration-300 ${
+                  status === 'SPEAKING' ? 'bg-blue-500/40' :
+                  status === 'LISTENING' ? 'bg-green-500/40' :
+                  status === 'THINKING' ? 'bg-purple-500/40' : 'bg-gray-500/10'
+                }`}
+                style={{ opacity: 0.5 + (volumeLevel / 200) }}
+              ></div>
+
+              {/* Dynamic Rings */}
+              <div
+                className={`absolute inset-0 rounded-full border-2 border-white/10 transition-all duration-100`}
+                style={{ transform: `scale(${1 + volumeLevel/100})` }}
+              ></div>
+              <div
+                className={`absolute inset-0 rounded-full border border-white/20 transition-all duration-100 delay-75`}
+                style={{ transform: `scale(${1 + volumeLevel/150})` }}
+              ></div>
+
+              {/* Central Icon */}
+              <div className={`z-10 w-24 h-24 rounded-full flex items-center justify-center text-white shadow-xl transition-colors duration-500 ${
+                status === 'SPEAKING' ? 'bg-blue-600' :
+                status === 'LISTENING' ? 'bg-green-600' :
+                status === 'THINKING' ? 'bg-purple-600' :
+                status === 'CONNECTED' ? 'bg-slate-700' : 'bg-slate-800'
+              }`}>
+                {status === 'SPEAKING' ? <Volume2 size={40} className="animate-pulse"/> :
+                 status === 'LISTENING' ? <Mic size={40} className="animate-bounce"/> :
+                 status === 'THINKING' ? <Loader2 size={40} className="animate-spin"/> :
+                 status === 'CONNECTED' ? <Radio size={40}/> : <Power size={40}/>}
              </div>
-
+            </div>
+
+            {/* Status Text */}
+            <div className="mt-8 text-center px-4 w-full">
+              <p className={`text-sm font-bold uppercase tracking-wider mb-2 ${
+                status === 'SPEAKING' ? 'text-blue-400' :
+                status === 'LISTENING' ? 'text-green-400' :
+                status === 'THINKING' ? 'text-purple-400' : 'text-gray-500'
+              }`}>
+                {status === 'DISCONNECTED' ? '未连接' :
+                 status === 'CONNECTING' ? '连接中...' :
+                 status === 'CONNECTED' ? '准备就绪' :
+                 status === 'LISTENING' ? '正在聆听...' :
+                 status === 'THINKING' ? '思考中...' : '正在说话'}
+              </p>
+              <p className="text-white text-lg font-medium leading-relaxed min-h-[3rem] line-clamp-3 transition-all">
+                {transcript}
+              </p>
+            </div>
          </div>
 
         {/* Controls */}
-          <div className="p-
-          {
+        <div className="p-6 pb-8 bg-slate-800/50 backdrop-blur-md border-t border-slate-700 flex justify-center">
+          {status === 'DISCONNECTED' ? (
             <button
-              onClick={
-              disabled={
-              className="w-full py-
+              onClick={handleConnect}
+              disabled={!apiKey}
+              className="w-full py-4 bg-blue-600 hover:bg-blue-500 text-white rounded-2xl font-bold flex items-center justify-center gap-2 transition-all hover:scale-[1.02] active:scale-95 disabled:opacity-50 disabled:cursor-not-allowed"
             >
-
-              {isInitializing ? '正在连接...' : '开启语音会话'}
+              <Power size={20}/> 开启 AI 语音
             </button>
           ) : (
-            <div className="flex
-              <div className="
+            <div className="flex items-center gap-4 w-full justify-center">
+              <div className="relative group">
                 <button
-
-
+                  onMouseDown={startRecording}
+                  onMouseUp={stopRecording}
+                  onTouchStart={(e) => { e.preventDefault(); startRecording(); }}
+                  onTouchEnd={(e) => { e.preventDefault(); stopRecording(); }}
+                  className={`w-20 h-20 rounded-full flex items-center justify-center shadow-lg transition-all transform ${
+                    status === 'LISTENING' ? 'bg-green-500 scale-110 ring-4 ring-green-500/30' :
+                    'bg-white text-slate-900 hover:bg-gray-100'
+                  }`}
                 >
-
+                  <Mic size={32} fill={status==='LISTENING' ? 'white' : 'currentColor'} className={status==='LISTENING'?'text-white':''}/>
                 </button>
+                {status === 'CONNECTED' && (
+                  <div className="absolute -bottom-8 left-1/2 -translate-x-1/2 text-xs text-gray-400 whitespace-nowrap opacity-0 group-hover:opacity-100 transition-opacity">
+                    按住说话
+                  </div>
+                )}
              </div>
-            <p className="text-center text-xs text-gray-400 font-medium">
-              {isMicOn ? '正在聆听... 点击停止发送' : '点击麦克风开始说话'}
-            </p>
          </div>
        )}
      </div>
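
Note on the audio format: the new code inlines a Float32 -> Int16 -> base64 encode inside processor.onaudioprocess and the matching base64 -> Int16 -> Float32 decode inside handleServerMessage. A minimal standalone sketch of that round trip, with illustrative helper names that are not part of the diff:

// Hypothetical helpers mirroring the inline logic above.
function encodePcm16Base64(samples: Float32Array): string {
  const int16 = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    // The diff scales by 32768 directly; clamping first avoids the
    // wraparound of +1.0 to -32768 that Int16Array conversion produces.
    const s = Math.max(-1, Math.min(1, samples[i]));
    int16[i] = s < 0 ? s * 32768 : s * 32767;
  }
  let binary = '';
  const bytes = new Uint8Array(int16.buffer);
  for (let i = 0; i < bytes.byteLength; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

function decodePcm16Base64(base64: string): Float32Array {
  // Assumes an even byte length, which 16-bit PCM guarantees.
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
  const int16 = new Int16Array(bytes.buffer);
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) float32[i] = int16[i] / 32768.0;
  return float32;
}

// Round trip: samples survive within 1/32768 quantization error.
const out = decodePcm16Base64(encodePcm16Base64(new Float32Array([0, 0.5, -0.5])));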
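Note on playback scheduling: gapless output in handleServerMessage comes from a single monotonically advancing pointer (nextPlayTimeRef) rather than an explicit queue. A compact sketch of the same pattern, assuming a standalone AudioContext and an illustrative function name:

function scheduleChunk(
  ctx: AudioContext,
  buffer: AudioBuffer,
  state: { nextPlayTime: number }, // mirrors nextPlayTimeRef.current
  onIdle: () => void,
) {
  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);

  // Start now, or exactly where the previous chunk ends.
  const startTime = Math.max(ctx.currentTime, state.nextPlayTime);
  source.start(startTime);
  state.nextPlayTime = startTime + buffer.duration;

  source.onended = () => {
    // Mirrors the diff's 0.1 s heuristic: if nothing was queued behind
    // this chunk, treat the model turn as finished.
    if (ctx.currentTime >= state.nextPlayTime - 0.1) onIdle();
  };
}

Because every chunk is scheduled against the shared pointer, back-to-back network chunks play seamlessly even when they arrive faster than real time.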