import React, { useEffect, useState, useRef } from 'react'; import { Mic, X, MicOff, AlertCircle } from 'lucide-react'; export default function VoiceSessionModal({ isOpen, onClose, apiKey }) { const [status, setStatus] = useState('connecting'); // 'connecting', 'listening', 'speaking', 'error' const [errorMessage, setErrorMessage] = useState(''); const [isMuted, setIsMuted] = useState(false); const wsRef = useRef(null); const audioContextRef = useRef(null); const playbackContextRef = useRef(null); const streamRef = useRef(null); const processorRef = useRef(null); const nextPlayTimeRef = useRef(0); const textTranscriptRef = useRef(''); const [transcript, setTranscript] = useState(''); // Helper: Convert ArrayBuffer to Base64 const base64ArrayBuffer = (arrayBuffer) => { let binary = ''; const bytes = new Uint8Array(arrayBuffer); const len = bytes.byteLength; for (let i = 0; i < len; i++) { binary += String.fromCharCode(bytes[i]); } return window.btoa(binary); }; useEffect(() => { if (!isOpen) return; setStatus('connecting'); setTranscript(''); textTranscriptRef.current = ''; // Determine WebSocket URL const protocol = window.location.protocol === 'https:' ? 'wss://' : 'ws://'; const host = window.location.host === 'localhost:5173' ? 'localhost:8000' : window.location.host; const wsUrl = `${protocol}${host}/api/live-ws?api_key=${apiKey || ''}`; // Establish WebSocket Connection const ws = new WebSocket(wsUrl); wsRef.current = ws; ws.onopen = async () => { try { // Request Microphone access const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); streamRef.current = stream; // Initialize Audio contexts audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 }); playbackContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 }); nextPlayTimeRef.current = playbackContextRef.current.currentTime; // Capture Mic Input const source = audioContextRef.current.createMediaStreamSource(stream); const processor = audioContextRef.current.createScriptProcessor(2048, 1, 1); processorRef.current = processor; processor.onaudioprocess = (e) => { if (isMuted) return; const inputData = e.inputBuffer.getChannelData(0); // Convert Float32 to Int16 PCM const pcmData = new Int16Array(inputData.length); for (let i = 0; i < inputData.length; i++) { pcmData[i] = Math.max(-1, Math.min(1, inputData[i])) * 0x7FFF; } // Send chunk to server if (ws.readyState === WebSocket.OPEN) { const base64Audio = base64ArrayBuffer(pcmData.buffer); ws.send(JSON.stringify({ type: 'audio', data: base64Audio })); } }; source.connect(processor); processor.connect(audioContextRef.current.destination); setStatus('listening'); } catch (err) { console.error("Microphone access failed:", err); setStatus('error'); setErrorMessage("Microphone access is required for voice session. Please allow mic permissions."); } }; ws.onmessage = async (event) => { const message = JSON.parse(event.data); if (message.type === 'audio') { setStatus('speaking'); // Decode base64 24kHz PCM back to Float32 for Web Audio playback const binary = window.atob(message.data); const bytes = new Uint8Array(binary.length); for (let i = 0; i < binary.length; i++) { bytes[i] = binary.charCodeAt(i); } const int16Data = new Int16Array(bytes.buffer); const float32Data = new Float32Array(int16Data.length); for (let i = 0; i < int16Data.length; i++) { float32Data[i] = int16Data[i] / 0x7FFF; } const pContext = playbackContextRef.current; if (pContext && pContext.state !== 'suspended') { const audioBuffer = pContext.createBuffer(1, float32Data.length, 24000); audioBuffer.getChannelData(0).set(float32Data); const bufferSource = pContext.createBufferSource(); bufferSource.buffer = audioBuffer; bufferSource.connect(pContext.destination); // Gapless scheduling const startTime = Math.max(pContext.currentTime, nextPlayTimeRef.current); bufferSource.start(startTime); nextPlayTimeRef.current = startTime + audioBuffer.duration; } } else if (message.type === 'text') { // Handle incoming Socratic tutor speech transcription textTranscriptRef.current += message.data; setTranscript(textTranscriptRef.current); } else if (message.type === 'turn_complete') { setStatus('listening'); textTranscriptRef.current = ''; } }; ws.onerror = (err) => { console.error("WebSocket error:", err); setStatus('error'); setErrorMessage("Lost connection to Gemini Live server."); }; ws.onclose = () => { setStatus('connecting'); }; return () => { // Clean up connections and audio context on unmount if (wsRef.current) wsRef.current.close(); if (processorRef.current) processorRef.current.disconnect(); if (streamRef.current) { streamRef.current.getTracks().forEach(track => track.stop()); } if (audioContextRef.current) audioContextRef.current.close(); if (playbackContextRef.current) playbackContextRef.current.close(); }; }, [isOpen, isMuted]); if (!isOpen) return null; return (

Socratic Voice Space

Real-Time Bidirectional Dialogue

{/* Pulse Animations and Mic States */}
{status === 'connecting' && (
Connecting to Gemini Live...
)} {status === 'listening' && (
Listening to you... Go ahead and speak!
)} {status === 'speaking' && (
Socratic Tutor is speaking...
)} {status === 'error' && (

{errorMessage}

)}
{/* Transcription Display */} {status === 'speaking' && transcript && (

"{transcript}"

)} {/* Control Buttons */}
); }