| import React, { useEffect, useState, useRef } from 'react'; |
| import { Mic, X, MicOff, AlertCircle } from 'lucide-react'; |
|
|
| export default function VoiceSessionModal({ isOpen, onClose, apiKey }) { |
| const [status, setStatus] = useState('connecting'); |
| const [errorMessage, setErrorMessage] = useState(''); |
| const [isMuted, setIsMuted] = useState(false); |
| |
| const wsRef = useRef(null); |
| const audioContextRef = useRef(null); |
| const playbackContextRef = useRef(null); |
| const streamRef = useRef(null); |
| const processorRef = useRef(null); |
| const nextPlayTimeRef = useRef(0); |
| const textTranscriptRef = useRef(''); |
| const [transcript, setTranscript] = useState(''); |
|
|
| |
| const base64ArrayBuffer = (arrayBuffer) => { |
| let binary = ''; |
| const bytes = new Uint8Array(arrayBuffer); |
| const len = bytes.byteLength; |
| for (let i = 0; i < len; i++) { |
| binary += String.fromCharCode(bytes[i]); |
| } |
| return window.btoa(binary); |
| }; |
|
|
| useEffect(() => { |
| if (!isOpen) return; |
|
|
| setStatus('connecting'); |
| setTranscript(''); |
| textTranscriptRef.current = ''; |
|
|
| |
| const protocol = window.location.protocol === 'https:' ? 'wss://' : 'ws://'; |
| const host = window.location.host === 'localhost:5173' ? 'localhost:8000' : window.location.host; |
| const wsUrl = `${protocol}${host}/api/live-ws?api_key=${apiKey || ''}`; |
|
|
| |
| const ws = new WebSocket(wsUrl); |
| wsRef.current = ws; |
|
|
| ws.onopen = async () => { |
| try { |
| |
| const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); |
| streamRef.current = stream; |
|
|
| |
| audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 }); |
| playbackContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 }); |
| nextPlayTimeRef.current = playbackContextRef.current.currentTime; |
|
|
| |
| const source = audioContextRef.current.createMediaStreamSource(stream); |
| const processor = audioContextRef.current.createScriptProcessor(2048, 1, 1); |
| processorRef.current = processor; |
|
|
| processor.onaudioprocess = (e) => { |
| if (isMuted) return; |
|
|
| const inputData = e.inputBuffer.getChannelData(0); |
| |
| const pcmData = new Int16Array(inputData.length); |
| for (let i = 0; i < inputData.length; i++) { |
| pcmData[i] = Math.max(-1, Math.min(1, inputData[i])) * 0x7FFF; |
| } |
|
|
| |
| if (ws.readyState === WebSocket.OPEN) { |
| const base64Audio = base64ArrayBuffer(pcmData.buffer); |
| ws.send(JSON.stringify({ type: 'audio', data: base64Audio })); |
| } |
| }; |
|
|
| source.connect(processor); |
| processor.connect(audioContextRef.current.destination); |
|
|
| setStatus('listening'); |
| } catch (err) { |
| console.error("Microphone access failed:", err); |
| setStatus('error'); |
| setErrorMessage("Microphone access is required for voice session. Please allow mic permissions."); |
| } |
| }; |
|
|
| ws.onmessage = async (event) => { |
| const message = JSON.parse(event.data); |
|
|
| if (message.type === 'audio') { |
| setStatus('speaking'); |
| |
| |
| const binary = window.atob(message.data); |
| const bytes = new Uint8Array(binary.length); |
| for (let i = 0; i < binary.length; i++) { |
| bytes[i] = binary.charCodeAt(i); |
| } |
|
|
| const int16Data = new Int16Array(bytes.buffer); |
| const float32Data = new Float32Array(int16Data.length); |
| for (let i = 0; i < int16Data.length; i++) { |
| float32Data[i] = int16Data[i] / 0x7FFF; |
| } |
|
|
| const pContext = playbackContextRef.current; |
| if (pContext && pContext.state !== 'suspended') { |
| const audioBuffer = pContext.createBuffer(1, float32Data.length, 24000); |
| audioBuffer.getChannelData(0).set(float32Data); |
|
|
| const bufferSource = pContext.createBufferSource(); |
| bufferSource.buffer = audioBuffer; |
| bufferSource.connect(pContext.destination); |
|
|
| |
| const startTime = Math.max(pContext.currentTime, nextPlayTimeRef.current); |
| bufferSource.start(startTime); |
| nextPlayTimeRef.current = startTime + audioBuffer.duration; |
| } |
| } else if (message.type === 'text') { |
| |
| textTranscriptRef.current += message.data; |
| setTranscript(textTranscriptRef.current); |
| } else if (message.type === 'turn_complete') { |
| setStatus('listening'); |
| textTranscriptRef.current = ''; |
| } |
| }; |
|
|
| ws.onerror = (err) => { |
| console.error("WebSocket error:", err); |
| setStatus('error'); |
| setErrorMessage("Lost connection to Gemini Live server."); |
| }; |
|
|
| ws.onclose = () => { |
| setStatus('connecting'); |
| }; |
|
|
| return () => { |
| |
| if (wsRef.current) wsRef.current.close(); |
| if (processorRef.current) processorRef.current.disconnect(); |
| if (streamRef.current) { |
| streamRef.current.getTracks().forEach(track => track.stop()); |
| } |
| if (audioContextRef.current) audioContextRef.current.close(); |
| if (playbackContextRef.current) playbackContextRef.current.close(); |
| }; |
| }, [isOpen, isMuted]); |
|
|
| if (!isOpen) return null; |
|
|
| return ( |
| <div className="voice-modal-backdrop"> |
| <div className="voice-modal-content"> |
| <button className="voice-modal-close" onClick={onClose}> |
| <X size={20} /> |
| </button> |
| |
| <div className="voice-modal-header"> |
| <h2>Socratic Voice Space</h2> |
| <p>Real-Time Bidirectional Dialogue</p> |
| </div> |
| |
| {/* Pulse Animations and Mic States */} |
| <div className="voice-visualizer-container"> |
| {status === 'connecting' && ( |
| <div className="voice-status-indicator connecting"> |
| <div className="pulse-circle" /> |
| <span>Connecting to Gemini Live...</span> |
| </div> |
| )} |
| |
| {status === 'listening' && ( |
| <div className="voice-status-indicator listening"> |
| <div className="pulse-circle active" /> |
| <div className="pulse-ring ring-1" /> |
| <div className="pulse-ring ring-2" /> |
| <span style={{ color: 'var(--color-happy)' }}>Listening to you... Go ahead and speak!</span> |
| </div> |
| )} |
| |
| {status === 'speaking' && ( |
| <div className="voice-status-indicator speaking"> |
| <div className="pulse-circle active speaking-pulse" /> |
| <div className="pulse-ring ring-1 speaking-ring" /> |
| <div className="pulse-ring ring-2 speaking-ring" /> |
| <span style={{ color: 'var(--secondary)' }}>Socratic Tutor is speaking...</span> |
| </div> |
| )} |
| |
| {status === 'error' && ( |
| <div className="voice-status-indicator error" style={{ gap: '0.8rem' }}> |
| <AlertCircle size={40} color="var(--color-frustrated)" /> |
| <p style={{ color: 'var(--color-frustrated)', fontSize: '0.9rem', textAlign: 'center', maxWidth: '300px' }}> |
| {errorMessage} |
| </p> |
| </div> |
| )} |
| </div> |
| |
| {/* Transcription Display */} |
| {status === 'speaking' && transcript && ( |
| <div className="voice-transcript-box"> |
| <p>"{transcript}"</p> |
| </div> |
| )} |
| |
| {/* Control Buttons */} |
| <div className="voice-modal-controls"> |
| <button |
| className={`voice-control-btn ${isMuted ? 'muted' : ''}`} |
| onClick={() => setIsMuted(!isMuted)} |
| disabled={status === 'error' || status === 'connecting'} |
| title={isMuted ? "Unmute microphone" : "Mute microphone"} |
| > |
| {isMuted ? <MicOff size={22} /> : <Mic size={22} />} |
| <span>{isMuted ? "Muted" : "Active"}</span> |
| </button> |
| </div> |
| </div> |
| </div> |
| ); |
| } |
|
|