| <!DOCTYPE html> |
| <html lang="en"> |
|
|
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Gemini Voice Chat</title> |
<style>
  /* Design tokens shared by every rule below. */
  :root {
    --color-accent: #6366f1;
    --color-background: #0f172a;
    --color-surface: #1e293b;
    --color-text: #e2e8f0;
    --boxSize: 8px;
    --gutter: 4px;
  }

  /* Page shell: content is centred both ways inside a full-height column. */
  body {
    margin: 0;
    padding: 0;
    background-color: var(--color-background);
    color: var(--color-text);
    font-family: system-ui, -apple-system, sans-serif;
    min-height: 100vh;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
  }

  /* Card that holds the status banner, controls, waveform and button. */
  .container {
    width: 90%;
    max-width: 800px;
    background-color: var(--color-surface);
    padding: 2rem;
    border-radius: 1rem;
    box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
  }

  /* Waveform: the script fills .box-container with .box bars and scales them. */
  .wave-container {
    position: relative;
    display: flex;
    min-height: 100px;
    max-height: 128px;
    justify-content: center;
    align-items: center;
    margin: 2rem 0;
  }

  .box-container {
    display: flex;
    justify-content: space-between;
    height: 64px;
    width: 100%;
  }

  .box {
    height: 100%;
    width: var(--boxSize);
    background: var(--color-accent);
    border-radius: 8px;
    transition: transform 0.05s ease;
  }

  /* Form controls. */
  .controls {
    display: grid;
    gap: 1rem;
    margin-bottom: 2rem;
  }

  .input-group {
    display: flex;
    flex-direction: column;
    gap: 0.5rem;
  }

  label {
    font-size: 0.875rem;
    font-weight: 500;
  }

  select {
    padding: 0.75rem;
    border-radius: 0.5rem;
    border: 1px solid rgba(255, 255, 255, 0.1);
    background-color: var(--color-background);
    color: var(--color-text);
    font-size: 1rem;
  }

  button {
    padding: 1rem 2rem;
    border-radius: 0.5rem;
    border: none;
    background-color: var(--color-accent);
    color: white;
    font-weight: 600;
    cursor: pointer;
    transition: all 0.2s ease;
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 12px;
    min-width: 180px;
  }

  /* Only enabled buttons react to hover; a disabled one must not look clickable. */
  button:hover:not(:disabled) {
    opacity: 0.9;
    transform: translateY(-1px);
  }

  button:disabled {
    cursor: default;
  }

  /* Visible keyboard focus for the interactive controls. */
  button:focus-visible,
  select:focus-visible {
    outline: 2px solid var(--color-text);
    outline-offset: 2px;
  }

  /* Button content while connecting: spinner plus label. */
  .icon-with-spinner {
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 12px;
    min-width: 180px;
  }

  .spinner {
    width: 20px;
    height: 20px;
    border: 2px solid white;
    border-top-color: transparent;
    border-radius: 50%;
    animation: spin 1s linear infinite;
    flex-shrink: 0;
  }

  @keyframes spin {
    to {
      transform: rotate(360deg);
    }
  }

  /* Button content while connected: level indicator plus label. */
  .pulse-container {
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 12px;
  }

  /* --audio-level is written by the script; 1 means "no input". */
  .pulse-circle {
    width: 20px;
    height: 20px;
    border-radius: 50%;
    background-color: white;
    opacity: 0.2;
    flex-shrink: 0;
    transform: scale(var(--audio-level, 1));
    transition: transform 0.1s ease;
  }

  /* Toast: hidden by default, the script sets display and the variant class. */
  .toast {
    position: fixed;
    top: 20px;
    left: 50%;
    transform: translateX(-50%);
    padding: 16px 24px;
    border-radius: 4px;
    font-size: 14px;
    z-index: 1000;
    display: none;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
  }

  .toast.error {
    background-color: #f44336;
    color: white;
  }

  .toast.warning {
    background-color: #ffd700;
    color: black;
  }

  /* Mute icon rendered inside the start/stop button. */
  .mute-toggle {
    width: 24px;
    height: 24px;
    cursor: pointer;
    flex-shrink: 0;
  }

  .mute-toggle svg {
    display: block;
  }

  #start-button {
    margin-left: auto;
    margin-right: auto;
  }

  /* API key status banner. */
  .api-status {
    background-color: var(--color-background);
    padding: 1rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
    text-align: center;
    border: 1px solid rgba(34, 197, 94, 0.3);
  }

  .api-status.success {
    color: #22c55e;
  }

  .api-status.error {
    color: #ef4444;
    border-color: rgba(239, 68, 68, 0.3);
  }
</style>
| </head> |
|
|
| <body> |
<!-- Toast shared by error and warning messages; the script sets its text, variant class and display.
     role="alert" makes assistive technology announce the message when it appears. -->
<div id="error-toast" class="toast" role="alert"></div>

<header style="text-align: center">
  <h1>Gemini Voice Chat</h1>
  <p>Speak with Gemini using real-time audio streaming</p>
  <p>
    Using API key from environment variable
  </p>
</header>

<main class="container">
  <div class="api-status success">
    ✓ API Key configured via environment variable
  </div>

  <div class="controls">
    <div class="input-group">
      <label for="voice">Voice</label>
      <select id="voice">
        <option value="Puck">Puck</option>
        <option value="Charon">Charon</option>
        <option value="Kore">Kore</option>
        <option value="Fenrir">Fenrir</option>
        <option value="Aoede">Aoede</option>
      </select>
    </div>
  </div>

  <!-- Purely visual waveform; the script appends the bars to .box-container. -->
  <div class="wave-container" aria-hidden="true">
    <div class="box-container"></div>
  </div>

  <button id="start-button" type="button">Start Recording</button>
</main>

<!-- Sink for the remote audio stream; the script assigns srcObject and starts playback. -->
<audio id="audio-output"></audio>
|
|
| <script> |
// ---- Session state (assigned by setupWebRTC, cleared by stopWebRTC) ----
let peerConnection;
let dataChannel;
let webrtc_id;
let isRecording = false;
let isMuted = false;

// ---- Web Audio state: "_input" is the microphone side, the unsuffixed pair is the remote side ----
let audioContext;
let analyser_input, dataArray_input;
let analyser, dataArray;
let source_input = null;
let source_output = null;

// ---- Static DOM references ----
const startButton = document.getElementById('start-button');
const voiceSelect = document.getElementById('voice');
const audioOutput = document.getElementById('audio-output');
const boxContainer = document.querySelector('.box-container');

// Build the waveform once: numBars empty ".box" elements appended in order.
const numBars = 32;
boxContainer.append(
  ...Array.from({ length: numBars }, () => {
    const bar = document.createElement('div');
    bar.className = 'box';
    return bar;
  })
);
| |
| |
// Shapes shared by both microphone icons (capsule, cradle, stand and base).
const MIC_ICON_SHAPES = `
  <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
  <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
  <line x1="12" y1="19" x2="12" y2="23"></line>
  <line x1="8" y1="23" x2="16" y2="23"></line>`;

/**
 * Build the markup for a microphone icon.
 *
 * The icon is marked aria-hidden because it is decorative: the element that
 * hosts it carries the textual "Mute"/"Unmute" title.
 *
 * @param {string} [extraShapes=''] Additional SVG markup drawn on top of the microphone.
 * @returns {string} A complete inline <svg> string.
 */
function buildMicIconSVG(extraShapes = '') {
  return `
<svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false">${MIC_ICON_SHAPES}${extraShapes}
</svg>`;
}

// Plain microphone: shown while the microphone is live.
const micIconSVG = buildMicIconSVG();

// Microphone with a diagonal strike-through: shown while muted.
const micMutedIconSVG = buildMicIconSVG(`
  <line x1="1" y1="1" x2="23" y2="23"></line>`);
| |
/**
 * Re-render the start/stop button from the current peer connection state.
 *
 * - "new" / "connecting": disabled button with a spinner.
 * - "connected": "Stop Recording" with the level indicator and the mute toggle.
 * - anything else, or no connection at all: plain "Start Recording".
 */
function updateButtonState() {
  const state = peerConnection ? peerConnection.connectionState : null;

  // Start from a clean slate on every render.
  startButton.innerHTML = '';
  startButton.onclick = null;

  if (state === 'connecting' || state === 'new') {
    startButton.innerHTML = `
      <div class="icon-with-spinner">
        <div class="spinner"></div>
        <span>Connecting...</span>
      </div>
    `;
    startButton.disabled = true;
    return;
  }

  if (state === 'connected') {
    const pulse = document.createElement('div');
    pulse.className = 'pulse-container';
    pulse.innerHTML = `
      <div class="pulse-circle"></div>
      <span>Stop Recording</span>
    `;

    const mute = document.createElement('div');
    mute.className = 'mute-toggle';
    mute.title = isMuted ? 'Unmute' : 'Mute';
    mute.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
    mute.addEventListener('click', toggleMute);

    startButton.appendChild(pulse);
    startButton.appendChild(mute);
    startButton.disabled = false;
    return;
  }

  startButton.innerHTML = 'Start Recording';
  startButton.disabled = false;
}
| |
// Pending auto-hide timer of the error toast, kept so a newer error can cancel it.
let errorToastHideTimer = null;

/**
 * Show an error message in the toast and hide it again after five seconds.
 *
 * A previously scheduled hide is cancelled first; otherwise the timer of an
 * older message would hide the new one before its five seconds are up.
 *
 * @param {string} message Text to display.
 */
function showError(message) {
  const toast = document.getElementById('error-toast');
  toast.textContent = message;
  toast.className = 'toast error';
  toast.style.display = 'block';

  clearTimeout(errorToastHideTimer);
  errorToastHideTimer = setTimeout(() => {
    toast.style.display = 'none';
    errorToastHideTimer = null;
  }, 5000);
}
| |
/**
 * Click handler of the mute icon: flip the mute flag and enable or disable
 * every outgoing audio track accordingly.
 *
 * Propagation is stopped so the click does not also reach the surrounding
 * start/stop button. Nothing happens unless the connection is "connected".
 *
 * @param {Event} event The click event from the mute toggle.
 */
function toggleMute(event) {
  event.stopPropagation();

  const isConnected = peerConnection && peerConnection.connectionState === 'connected';
  if (!isConnected) return;

  isMuted = !isMuted;
  console.log("Mute toggled:", isMuted);

  for (const sender of peerConnection.getSenders()) {
    if (!sender.track || sender.track.kind !== 'audio') continue;
    sender.track.enabled = !isMuted;
    console.log(`Audio track ${sender.track.id} enabled: ${!isMuted}`);
  }

  // Re-render so the icon and its title reflect the new state.
  updateButtonState();
}
| |
/**
 * Start a new WebRTC session: capture the microphone, wire up the input and
 * output analysers, open the "text" data channel and exchange the SDP offer
 * with the server via POST /webrtc/offer.
 *
 * Errors raised during negotiation are caught here: an error toast is shown
 * and the session is torn down with stopWebRTC().
 *
 * All handlers are bound to the local `pc` instead of the global
 * `peerConnection`, so a handler that fires after the session was torn down
 * or replaced neither throws nor reads another session's state.
 *
 * @returns {Promise<void>}
 */
async function setupWebRTC() {
  // Template placeholder; presumably substituted with an RTCConfiguration
  // object before the page is served -- TODO confirm against the server code.
  const config = __RTC_CONFIGURATION__;
  const pc = new RTCPeerConnection(config);
  peerConnection = pc;

  const sessionId = Math.random().toString(36).substring(7);
  webrtc_id = sessionId;

  const toast = document.getElementById('error-toast');

  // After five seconds without a connection, hint at a likely cause.
  let warningHideTimer;
  const slowConnectionTimer = setTimeout(() => {
    if (peerConnection !== pc) return; // session already torn down or replaced
    toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
    toast.className = 'toast warning';
    toast.style.display = 'block';
    warningHideTimer = setTimeout(() => {
      toast.style.display = 'none';
    }, 5000);
  }, 5000);

  // Cancels the warning and its auto-hide so neither can fire after the
  // session connected or failed, nor hide an error toast shown in between.
  const cancelSlowConnectionWarning = () => {
    clearTimeout(slowConnectionTimer);
    clearTimeout(warningHideTimer);
  };

  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream.getTracks().forEach(track => pc.addTrack(track, stream));

    // Microphone analyser, read by updateAudioLevel().
    if (!audioContext || audioContext.state === 'closed') {
      audioContext = new AudioContext();
    }
    if (source_input) {
      try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting previous input source:", e); }
      source_input = null;
    }
    source_input = audioContext.createMediaStreamSource(stream);
    analyser_input = audioContext.createAnalyser();
    source_input.connect(analyser_input);
    analyser_input.fftSize = 64;
    dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);
    // The level loop is started from the "connected" handler below; starting it
    // here would return immediately because the connection is still "new".

    pc.addEventListener('connectionstatechange', () => {
      if (peerConnection !== pc) return;
      console.log('connectionstatechange', pc.connectionState);
      if (pc.connectionState === 'connected') {
        cancelSlowConnectionWarning();
        toast.style.display = 'none';
        if (analyser_input) updateAudioLevel();
        if (analyser) updateVisualization();
      }
      updateButtonState();
    });

    // Trickle ICE: each local candidate is posted to the offer endpoint.
    pc.onicecandidate = ({ candidate }) => {
      if (!candidate) return;
      console.debug("Sending ICE candidate", candidate);
      fetch('/webrtc/offer', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          candidate: candidate.toJSON(),
          webrtc_id: sessionId,
          type: "ice-candidate",
        })
      }).catch(e => console.warn("Failed to send ICE candidate:", e));
    };

    // Remote audio: play it and attach the analyser read by updateVisualization().
    pc.addEventListener('track', (evt) => {
      if (peerConnection !== pc) return;
      const remoteStream = evt.streams[0];
      if (evt.track.kind !== 'audio' || !audioOutput || !remoteStream) return;
      if (audioOutput.srcObject === remoteStream) return;

      audioOutput.srcObject = remoteStream;
      audioOutput.play().catch(e => console.error("Audio play failed:", e));

      if (!audioContext || audioContext.state === 'closed') {
        console.warn("AudioContext not ready for output track analysis.");
        return;
      }
      if (source_output) {
        try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting previous output source:", e); }
        source_output = null;
      }
      source_output = audioContext.createMediaStreamSource(remoteStream);
      analyser = audioContext.createAnalyser();
      source_output.connect(analyser);
      analyser.fftSize = 2048;
      dataArray = new Uint8Array(analyser.frequencyBinCount);
      updateVisualization();
    });

    // Control channel: the server reports errors and asks for the selected voice.
    dataChannel = pc.createDataChannel('text');
    dataChannel.onmessage = (event) => {
      let eventJson;
      try {
        eventJson = JSON.parse(event.data);
      } catch (e) {
        console.warn("Ignoring data channel message that is not valid JSON:", e);
        return;
      }
      if (!eventJson) return;

      if (eventJson.type === "error") {
        cancelSlowConnectionWarning();
        showError(eventJson.message);
      } else if (eventJson.type === "send_input") {
        fetch('/input_hook', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            webrtc_id: sessionId,
            voice_name: voiceSelect.value
          })
        }).catch(e => console.warn("Failed to send input to the server:", e));
      }
    };

    // SDP offer/answer exchange.
    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);
    const response = await fetch('/webrtc/offer', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        sdp: pc.localDescription.sdp,
        type: pc.localDescription.type,
        webrtc_id: sessionId,
      })
    });
    const serverResponse = await response.json();

    if (serverResponse.status === 'failed') {
      // Without this the "taking longer than usual" warning would still pop up
      // seconds after the failure has already been reported.
      cancelSlowConnectionWarning();
      const meta = serverResponse.meta || {};
      showError(meta.error === 'concurrency_limit_reached'
        ? `Too many connections. Maximum limit is ${meta.limit}`
        : (meta.error || 'Failed to establish connection. Please try again.'));
      stopWebRTC();
      startButton.textContent = 'Start Recording';
      return;
    }

    await pc.setRemoteDescription(serverResponse);
  } catch (err) {
    cancelSlowConnectionWarning();
    console.error('Error setting up WebRTC:', err);
    showError('Failed to establish connection. Please try again.');
    stopWebRTC();
    startButton.textContent = 'Start Recording';
  }
}
| |
// Handle of the scheduled visualization frame. Cancelling it on entry keeps a
// single animation loop alive even when the function is kicked off repeatedly.
let visualizationFrameId = null;

/**
 * Animation loop for the waveform: scale each bar by one frequency bin of the
 * remote-audio analyser, then reschedule itself for the next frame.
 *
 * While there is no analyser or the connection is neither "connected" nor
 * "connecting", the bars are reset to their resting height and the loop stops.
 */
function updateVisualization() {
  if (visualizationFrameId !== null) {
    cancelAnimationFrame(visualizationFrameId);
    visualizationFrameId = null;
  }

  const bars = document.querySelectorAll('.box');
  const isActive = analyser && dataArray && peerConnection
    && ['connected', 'connecting'].includes(peerConnection.connectionState);

  if (!isActive) {
    bars.forEach(bar => { bar.style.transform = 'scaleY(0.1)'; });
    return;
  }

  analyser.getByteFrequencyData(dataArray);
  bars.forEach((bar, i) => {
    // Bins hold 0-255; map to a 0-2 scale factor, never below the resting 0.1.
    // A missing bin counts as silence rather than producing scaleY(NaN).
    const barHeight = ((dataArray[i] || 0) / 255) * 2;
    bar.style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
  });

  visualizationFrameId = requestAnimationFrame(updateVisualization);
}
| |
/**
 * Animation loop for the microphone level indicator: write 1 plus the
 * normalised average of the input analyser's frequency bins into the
 * --audio-level custom property of the pulse circle, then reschedule itself.
 *
 * While there is no input analyser or the connection is neither "connected"
 * nor "connecting", the level is reset to 1 and the loop stops.
 */
function updateAudioLevel() {
  const circle = document.querySelector('.pulse-circle');
  const isActive = analyser_input && peerConnection
    && ['connected', 'connecting'].includes(peerConnection.connectionState);

  if (!isActive) {
    if (circle) {
      circle.style.setProperty('--audio-level', 1);
    }
    return;
  }

  analyser_input.getByteFrequencyData(dataArray_input);

  let total = 0;
  for (const value of dataArray_input) {
    total += value;
  }
  const average = total / dataArray_input.length;
  const audioLevel = average / 255;

  if (circle) {
    circle.style.setProperty('--audio-level', 1 + audioLevel);
  }
  requestAnimationFrame(updateAudioLevel);
}
| |
/**
 * Tear down the current session and reset the UI.
 *
 * Stops the outgoing tracks, closes the data channel and the peer connection,
 * detaches the audio element, disconnects both analyser sources, closes the
 * AudioContext and clears all session state. Safe to call when nothing is
 * running.
 */
function stopWebRTC() {
  console.log("Running stopWebRTC");

  if (dataChannel) {
    dataChannel.onmessage = null;
    try { dataChannel.close(); } catch (e) { console.warn("Error closing data channel:", e); }
    dataChannel = null;
  }

  if (peerConnection) {
    // Stopping the sender tracks releases the microphone.
    peerConnection.getSenders().forEach(sender => {
      if (sender.track) {
        sender.track.stop();
      }
    });
    peerConnection.ontrack = null;
    peerConnection.onicecandidate = null;
    peerConnection.onicegatheringstatechange = null;
    peerConnection.onconnectionstatechange = null;
    try { peerConnection.close(); } catch (e) { console.warn("Error closing peer connection:", e); }
    peerConnection = null;
  }

  if (audioOutput) {
    audioOutput.pause();
    audioOutput.srcObject = null;
  }

  if (source_input) {
    try { source_input.disconnect(); } catch (e) { console.warn("Error disconnecting input source:", e); }
    source_input = null;
  }
  if (source_output) {
    try { source_output.disconnect(); } catch (e) { console.warn("Error disconnecting output source:", e); }
    source_output = null;
  }

  // Drop the shared reference before the asynchronous close() settles. If it
  // were cleared inside the promise callback, a session started right after
  // this call could reuse the closing context, or have its own freshly created
  // context nulled when the callback finally runs.
  const contextToClose = audioContext;
  audioContext = null;
  if (contextToClose && contextToClose.state !== 'closed') {
    contextToClose.close()
      .then(() => console.log("AudioContext closed successfully."))
      .catch(e => console.error("Error closing AudioContext:", e));
  }

  analyser_input = null;
  dataArray_input = null;
  analyser = null;
  dataArray = null;
  isMuted = false;
  isRecording = false;

  // Reset the visuals to their idle state.
  updateButtonState();
  document.querySelectorAll('.box').forEach(bar => {
    bar.style.transform = 'scaleY(0.1)';
  });
  const pulseCircle = document.querySelector('.pulse-circle');
  if (pulseCircle) {
    pulseCircle.style.setProperty('--audio-level', 1);
  }
}
| |
// Start/stop button: stops a connected session, otherwise starts a new one.
startButton.addEventListener('click', (event) => {
  // Clicks on the mute icon are handled by toggleMute, not here.
  if (event.target.closest('.mute-toggle')) {
    return;
  }

  const state = peerConnection ? peerConnection.connectionState : null;

  if (state === 'connected') {
    console.log("Stop button clicked");
    stopWebRTC();
    return;
  }

  if (state === null || ['new', 'closed', 'failed', 'disconnected'].includes(state)) {
    console.log("Start button clicked");

    // A failed or disconnected connection is about to be overwritten by
    // setupWebRTC(). Release it first, otherwise the old connection stays open
    // and its microphone tracks keep running.
    if (peerConnection) {
      const staleConnection = peerConnection;
      try {
        staleConnection.getSenders().forEach(sender => {
          if (sender.track) {
            sender.track.stop();
          }
        });
        staleConnection.close();
      } catch (e) {
        console.warn("Error releasing previous peer connection:", e);
      }
    }

    setupWebRTC();
    isRecording = true;
    updateButtonState();
  }
});

// Initial render of the button.
updateButtonState();
| </script> |
| </body> |
|
|
| </html> |