| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Real-Time Whisper Transcription</title> |
| <style> |
| /* Page shell: a centered column at most 1200px wide on a light grey backdrop. */ |
| body { |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
| max-width: 1200px; |
| margin: 0 auto; |
| padding: 20px; |
| background-color: #f5f5f5; |
| } |
| |
| /* White rounded card that wraps the whole UI. */ |
| .container { |
| background: white; |
| border-radius: 10px; |
| padding: 30px; |
| box-shadow: 0 2px 10px rgba(0,0,0,0.1); |
| } |
| |
| h1 { |
| color: #333; |
| text-align: center; |
| margin-bottom: 30px; |
| } |
| |
| /* Configuration form: as many columns as fit, each at least 200px wide. */ |
| .config-section { |
| display: grid; |
| grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); |
| gap: 15px; |
| margin-bottom: 30px; |
| padding: 20px; |
| background: #f8f9fa; |
| border-radius: 8px; |
| } |
| |
| /* One label stacked above its control. */ |
| .config-group { |
| display: flex; |
| flex-direction: column; |
| } |
| |
| label { |
| font-weight: 600; |
| margin-bottom: 5px; |
| color: #555; |
| } |
| |
| input, select { |
| padding: 10px; |
| border: 2px solid #ddd; |
| border-radius: 5px; |
| font-size: 14px; |
| transition: border-color 0.3s; |
| } |
| |
| /* The default focus outline is replaced by a blue border. */ |
| input:focus, select:focus { |
| outline: none; |
| border-color: #007bff; |
| } |
| |
| /* Row of action buttons, centered. */ |
| .controls { |
| display: flex; |
| gap: 10px; |
| justify-content: center; |
| margin-bottom: 30px; |
| } |
| |
| /* Base look shared by all buttons; the colour comes from the variant classes below. */ |
| button { |
| padding: 12px 24px; |
| border: none; |
| border-radius: 5px; |
| font-size: 16px; |
| font-weight: 600; |
| cursor: pointer; |
| transition: all 0.3s; |
| } |
| |
| /* Button variants: each has a darker hover shade that is skipped while disabled. */ |
| .start-btn { |
| background: #28a745; |
| color: white; |
| } |
| |
| .start-btn:hover:not(:disabled) { |
| background: #218838; |
| } |
| |
| .stop-btn { |
| background: #dc3545; |
| color: white; |
| } |
| |
| .stop-btn:hover:not(:disabled) { |
| background: #c82333; |
| } |
| |
| .clear-btn { |
| background: #6c757d; |
| color: white; |
| } |
| |
| .clear-btn:hover:not(:disabled) { |
| background: #5a6268; |
| } |
| |
| button:disabled { |
| opacity: 0.6; |
| cursor: not-allowed; |
| } |
| |
| /* Connection status line: coloured dot followed by the status text. */ |
| .status { |
| display: flex; |
| align-items: center; |
| justify-content: center; |
| gap: 10px; |
| margin-bottom: 20px; |
| font-weight: 600; |
| } |
| |
| /* 12px dot, red by default, fading in and out on a 2s loop. */ |
| /* The script also applies an "error" class; no rule exists for it, so the default red is shown. */ |
| .status-indicator { |
| width: 12px; |
| height: 12px; |
| border-radius: 50%; |
| background: #dc3545; |
| animation: pulse 2s infinite; |
| } |
| |
| /* Green once the socket is connected. */ |
| .status-indicator.connected { |
| background: #28a745; |
| } |
| |
| /* Amber while audio is being streamed. */ |
| .status-indicator.streaming { |
| background: #ffc107; |
| } |
| |
| /* Opacity dips to 50% halfway through each cycle. */ |
| @keyframes pulse { |
| 0% { opacity: 1; } |
| 50% { opacity: 0.5; } |
| 100% { opacity: 1; } |
| } |
| |
| /* Transcription and system log panels side by side. */ |
| .transcription-section { |
| display: grid; |
| grid-template-columns: 1fr 1fr; |
| gap: 20px; |
| } |
| |
| .transcription-panel { |
| background: #f8f9fa; |
| border-radius: 8px; |
| padding: 20px; |
| } |
| |
| .transcription-panel h3 { |
| margin-top: 0; |
| color: #333; |
| } |
| |
| /* Fixed-height scrolling text boxes; pre-wrap keeps the newlines the script appends. */ |
| .log-area, .transcription-area { |
| background: #fff; |
| border: 1px solid #ddd; |
| border-radius: 5px; |
| padding: 15px; |
| height: 300px; |
| overflow-y: auto; |
| font-family: 'Courier New', monospace; |
| font-size: 14px; |
| line-height: 1.4; |
| white-space: pre-wrap; |
| } |
| |
| /* The transcription box overrides the monospace font with the page font at a larger size. */ |
| .transcription-area { |
| font-family: inherit; |
| font-size: 16px; |
| line-height: 1.6; |
| } |
| |
| /* Counter tiles under the panels: as many as fit, each at least 150px wide. */ |
| .stats { |
| display: grid; |
| grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); |
| gap: 15px; |
| margin-top: 20px; |
| } |
| |
| .stat-item { |
| text-align: center; |
| padding: 10px; |
| background: #e9ecef; |
| border-radius: 5px; |
| } |
| |
| .stat-value { |
| font-size: 18px; |
| font-weight: bold; |
| color: #007bff; |
| } |
| |
| .stat-label { |
| font-size: 12px; |
| color: #666; |
| margin-top: 5px; |
| } |
| |
| /* Viewports up to 768px wide: single-column grids and stacked 200px-wide buttons. */ |
| @media (max-width: 768px) { |
| .config-section { |
| grid-template-columns: 1fr; |
| } |
| |
| .transcription-section { |
| grid-template-columns: 1fr; |
| } |
| |
| .controls { |
| flex-direction: column; |
| align-items: center; |
| } |
| |
| button { |
| width: 200px; |
| } |
| } |
| </style> |
| </head> |
| <body> |
| <div class="container"> |
| <h1>🎤 Real-Time Whisper Transcription</h1> |
| |
| <div class="config-section"> |
| <div class="config-group"> |
| <label for="modelSize">Model Size:</label> |
| <select id="modelSize"> |
| <option value="base">Base (74 MB)</option> |
| <option value="small" selected>Small (244 MB)</option> |
| <option value="large-v2">Large-v2 (1550 MB)</option> |
| </select> |
| </div> |
| |
| <div class="config-group"> |
| <label for="chunkSize">Chunk Size (ms):</label> |
| <select id="chunkSize"> |
| <option value="40">40</option> |
| <option value="100">100</option> |
| <option value="200">200</option> |
| <option value="300" selected>300</option> |
| <option value="1000">1000</option> |
| </select> |
| </div> |
| |
| <div class="config-group"> |
| <label for="beamSize">Beam Size:</label> |
| <input type="number" id="beamSize" value="0" min="0" max="10"> |
| </div> |
| |
| <div class="config-group"> |
| <label for="language">Language:</label> |
| <select id="language"> |
| <option value="en" selected>English</option> |
| <option value="fr">French</option> |
| <option value="es">Spanish</option> |
| <option value="de">German</option> |
| <option value="pt">Portuguese</option> |
| </select> |
| </div> |
| </div> |
| |
| <div class="controls"> |
| <button id="startBtn" class="start-btn" onclick="startStreaming()">🎤 Start Recording</button> |
| <button id="stopBtn" class="stop-btn" onclick="stopStreaming()" disabled>⏹️ Stop Recording</button> |
| <button id="clearBtn" class="clear-btn" onclick="clearAll()">🗑️ Clear All</button> |
| </div> |
| |
| <div class="status"> |
| <div class="status-indicator" id="statusIndicator"></div> |
| <span id="statusText">Disconnected</span> |
| </div> |
| |
| <div class="transcription-section"> |
| <div class="transcription-panel"> |
| <h3>📝 Transcription</h3> |
| <div id="transcriptionArea" class="transcription-area"></div> |
| </div> |
| |
| <div class="transcription-panel"> |
| <h3>📋 System Log</h3> |
| <div id="logArea" class="log-area"></div> |
| </div> |
| </div> |
| |
| <div class="stats"> |
| <div class="stat-item"> |
| <div class="stat-value" id="durationStat">0.0s</div> |
| <div class="stat-label">Duration</div> |
| </div> |
| <div class="stat-item"> |
| <div class="stat-value" id="chunksStat">0</div> |
| <div class="stat-label">Chunks Sent</div> |
| </div> |
| <div class="stat-item"> |
| <div class="stat-value" id="transcriptionsStat">0</div> |
| <div class="stat-label">Transcriptions</div> |
| </div> |
| <div class="stat-item"> |
| <div class="stat-value" id="errorsStat">0</div> |
| <div class="stat-label">Errors</div> |
| </div> |
| </div> |
| </div> |
|
|
| <script> |
| let socket; |
| let audioContext, processor, micStream; |
| let isStreaming = false; |
| let startTime = 0; |
| let stats = { |
| chunks: 0, |
| transcriptions: 0, |
| errors: 0 |
| }; |
| |
| |
| const startBtn = document.getElementById('startBtn'); |
| const stopBtn = document.getElementById('stopBtn'); |
| const statusIndicator = document.getElementById('statusIndicator'); |
| const statusText = document.getElementById('statusText'); |
| const logArea = document.getElementById('logArea'); |
| const transcriptionArea = document.getElementById('transcriptionArea'); |
| |
| function log(message, isError = false) { |
| const timestamp = new Date().toLocaleTimeString(); |
| const logMessage = `[${timestamp}] ${message}`; |
| logArea.textContent += logMessage + '\n'; |
| logArea.scrollTop = logArea.scrollHeight; |
| |
| if (isError) { |
| console.error(logMessage); |
| stats.errors++; |
| updateStats(); |
| } else { |
| console.log(logMessage); |
| } |
| } |
| |
| function updateStatus(status, color) { |
| statusText.textContent = status; |
| statusIndicator.className = `status-indicator ${color}`; |
| } |
| |
| function updateStats() { |
| document.getElementById('durationStat').textContent = |
| isStreaming ? ((Date.now() - startTime) / 1000).toFixed(1) + 's' : '0.0s'; |
| document.getElementById('chunksStat').textContent = stats.chunks; |
| document.getElementById('transcriptionsStat').textContent = stats.transcriptions; |
| document.getElementById('errorsStat').textContent = stats.errors; |
| } |
| |
| function addTranscription(text, timestamp) { |
| const transcriptionText = `[${timestamp.toFixed(1)}s] ${text}\n`; |
| transcriptionArea.textContent += transcriptionText; |
| transcriptionArea.scrollTop = transcriptionArea.scrollHeight; |
| stats.transcriptions++; |
| updateStats(); |
| } |
| |
| async function startStreaming() { |
| try { |
| |
| const config = { |
| model_size: document.getElementById('modelSize').value, |
| chunk_size: parseInt(document.getElementById('chunkSize').value), |
| beam_size: parseInt(document.getElementById('beamSize').value), |
| language: document.getElementById('language').value |
| }; |
| |
| log('Starting transcription session...'); |
| log(`Config: ${JSON.stringify(config, null, 2)}`); |
| |
| |
| const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'; |
| const wsUrl = `${wsProtocol}//${window.location.host}/ws`; |
| |
| |
| socket = new WebSocket(wsUrl); |
| socket.binaryType = 'arraybuffer'; |
| |
| socket.onopen = async () => { |
| log('WebSocket connected'); |
| updateStatus('Connected', 'connected'); |
| |
| |
| log('Sending configuration...'); |
| socket.send(JSON.stringify(config)); |
| }; |
| |
| socket.onmessage = (event) => { |
| try { |
| const data = JSON.parse(event.data); |
| |
| if (data.error) { |
| log(`Server error: ${data.error}`, true); |
| return; |
| } |
| |
| if (data.status === 'CONFIG_RECEIVED') { |
| log(`Configuration received. GPU: ${data.gpu ? 'Yes' : 'No'}`); |
| if (data.fallback) { |
| log('Using fallback Whisper model'); |
| } |
| |
| setTimeout(() => { |
| startAudioCapture(); |
| }, 100); |
| } else if (data.text) { |
| addTranscription(data.text, data.timestamp || 0); |
| } |
| } catch (e) { |
| log(`Server message: ${event.data}`); |
| } |
| }; |
| |
| socket.onerror = (error) => { |
| log(`WebSocket error: ${error}`, true); |
| updateStatus('Error', 'error'); |
| }; |
| |
| socket.onclose = () => { |
| log('WebSocket disconnected'); |
| updateStatus('Disconnected', ''); |
| stopStreaming(); |
| }; |
| |
| } catch (error) { |
| log(`Failed to start: ${error.message}`, true); |
| updateStatus('Error', 'error'); |
| } |
| } |
| |
| async function startAudioCapture() { |
| try { |
| log('Requesting microphone access...'); |
| |
| const stream = await navigator.mediaDevices.getUserMedia({ |
| audio: { |
| sampleRate: 16000, |
| channelCount: 1, |
| echoCancellation: true, |
| noiseSuppression: true, |
| autoGainControl: true |
| } |
| }); |
| |
| log('Microphone access granted'); |
| |
| |
| audioContext = new (window.AudioContext || window.webkitAudioContext)({ |
| sampleRate: 16000 |
| }); |
| |
| |
| if (audioContext.state === 'suspended') { |
| await audioContext.resume(); |
| log('Audio context resumed'); |
| } |
| |
| |
| micStream = audioContext.createMediaStreamSource(stream); |
| |
| |
| const bufferSize = 4096; |
| processor = audioContext.createScriptProcessor(bufferSize, 1, 1); |
| |
| processor.onaudioprocess = (event) => { |
| if (!isStreaming || !socket || socket.readyState !== WebSocket.OPEN) { |
| return; |
| } |
| |
| const inputData = event.inputBuffer.getChannelData(0); |
| |
| |
| const int16Array = new Int16Array(inputData.length); |
| for (let i = 0; i < inputData.length; i++) { |
| const sample = Math.max(-1, Math.min(1, inputData[i])); |
| int16Array[i] = sample * 32767; |
| } |
| |
| |
| socket.send(int16Array.buffer); |
| stats.chunks++; |
| }; |
| |
| |
| micStream.connect(processor); |
| processor.connect(audioContext.destination); |
| |
| |
| isStreaming = true; |
| startTime = Date.now(); |
| startBtn.disabled = true; |
| stopBtn.disabled = false; |
| updateStatus('Streaming', 'streaming'); |
| |
| log('Audio streaming started'); |
| |
| |
| const statsTimer = setInterval(() => { |
| if (isStreaming) { |
| updateStats(); |
| } else { |
| clearInterval(statsTimer); |
| } |
| }, 100); |
| |
| } catch (error) { |
| log(`Audio capture failed: ${error.message}`, true); |
| updateStatus('Error', 'error'); |
| startBtn.disabled = false; |
| stopBtn.disabled = true; |
| } |
| } |
| |
| function stopStreaming() { |
| isStreaming = false; |
| |
| |
| if (micStream && micStream.mediaStream) { |
| micStream.mediaStream.getTracks().forEach(track => track.stop()); |
| } |
| |
| |
| if (processor) { |
| processor.disconnect(); |
| processor = null; |
| } |
| |
| if (micStream) { |
| micStream.disconnect(); |
| micStream = null; |
| } |
| |
| if (audioContext) { |
| audioContext.close(); |
| audioContext = null; |
| } |
| |
| |
| if (socket && socket.readyState === WebSocket.OPEN) { |
| socket.close(); |
| } |
| |
| |
| startBtn.disabled = false; |
| stopBtn.disabled = true; |
| updateStatus('Disconnected', ''); |
| |
| log('Streaming stopped'); |
| updateStats(); |
| } |
| |
| function clearAll() { |
| logArea.textContent = ''; |
| transcriptionArea.textContent = ''; |
| stats = { chunks: 0, transcriptions: 0, errors: 0 }; |
| updateStats(); |
| log('All content cleared'); |
| } |
| |
| |
| updateStats(); |
| log('Real-time transcription client ready'); |
| |
| |
| window.addEventListener('beforeunload', () => { |
| if (isStreaming) { |
| stopStreaming(); |
| } |
| }); |
| </script> |
| </body> |
| </html> |