// NOTE(review): removed page-scrape residue ("Spaces: Sleeping Sleeping")
// that preceded the source — it was not part of the program and broke parsing.
/**
 * Real-time VAD-ASR Pipeline - Frontend Application
 * Handles microphone capture, WebSocket communication, and UI updates
 */
/**
 * Real-time VAD-ASR frontend recorder.
 *
 * Captures microphone audio with the Web Audio API, streams 16-bit PCM
 * chunks to the server over a WebSocket, and renders the server's replies
 * (speech probability, status, transcriptions, per-token confidence) into
 * the page. Constructed with no arguments; it looks up its DOM elements by
 * id/class and binds the mic-button click handler itself.
 */
class AudioRecorder {
    constructor() {
        // Audio settings — 16 kHz mono is what the server-side pipeline expects.
        this.sampleRate = 16000;
        this.chunkSize = 512; // samples per chunk (server framing hint; not used locally)
        this.bufferSize = 1024; // ScriptProcessor buffer size (must be a power of two)

        // Recording/session state.
        this.isRecording = false;
        this.audioContext = null;
        this.mediaStream = null;
        this.processor = null;
        this.analyser = null;
        this.animationId = null;
        this.websocket = null;

        // UI elements (assumed present in the host page).
        this.micButton = document.getElementById('micButton');
        this.micIcon = document.querySelector('.mic-icon');
        this.stopIcon = document.querySelector('.stop-icon');
        this.statusIndicator = document.getElementById('statusIndicator');
        this.statusMessage = document.getElementById('statusMessage');
        this.probabilityFill = document.getElementById('probabilityFill');
        this.connectionStatus = document.getElementById('connectionStatus');
        this.transcriptionContent = document.getElementById('transcriptionContent');
        this.transcriptionHistory = document.getElementById('transcriptionHistory');
        this.confidencePanel = document.getElementById('confidencePanel');
        this.confidenceTableBody = document.getElementById('confidenceTableBody');
        this.globalConfidence = document.getElementById('globalConfidence');
        this.waveformCanvas = document.getElementById('waveformCanvas');
        this.waveformCtx = this.waveformCanvas.getContext('2d');

        // Time-domain samples used by the waveform visualization; resized to
        // the analyser's fftSize when recording starts.
        this.audioDataBuffer = new Float32Array(128);

        // Bind events.
        this.micButton.addEventListener('click', () => this.toggleRecording());

        // Size the canvas now and on every window resize.
        this.initCanvas();
        window.addEventListener('resize', () => this.initCanvas());
    }

    /** Sizes the canvas to its container and paints the idle flat line. */
    initCanvas() {
        const container = this.waveformCanvas.parentElement;
        this.waveformCanvas.width = container.clientWidth - 32; // 32px: container padding
        this.waveformCanvas.height = 80;
        this.drawIdleWaveform();
    }

    /** Paints the "not recording" placeholder: a faint horizontal midline. */
    drawIdleWaveform() {
        const { width, height } = this.waveformCanvas;
        this.waveformCtx.fillStyle = 'rgba(99, 102, 241, 0.1)';
        this.waveformCtx.fillRect(0, 0, width, height);
        this.waveformCtx.strokeStyle = 'rgba(99, 102, 241, 0.3)';
        this.waveformCtx.lineWidth = 2;
        this.waveformCtx.beginPath();
        this.waveformCtx.moveTo(0, height / 2);
        this.waveformCtx.lineTo(width, height / 2);
        this.waveformCtx.stroke();
    }

    /**
     * Draws one frame of the live waveform.
     * @param {Float32Array} audioData - time-domain samples in [-1, 1].
     */
    drawWaveform(audioData) {
        const { width, height } = this.waveformCanvas;
        const ctx = this.waveformCtx;

        // Semi-transparent clear leaves a slight motion trail.
        ctx.fillStyle = 'rgba(10, 10, 26, 0.3)';
        ctx.fillRect(0, 0, width, height);

        const gradient = ctx.createLinearGradient(0, 0, width, 0);
        gradient.addColorStop(0, '#6366f1');
        gradient.addColorStop(0.5, '#8b5cf6');
        gradient.addColorStop(1, '#a855f7');
        ctx.strokeStyle = gradient;
        ctx.lineWidth = 2;
        ctx.beginPath();

        const sliceWidth = width / audioData.length;
        let x = 0;
        for (let i = 0; i < audioData.length; i++) {
            // Map [-1, 1] to canvas rows. NOTE(review): positive samples map to
            // the lower half (canvas y grows downward); kept as-is on purpose.
            const v = audioData[i] * 0.5 + 0.5;
            const y = v * height;
            if (i === 0) {
                ctx.moveTo(x, y);
            } else {
                ctx.lineTo(x, y);
            }
            x += sliceWidth;
        }
        ctx.stroke();

        // Second stroke with a shadow adds a glow effect.
        ctx.shadowColor = '#6366f1';
        ctx.shadowBlur = 10;
        ctx.stroke();
        ctx.shadowBlur = 0;
    }

    /** Mic-button handler: starts or stops the capture session. */
    async toggleRecording() {
        if (this.isRecording) {
            this.stopRecording();
        } else {
            await this.startRecording();
        }
    }

    /**
     * Acquires the microphone, connects the WebSocket, and wires the
     * capture/analysis pipeline. On any failure, releases everything that
     * was already acquired (previously the mic stream leaked) and shows an
     * error message.
     */
    async startRecording() {
        try {
            // Request microphone access.
            this.mediaStream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    channelCount: 1,
                    sampleRate: this.sampleRate,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });

            // Create the audio context at the pipeline's sample rate.
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: this.sampleRate
            });

            // Connect the streaming WebSocket before wiring audio nodes.
            await this.connectWebSocket();

            // mic -> analyser (visualization) -> processor (chunk upload) -> out
            const source = this.audioContext.createMediaStreamSource(this.mediaStream);

            this.analyser = this.audioContext.createAnalyser();
            this.analyser.fftSize = 512; // resolution of the time-domain data
            this.analyser.smoothingTimeConstant = 0.5;
            this.audioDataBuffer = new Float32Array(this.analyser.fftSize);

            // ScriptProcessorNode is deprecated but widely supported; an
            // AudioWorklet would be the modern replacement.
            this.processor = this.audioContext.createScriptProcessor(this.bufferSize, 1, 1);
            this.processor.onaudioprocess = (e) => {
                if (!this.isRecording) return;
                this.sendAudioChunk(e.inputBuffer.getChannelData(0));
            };

            source.connect(this.analyser);
            this.analyser.connect(this.processor);
            this.processor.connect(this.audioContext.destination);

            this.isRecording = true;
            this.updateUI('recording');
            this.visualize();
        } catch (error) {
            console.error('Error starting recording:', error);
            // Release any partially-acquired resources (mic stream, audio
            // context, socket) so the microphone does not stay open.
            this.stopRecording();
            this.updateStatus('listening', 'خطأ في الوصول للميكروفون');
        }
    }

    /** Tears down the audio pipeline, mic stream, and WebSocket; resets the UI. */
    stopRecording() {
        this.isRecording = false;

        if (this.animationId) {
            cancelAnimationFrame(this.animationId);
            this.animationId = null;
        }
        if (this.processor) {
            this.processor.onaudioprocess = null; // drop the capture callback
            this.processor.disconnect();
            this.processor = null;
        }
        if (this.analyser) {
            this.analyser.disconnect();
            this.analyser = null;
        }
        if (this.audioContext) {
            // close() returns a promise; swallow late rejections explicitly.
            this.audioContext.close().catch(() => {});
            this.audioContext = null;
        }
        if (this.mediaStream) {
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }
        if (this.websocket) {
            this.websocket.close();
            this.websocket = null;
        }

        this.updateUI('stopped');
        this.drawIdleWaveform();
    }

    /** requestAnimationFrame loop: pull time-domain data and repaint. */
    visualize() {
        if (!this.isRecording || !this.analyser) return;
        this.analyser.getFloatTimeDomainData(this.audioDataBuffer);
        this.drawWaveform(this.audioDataBuffer);
        this.animationId = requestAnimationFrame(() => this.visualize());
    }

    /**
     * Opens the audio WebSocket.
     * @returns {Promise<void>} resolves on open, rejects on connection error.
     */
    async connectWebSocket() {
        return new Promise((resolve, reject) => {
            const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsUrl = `${protocol}//${window.location.host}/ws/audio`;
            this.websocket = new WebSocket(wsUrl);

            this.websocket.onopen = () => {
                console.log('WebSocket connected');
                this.connectionStatus.classList.add('connected');
                this.connectionStatus.querySelector('.status-text').textContent = 'متصل';
                resolve();
            };
            this.websocket.onclose = () => {
                console.log('WebSocket disconnected');
                this.connectionStatus.classList.remove('connected');
                this.connectionStatus.querySelector('.status-text').textContent = 'غير متصل';
            };
            this.websocket.onerror = (error) => {
                console.error('WebSocket error:', error);
                reject(error); // no-op if the promise already settled via onopen
            };
            this.websocket.onmessage = (event) => {
                // Guard JSON.parse: one malformed frame must not kill the handler.
                let data;
                try {
                    data = JSON.parse(event.data);
                } catch (err) {
                    console.error('Malformed server message:', err);
                    return;
                }
                this.handleServerMessage(data);
            };
        });
    }

    /**
     * Converts a Float32 [-1, 1] chunk to 16-bit PCM and sends it as binary.
     * Silently drops the chunk when the socket is not open.
     * @param {Float32Array} audioData
     */
    sendAudioChunk(audioData) {
        if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
            return;
        }
        const int16Data = new Int16Array(audioData.length);
        for (let i = 0; i < audioData.length; i++) {
            // Clamp, then scale asymmetrically so both -1 and +1 stay in range.
            const s = Math.max(-1, Math.min(1, audioData[i]));
            int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
        this.websocket.send(int16Data.buffer);
    }

    /**
     * Dispatches one decoded server message to the relevant UI updates.
     * @param {{status?: string, probability?: number, transcription?: string,
     *          confidence?: number, token_confidences?: Array,
     *          accumulating?: boolean, accumulation_remaining?: number}} data
     */
    handleServerMessage(data) {
        const { status, probability, transcription, confidence, token_confidences, accumulating, accumulation_remaining } = data;

        // Update the VAD probability bar (clamped to a valid CSS percentage).
        if (probability !== undefined) {
            this.probabilityFill.style.width = `${Math.min(100, Math.max(0, probability * 100))}%`;
        }

        switch (status) {
            case 'speaking': {
                if (accumulating && accumulation_remaining > 0) {
                    this.updateStatus('speaking', `جاري التحدث... (${accumulation_remaining}s)`);
                } else {
                    this.updateStatus('speaking', 'جاري التحدث...');
                }
                break;
            }
            case 'waiting': {
                // Waiting for the speech-accumulation delay to elapse.
                const waitText = accumulation_remaining ? ` (${accumulation_remaining}s)` : '';
                this.updateStatus('speaking', `انتظر لإكمال الكلام${waitText}`);
                break;
            }
            case 'listening': {
                this.updateStatus('listening', 'في انتظار الكلام...');
                break;
            }
            case 'transcription': {
                this.updateStatus('listening', 'تم التعرف على الكلام');
                this.showTranscription(transcription);
                if (token_confidences) {
                    this.updateConfidenceTable(token_confidences, confidence);
                }
                break;
            }
        }
    }

    /**
     * Sets the status indicator class and the status text.
     * @param {string} state - 'speaking' and 'silence' get a highlight class.
     * @param {string} message - user-facing status text.
     */
    updateStatus(state, message) {
        this.statusIndicator.className = 'status-indicator';
        if (state === 'speaking' || state === 'silence') {
            this.statusIndicator.classList.add(state);
        }
        this.statusMessage.textContent = message;
    }

    /**
     * Shows a new transcription, pushing the previous one into the history
     * list (capped at 10 entries). Nodes are built with textContent — never
     * innerHTML — so server-provided text cannot be interpreted as HTML.
     * @param {string} text
     */
    showTranscription(text) {
        if (!text || text.trim() === '') return;

        // Move the current transcription (if any) into the history list.
        const currentText = this.transcriptionContent.querySelector('p:not(.placeholder-text)');
        if (currentText && currentText.textContent.trim()) {
            const historyItem = document.createElement('div');
            historyItem.className = 'history-item new';
            historyItem.textContent = currentText.textContent;
            this.transcriptionHistory.insertBefore(historyItem, this.transcriptionHistory.firstChild);
            while (this.transcriptionHistory.children.length > 10) {
                this.transcriptionHistory.removeChild(this.transcriptionHistory.lastChild);
            }
        }

        // Show the new transcription (XSS-safe construction).
        const paragraph = document.createElement('p');
        paragraph.className = 'new';
        paragraph.textContent = text;
        this.transcriptionContent.replaceChildren(paragraph);
    }

    /**
     * Rebuilds the per-token confidence table and the global confidence badge.
     * Token text is inserted via textContent (XSS hardening).
     * @param {Array<{token: string, probability: number}>} tokens
     * @param {number} globalConf - overall confidence in [0, 1].
     */
    updateConfidenceTable(tokens, globalConf) {
        this.confidencePanel.classList.remove('hidden');

        const percentage = Math.round(globalConf * 100);
        this.globalConfidence.textContent = `${percentage}%`;
        this.globalConfidence.className = 'confidence-value';
        if (percentage < 50) this.globalConfidence.classList.add('low');
        else if (percentage < 80) this.globalConfidence.classList.add('medium');

        this.confidenceTableBody.replaceChildren();
        tokens.forEach((tk) => {
            const prob = Math.round(tk.probability * 100);
            let probClass = 'confidence-value';
            if (prob < 50) probClass += ' low';
            else if (prob < 80) probClass += ' medium';

            const row = document.createElement('tr');
            const tokenCell = document.createElement('td');
            tokenCell.textContent = tk.token;
            const probCell = document.createElement('td');
            probCell.className = probClass;
            probCell.textContent = `${prob}%`;
            row.append(tokenCell, probCell);
            this.confidenceTableBody.appendChild(row);
        });
    }

    /**
     * Switches the button/status UI between the two top-level states.
     * @param {'recording'|'stopped'} state
     */
    updateUI(state) {
        if (state === 'recording') {
            this.micButton.classList.add('recording');
            this.micIcon.classList.add('hidden');
            this.stopIcon.classList.remove('hidden');
            this.statusMessage.textContent = 'في انتظار الكلام...';
        } else {
            this.micButton.classList.remove('recording');
            this.micIcon.classList.remove('hidden');
            this.stopIcon.classList.add('hidden');
            this.statusMessage.textContent = 'اضغط للبدء';
            this.statusIndicator.className = 'status-indicator';
            this.probabilityFill.style.width = '0%';
        }
    }
}
// Bootstrap: instantiate the recorder once the DOM has been fully parsed,
// so every element the constructor looks up already exists.
document.addEventListener('DOMContentLoaded', function bootstrapRecorder() {
    new AudioRecorder();
});