// Source: commit 55131fa ("update") by sherif31
/**
* Real-time VAD-ASR Pipeline - Frontend Application
* Handles microphone capture, WebSocket communication, and UI updates
*/
/**
 * Real-time VAD-ASR pipeline frontend.
 *
 * Captures microphone audio at 16 kHz, streams Int16 PCM chunks to the backend
 * over a WebSocket, and renders the server's responses into the page: a live
 * waveform, a speech-probability bar, Arabic status messages, the running
 * transcription (with a bounded history), and a per-token confidence table.
 */
class AudioRecorder {
  constructor() {
    // Audio settings — sampleRate must match the server-side model input rate.
    this.sampleRate = 16000;
    this.chunkSize = 512; // Samples per chunk
    this.bufferSize = 1024; // ScriptProcessorNode buffer length, in samples

    // Runtime state
    this.isRecording = false;
    this.audioContext = null;
    this.mediaStream = null;
    this.processor = null;
    this.analyser = null;
    this.animationId = null;
    this.websocket = null;

    // UI elements (looked up once; the page is assumed to contain them all)
    this.micButton = document.getElementById('micButton');
    this.micIcon = document.querySelector('.mic-icon');
    this.stopIcon = document.querySelector('.stop-icon');
    this.statusIndicator = document.getElementById('statusIndicator');
    this.statusMessage = document.getElementById('statusMessage');
    this.probabilityFill = document.getElementById('probabilityFill');
    this.connectionStatus = document.getElementById('connectionStatus');
    this.transcriptionContent = document.getElementById('transcriptionContent');
    this.transcriptionHistory = document.getElementById('transcriptionHistory');
    this.confidencePanel = document.getElementById('confidencePanel');
    this.confidenceTableBody = document.getElementById('confidenceTableBody');
    this.globalConfidence = document.getElementById('globalConfidence');
    this.waveformCanvas = document.getElementById('waveformCanvas');
    this.waveformCtx = this.waveformCanvas.getContext('2d');

    // Audio buffer for visualization (resized to analyser.fftSize on start)
    this.audioDataBuffer = new Float32Array(128);

    // Bind events
    this.micButton.addEventListener('click', () => this.toggleRecording());

    // Initialize canvas now and keep it sized to its container
    this.initCanvas();
    window.addEventListener('resize', () => this.initCanvas());
  }

  /** Sizes the canvas to its container (minus padding) and draws the idle line. */
  initCanvas() {
    const container = this.waveformCanvas.parentElement;
    this.waveformCanvas.width = container.clientWidth - 32;
    this.waveformCanvas.height = 80;
    this.drawIdleWaveform();
  }

  /** Draws the flat "no signal" baseline shown when not recording. */
  drawIdleWaveform() {
    const { width, height } = this.waveformCanvas;
    this.waveformCtx.fillStyle = 'rgba(99, 102, 241, 0.1)';
    this.waveformCtx.fillRect(0, 0, width, height);
    this.waveformCtx.strokeStyle = 'rgba(99, 102, 241, 0.3)';
    this.waveformCtx.lineWidth = 2;
    this.waveformCtx.beginPath();
    this.waveformCtx.moveTo(0, height / 2);
    this.waveformCtx.lineTo(width, height / 2);
    this.waveformCtx.stroke();
  }

  /**
   * Renders one frame of the live waveform.
   * @param {Float32Array} audioData - Time-domain samples in [-1, 1].
   */
  drawWaveform(audioData) {
    const { width, height } = this.waveformCanvas;
    const ctx = this.waveformCtx;

    // Clear canvas with a translucent fill so previous frames fade out
    ctx.fillStyle = 'rgba(10, 10, 26, 0.3)';
    ctx.fillRect(0, 0, width, height);

    // Draw waveform with a horizontal gradient stroke
    const gradient = ctx.createLinearGradient(0, 0, width, 0);
    gradient.addColorStop(0, '#6366f1');
    gradient.addColorStop(0.5, '#8b5cf6');
    gradient.addColorStop(1, '#a855f7');
    ctx.strokeStyle = gradient;
    ctx.lineWidth = 2;
    ctx.beginPath();
    const sliceWidth = width / audioData.length;
    let x = 0;
    for (let i = 0; i < audioData.length; i++) {
      // Map sample range [-1, 1] to canvas y range [0, height]
      const v = audioData[i] * 0.5 + 0.5;
      const y = v * height;
      if (i === 0) {
        ctx.moveTo(x, y);
      } else {
        ctx.lineTo(x, y);
      }
      x += sliceWidth;
    }
    ctx.stroke();

    // Second stroke with a shadow adds a glow effect
    ctx.shadowColor = '#6366f1';
    ctx.shadowBlur = 10;
    ctx.stroke();
    ctx.shadowBlur = 0;
  }

  /** Mic-button handler: starts or stops recording based on current state. */
  async toggleRecording() {
    if (this.isRecording) {
      this.stopRecording();
    } else {
      await this.startRecording();
    }
  }

  /**
   * Acquires the microphone, opens the WebSocket, and wires up the audio
   * pipeline: source -> analyser (visualization) -> processor (chunk upload).
   * On any failure, releases everything acquired so far and shows an error.
   */
  async startRecording() {
    try {
      // Request microphone access
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          sampleRate: this.sampleRate,
          echoCancellation: true,
          noiseSuppression: true
        }
      });

      // Create audio context at the server's expected sample rate
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: this.sampleRate
      });

      // Connect WebSocket before audio starts flowing
      await this.connectWebSocket();

      // Create audio processing pipeline
      const source = this.audioContext.createMediaStreamSource(this.mediaStream);

      // Analyser feeds the smooth waveform visualization
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 512; // Controls resolution of data
      this.analyser.smoothingTimeConstant = 0.5;
      this.audioDataBuffer = new Float32Array(this.analyser.fftSize);

      // ScriptProcessorNode delivers raw PCM for upload.
      // NOTE(review): ScriptProcessorNode is deprecated in favor of
      // AudioWorklet; kept here to preserve behavior.
      this.processor = this.audioContext.createScriptProcessor(this.bufferSize, 1, 1);
      this.processor.onaudioprocess = (e) => {
        if (!this.isRecording) return;
        const inputData = e.inputBuffer.getChannelData(0);
        // Send audio chunks to server
        this.sendAudioChunk(inputData);
      };
      source.connect(this.analyser);
      this.analyser.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      // Update UI
      this.isRecording = true;
      this.updateUI('recording');

      // Start visualization loop
      this.visualize();
    } catch (error) {
      console.error('Error starting recording:', error);
      // Release anything partially acquired (mic stream, audio context,
      // socket) so a retry starts from a clean slate, then surface the error.
      this.stopRecording();
      this.updateStatus('listening', 'خطأ في الوصول للميكروفون');
    }
  }

  /** Tears down the audio pipeline, mic stream, and WebSocket; resets the UI. */
  stopRecording() {
    this.isRecording = false;
    if (this.animationId) {
      cancelAnimationFrame(this.animationId);
      this.animationId = null;
    }

    // Stop audio processing
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.analyser) {
      this.analyser.disconnect();
      this.analyser = null;
    }
    if (this.audioContext) {
      // close() returns a Promise; swallow failures (e.g. already closed)
      // so they don't surface as unhandled rejections.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => track.stop());
      this.mediaStream = null;
    }

    // Close WebSocket
    if (this.websocket) {
      this.websocket.close();
      this.websocket = null;
    }

    // Update UI
    this.updateUI('stopped');
    this.drawIdleWaveform();
  }

  /** rAF loop: pulls time-domain data from the analyser and redraws. */
  visualize() {
    if (!this.isRecording || !this.analyser) return;
    this.analyser.getFloatTimeDomainData(this.audioDataBuffer);
    this.drawWaveform(this.audioDataBuffer);
    this.animationId = requestAnimationFrame(() => this.visualize());
  }

  /**
   * Opens the audio WebSocket and wires its handlers.
   * @returns {Promise<void>} Resolves on open, rejects on connection error.
   */
  async connectWebSocket() {
    return new Promise((resolve, reject) => {
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
      const wsUrl = `${protocol}//${window.location.host}/ws/audio`;
      this.websocket = new WebSocket(wsUrl);
      this.websocket.onopen = () => {
        console.log('WebSocket connected');
        this.connectionStatus.classList.add('connected');
        this.connectionStatus.querySelector('.status-text').textContent = 'متصل';
        resolve();
      };
      this.websocket.onclose = () => {
        console.log('WebSocket disconnected');
        this.connectionStatus.classList.remove('connected');
        this.connectionStatus.querySelector('.status-text').textContent = 'غير متصل';
      };
      this.websocket.onerror = (error) => {
        console.error('WebSocket error:', error);
        reject(error);
      };
      this.websocket.onmessage = (event) => {
        // A malformed payload must not throw out of the message handler.
        let data;
        try {
          data = JSON.parse(event.data);
        } catch (err) {
          console.error('Malformed server message:', err);
          return;
        }
        this.handleServerMessage(data);
      };
    });
  }

  /**
   * Converts a Float32 chunk to Int16 PCM and sends it over the socket.
   * Silently drops the chunk if the socket is not open.
   * @param {Float32Array} audioData - Samples in [-1, 1].
   */
  sendAudioChunk(audioData) {
    if (!this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
      return;
    }
    // Convert Float32 to Int16 for transmission (clamp, then scale
    // asymmetrically so -1 maps to -32768 and +1 to +32767)
    const int16Data = new Int16Array(audioData.length);
    for (let i = 0; i < audioData.length; i++) {
      const s = Math.max(-1, Math.min(1, audioData[i]));
      int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
    // Send as binary
    this.websocket.send(int16Data.buffer);
  }

  /**
   * Dispatches one decoded server message to the relevant UI updates.
   * @param {{status?: string, probability?: number, transcription?: string,
   *          confidence?: number, token_confidences?: Array,
   *          accumulating?: boolean, accumulation_remaining?: number}} data
   */
  handleServerMessage(data) {
    const { status, probability, transcription, remaining, confidence, token_confidences, accumulating, accumulation_remaining } = data;

    // Update probability bar
    if (probability !== undefined) {
      this.probabilityFill.style.width = `${probability * 100}%`;
    }

    // Update status
    switch (status) {
      case 'speaking':
        if (accumulating && accumulation_remaining > 0) {
          this.updateStatus('speaking', `جاري التحدث... (${accumulation_remaining}s)`);
        } else {
          this.updateStatus('speaking', 'جاري التحدث...');
        }
        break;
      case 'waiting': {
        // Waiting for speech accumulation delay (braced: case-scoped const)
        const waitText = accumulation_remaining ? ` (${accumulation_remaining}s)` : '';
        this.updateStatus('speaking', `انتظر لإكمال الكلام${waitText}`);
        break;
      }
      case 'listening':
        this.updateStatus('listening', 'في انتظار الكلام...');
        break;
      case 'transcription':
        this.updateStatus('listening', 'تم التعرف على الكلام');
        this.showTranscription(transcription);
        if (token_confidences) {
          this.updateConfidenceTable(token_confidences, confidence);
        }
        break;
    }
  }

  /**
   * Sets the status indicator class and the status message text.
   * @param {string} state - 'speaking' | 'silence' | anything else (neutral).
   * @param {string} message - User-facing message.
   */
  updateStatus(state, message) {
    // Reset, then re-apply only the recognized state classes
    this.statusIndicator.className = 'status-indicator';
    if (state === 'speaking' || state === 'silence') {
      this.statusIndicator.classList.add(state);
    }
    // Update message
    this.statusMessage.textContent = message;
  }

  /**
   * Shows a new transcription, pushing the previous one into the history list
   * (bounded to 10 items). Text is inserted via textContent so ASR output can
   * never be interpreted as HTML (XSS hardening).
   * @param {string} text - Transcribed text from the server.
   */
  showTranscription(text) {
    if (!text || text.trim() === '') return;

    // Move current transcription to history
    const currentText = this.transcriptionContent.querySelector('p:not(.placeholder-text)');
    if (currentText && currentText.textContent.trim()) {
      const historyItem = document.createElement('div');
      historyItem.className = 'history-item new';
      historyItem.textContent = currentText.textContent;
      this.transcriptionHistory.insertBefore(historyItem, this.transcriptionHistory.firstChild);
      // Limit history to 10 items
      while (this.transcriptionHistory.children.length > 10) {
        this.transcriptionHistory.removeChild(this.transcriptionHistory.lastChild);
      }
    }

    // Show new transcription (built via DOM APIs, not innerHTML)
    this.transcriptionContent.innerHTML = '';
    const paragraph = document.createElement('p');
    paragraph.className = 'new';
    paragraph.textContent = text;
    this.transcriptionContent.appendChild(paragraph);
  }

  /**
   * Rebuilds the per-token confidence table and the global confidence badge.
   * Token text goes through textContent (XSS hardening).
   * @param {Array<{token: string, probability: number}>} tokens
   * @param {number} globalConf - Overall confidence in [0, 1].
   */
  updateConfidenceTable(tokens, globalConf) {
    this.confidencePanel.classList.remove('hidden');

    // Update global confidence
    const percentage = Math.round(globalConf * 100);
    this.globalConfidence.textContent = `${percentage}%`;
    this.globalConfidence.className = 'confidence-value';
    if (percentage < 50) this.globalConfidence.classList.add('low');
    else if (percentage < 80) this.globalConfidence.classList.add('medium');

    // Update table
    this.confidenceTableBody.innerHTML = '';
    tokens.forEach(tk => {
      const row = document.createElement('tr');
      const prob = Math.round(tk.probability * 100);
      let probClass = 'confidence-value';
      if (prob < 50) probClass += ' low';
      else if (prob < 80) probClass += ' medium';

      const tokenCell = document.createElement('td');
      tokenCell.textContent = tk.token;
      const probCell = document.createElement('td');
      probCell.className = probClass;
      probCell.textContent = `${prob}%`;
      row.appendChild(tokenCell);
      row.appendChild(probCell);
      this.confidenceTableBody.appendChild(row);
    });
  }

  /**
   * Switches the controls between recording and idle appearance.
   * @param {string} state - 'recording' | anything else (treated as stopped).
   */
  updateUI(state) {
    if (state === 'recording') {
      this.micButton.classList.add('recording');
      this.micIcon.classList.add('hidden');
      this.stopIcon.classList.remove('hidden');
      this.statusMessage.textContent = 'في انتظار الكلام...';
    } else {
      this.micButton.classList.remove('recording');
      this.micIcon.classList.remove('hidden');
      this.stopIcon.classList.add('hidden');
      this.statusMessage.textContent = 'اضغط للبدء';
      this.statusIndicator.className = 'status-indicator';
      this.probabilityFill.style.width = '0%';
    }
  }
}
// Initialize on page load
// Bootstrap: create the recorder once the document has finished parsing.
function bootstrapRecorder() {
  new AudioRecorder();
}
document.addEventListener('DOMContentLoaded', bootstrapRecorder);