<!--
Jambonz_impl / asr_websocket_client.html
alaatiger989's picture
Upload folder using huggingface_hub
04c4cd1 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ASR WebSocket Testing Client</title>
<style>
/* ---------- Reset ---------- */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* ---------- Page shell: gradient backdrop with a centered card ---------- */
body {
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  min-height: 100vh;
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 20px;
}
.container {
  background: rgba(255, 255, 255, 0.95);
  /* Prefixed form first for WebKit builds that only know the prefixed property. */
  -webkit-backdrop-filter: blur(10px);
  backdrop-filter: blur(10px);
  border-radius: 20px;
  padding: 40px;
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
  max-width: 600px;
  width: 100%;
  border: 1px solid rgba(255, 255, 255, 0.2);
}

/* ---------- Header ---------- */
.header {
  text-align: center;
  margin-bottom: 30px;
}
.header h1 {
  color: #333;
  font-size: 2.5em;
  font-weight: 300;
  margin-bottom: 10px;
}
.header p {
  color: #666;
  font-size: 1.1em;
}

/* ---------- Connection form ---------- */
.connection-section {
  margin-bottom: 30px;
}
.input-group {
  margin-bottom: 20px;
}
.input-group label {
  display: block;
  margin-bottom: 8px;
  color: #333;
  font-weight: 500;
}
.input-group input {
  width: 100%;
  padding: 12px 16px;
  border: 2px solid #e1e5e9;
  border-radius: 10px;
  font-size: 16px;
  transition: all 0.3s ease;
  background: rgba(255, 255, 255, 0.8);
}
/* The default outline is replaced by a coloured border plus a soft ring. */
.input-group input:focus {
  outline: none;
  border-color: #667eea;
  box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}

/* ---------- Buttons ---------- */
.btn {
  padding: 12px 24px;
  border: none;
  border-radius: 10px;
  font-size: 16px;
  font-weight: 500;
  cursor: pointer;
  transition: all 0.3s ease;
  text-transform: uppercase;
  letter-spacing: 0.5px;
}
/* Explicit keyboard focus ring so focus stays visible on the gradient fills. */
.btn:focus-visible {
  outline: 3px solid #333;
  outline-offset: 2px;
}
.btn:disabled {
  opacity: 0.6;
  cursor: not-allowed;
}
.btn-connect {
  background: linear-gradient(135deg, #4CAF50, #45a049);
  color: white;
  width: 100%;
}
.btn-connect:hover:not(:disabled) {
  transform: translateY(-2px);
  box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3);
}
.btn-disconnect {
  background: linear-gradient(135deg, #f44336, #da190b);
  color: white;
  width: 100%;
}
.btn-disconnect:hover:not(:disabled) {
  transform: translateY(-2px);
  box-shadow: 0 5px 15px rgba(244, 67, 54, 0.3);
}

/* ---------- Round microphone / stop controls ---------- */
.audio-controls {
  display: flex;
  justify-content: center;
  gap: 20px;
  margin: 30px 0;
}
.btn-mic {
  background: linear-gradient(135deg, #2196F3, #1976D2);
  color: white;
  width: 80px;
  height: 80px;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 24px;
}
.btn-mic:hover:not(:disabled) {
  transform: scale(1.1);
  box-shadow: 0 10px 25px rgba(33, 150, 243, 0.3);
}
/* Applied by the script while audio is being captured. */
.btn-mic.recording {
  background: linear-gradient(135deg, #f44336, #da190b);
  animation: pulse 1.5s infinite;
}
.btn-stop {
  background: linear-gradient(135deg, #FF9800, #F57C00);
  color: white;
  width: 80px;
  height: 80px;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 24px;
}
.btn-stop:hover:not(:disabled) {
  transform: scale(1.1);
  box-shadow: 0 10px 25px rgba(255, 152, 0, 0.3);
}
@keyframes pulse {
  0% { transform: scale(1); }
  50% { transform: scale(1.05); }
  100% { transform: scale(1); }
}

/* ---------- Status banner (modifier class is set from the script) ---------- */
.status {
  text-align: center;
  margin: 20px 0;
  padding: 12px;
  border-radius: 10px;
  font-weight: 500;
}
.status.connected {
  background: rgba(76, 175, 80, 0.1);
  color: #4CAF50;
  border: 1px solid rgba(76, 175, 80, 0.3);
}
.status.disconnected {
  background: rgba(244, 67, 54, 0.1);
  color: #f44336;
  border: 1px solid rgba(244, 67, 54, 0.3);
}
.status.recording {
  background: rgba(33, 150, 243, 0.1);
  color: #2196F3;
  border: 1px solid rgba(33, 150, 243, 0.3);
}

/* ---------- Response log ---------- */
.response-section {
  margin-top: 30px;
}
.response-box {
  background: rgba(0, 0, 0, 0.05);
  border-radius: 10px;
  padding: 20px;
  min-height: 120px;
  /* Cap the height and scroll inside the box; without this the script's
     "scroll to bottom" has nothing to scroll and the card grows forever. */
  max-height: 320px;
  overflow-y: auto;
  border: 1px solid rgba(0, 0, 0, 0.1);
  font-family: 'Courier New', monospace;
  white-space: pre-wrap;
  word-wrap: break-word; /* legacy alias, kept for older engines */
  overflow-wrap: break-word;
}

/* ---------- Spinner helper ---------- */
.loading {
  display: flex;
  align-items: center;
  justify-content: center;
  color: #666;
}
.loading::after {
  content: '';
  width: 20px;
  height: 20px;
  border: 2px solid #f3f3f3;
  border-top: 2px solid #667eea;
  border-radius: 50%;
  animation: spin 1s linear infinite;
  margin-left: 10px;
}
@keyframes spin {
  0% { transform: rotate(0deg); }
  100% { transform: rotate(360deg); }
}

/* ---------- Decorative recording visualizer ---------- */
.audio-visualizer {
  display: flex;
  align-items: center;
  justify-content: center;
  height: 40px;
  margin: 10px 0;
}
.bar {
  width: 3px;
  height: 10px;
  background: #667eea;
  margin: 0 1px;
  border-radius: 2px;
  animation: wave 1s ease-in-out infinite;
}
/* Stagger the bars so they ripple instead of moving in lockstep. */
.bar:nth-child(2) { animation-delay: 0.1s; }
.bar:nth-child(3) { animation-delay: 0.2s; }
.bar:nth-child(4) { animation-delay: 0.3s; }
.bar:nth-child(5) { animation-delay: 0.4s; }
@keyframes wave {
  0%, 100% { height: 10px; }
  50% { height: 30px; }
}

/* ---------- Respect the user's reduced-motion preference ---------- */
@media (prefers-reduced-motion: reduce) {
  .btn,
  .input-group input {
    transition: none;
  }
  .btn-mic.recording,
  .loading::after,
  .bar {
    animation: none;
  }
}
</style>
</head>
<body>
<!-- Main card. Every id and class below is referenced by the inline script or stylesheet. -->
<main class="container">
  <header class="header">
    <h1><span aria-hidden="true">🎤</span> ASR Tester</h1>
    <p>WebSocket-based Speech Recognition Testing</p>
  </header>
  <section class="connection-section" aria-label="Connection">
    <div class="input-group">
      <label for="websocketUrl">WebSocket URL:</label>
      <input type="url" id="websocketUrl" value="ws://52.59.169.24:3015" placeholder="ws://localhost:5005/url" autocomplete="off" spellcheck="false">
    </div>
    <!-- The script toggles these two via style.display, so the inline style stays. -->
    <button type="button" id="connectBtn" class="btn btn-connect">Connect</button>
    <button type="button" id="disconnectBtn" class="btn btn-disconnect" style="display: none;">Disconnect</button>
  </section>
  <!-- role="status" makes connection/recording changes audible to screen readers. -->
  <div id="status" class="status disconnected" role="status">Disconnected</div>
  <div class="audio-controls">
    <!-- Icon-only buttons: aria-label supplies the accessible name; title remains as a tooltip. -->
    <button type="button" id="micBtn" class="btn btn-mic" disabled title="Start Recording" aria-label="Start Recording">🎤</button>
    <button type="button" id="stopBtn" class="btn btn-stop" disabled title="Stop Recording" aria-label="Stop Recording">⏹️</button>
  </div>
  <!-- Purely decorative animation; the status banner carries the same information. -->
  <div id="visualizer" class="audio-visualizer" style="display: none;" aria-hidden="true">
    <div class="bar"></div>
    <div class="bar"></div>
    <div class="bar"></div>
    <div class="bar"></div>
    <div class="bar"></div>
  </div>
  <section class="response-section" aria-labelledby="responseHeading">
    <h2 id="responseHeading">ASR Response:</h2>
    <!-- role="log": entries are appended over time and announced politely. -->
    <div id="responseBox" class="response-box" role="log">Waiting for audio input...</div>
  </section>
</main>
<script>
/**
 * Browser test client for a WebSocket speech-recognition endpoint.
 *
 * Flow implemented here: open a WebSocket, send a JSON "start" control
 * message, stream microphone audio as 16-bit PCM resampled to 8 kHz in binary
 * frames, send a JSON "stop" control message, and log every message the
 * server sends back into the response box.
 */
class JambonzASRClient {
  /** Set up initial state, cache DOM references and wire the buttons. */
  constructor() {
    this.websocket = null;
    this.audioContext = null;
    this.mediaRecorder = null; // never assigned elsewhere; kept so the instance shape is unchanged
    this.audioStream = null;
    this.processor = null;
    this.isRecording = false;
    this.isConnected = false;
    this.initializeElements();
    this.attachEventListeners();
  }

  /** Cache the DOM nodes the client reads from and writes to. */
  initializeElements() {
    this.elements = {
      websocketUrl: document.getElementById('websocketUrl'),
      connectBtn: document.getElementById('connectBtn'),
      disconnectBtn: document.getElementById('disconnectBtn'),
      micBtn: document.getElementById('micBtn'),
      stopBtn: document.getElementById('stopBtn'),
      status: document.getElementById('status'),
      responseBox: document.getElementById('responseBox'),
      visualizer: document.getElementById('visualizer')
    };
  }

  /** Bind each button to its action. */
  attachEventListeners() {
    this.elements.connectBtn.addEventListener('click', () => this.connect());
    this.elements.disconnectBtn.addEventListener('click', () => this.disconnect());
    this.elements.micBtn.addEventListener('click', () => this.startRecording());
    this.elements.stopBtn.addEventListener('click', () => this.stopRecording());
  }

  /**
   * Show a message in the status banner.
   * @param {string} message Text to display.
   * @param {string} type Modifier class ('connected', 'disconnected' or 'recording').
   */
  updateStatus(message, type) {
    this.elements.status.textContent = message;
    this.elements.status.className = `status ${type}`;
  }

  /**
   * Open a WebSocket to the URL in the input field and install its handlers.
   * @returns {Promise<void>} Resolves once the socket object has been created
   *   (not once it is open; that is reported through the status banner).
   */
  async connect() {
    const url = this.elements.websocketUrl.value.trim();
    if (!url) {
      alert('Please enter a WebSocket URL');
      return;
    }
    try {
      this.updateStatus('Connecting...', 'disconnected');
      this.elements.connectBtn.disabled = true;

      const socket = new WebSocket(url);
      socket.binaryType = 'arraybuffer';
      this.websocket = socket;

      // Events from a socket that has since been replaced by a newer connect()
      // must not touch the state or UI of the current connection.
      const isCurrent = () => this.websocket === socket;

      socket.onopen = () => {
        if (!isCurrent()) return;
        this.isConnected = true;
        this.updateStatus('Connected - Ready for Jambonz Protocol', 'connected');
        this.elements.connectBtn.style.display = 'none';
        this.elements.disconnectBtn.style.display = 'block';
        this.elements.micBtn.disabled = false;
        this.elements.responseBox.textContent = 'Connected. Ready to start ASR session...';
      };

      socket.onmessage = (event) => {
        if (!isCurrent()) return;
        if (typeof event.data === 'string') {
          try {
            const response = JSON.parse(event.data);
            this.displayResponse('JSON Control Message', response);
          } catch (e) {
            // Not JSON: show the raw text instead.
            this.displayResponse('Text Message', event.data);
          }
        } else {
          // Binary data (should not happen in normal Jambonz flow from server)
          this.displayResponse('Binary Message', `Received binary data: ${event.data.byteLength} bytes`);
        }
      };

      socket.onerror = (error) => {
        if (!isCurrent()) return;
        console.error('WebSocket error:', error);
        this.updateStatus('Connection Error', 'disconnected');
        this.resetConnection();
      };

      socket.onclose = (event) => {
        if (!isCurrent()) return;
        this.isConnected = false;
        this.updateStatus(`Disconnected (Code: ${event.code})`, 'disconnected');
        this.resetConnection();
        this.displayResponse('Connection Closed', `WebSocket closed with code: ${event.code}, reason: ${event.reason || 'No reason provided'}`);
      };
    } catch (error) {
      // new WebSocket() throws synchronously, e.g. for a malformed URL.
      console.error('Connection failed:', error);
      this.updateStatus('Connection Failed', 'disconnected');
      this.resetConnection();
    }
  }

  /** Stop any recording, close the socket and return the UI to the disconnected state. */
  disconnect() {
    if (this.isRecording) {
      this.stopRecording();
    }
    const socket = this.websocket;
    // Also close a socket that is still connecting so it cannot open later
    // behind a UI that already shows "disconnected".
    if (socket && (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING)) {
      socket.close(1000, 'Client disconnect');
    }
    this.resetConnection();
  }

  /** Mark the client as disconnected, release audio resources and reset the buttons. */
  resetConnection() {
    this.isConnected = false;
    // Release audio first; stopRecording() derives the mic button state from isConnected.
    this.stopRecording();
    this.elements.connectBtn.disabled = false;
    this.elements.connectBtn.style.display = 'block';
    this.elements.disconnectBtn.style.display = 'none';
    this.elements.micBtn.disabled = true;
    this.elements.stopBtn.disabled = true;
  }

  /**
   * Convert float samples to signed 16-bit PCM.
   * @param {Float32Array} float32Array Samples, clamped to [-1, 1] before scaling.
   * @returns {Int16Array} Samples scaled by 0x7FFF.
   */
  floatTo16BitPCM(float32Array) {
    const int16Array = new Int16Array(float32Array.length);
    for (let i = 0; i < float32Array.length; i++) {
      const clipped = Math.max(-1, Math.min(1, float32Array[i]));
      int16Array[i] = clipped * 0x7FFF;
    }
    return int16Array;
  }

  /**
   * Resample a mono buffer to 8 kHz by linear interpolation.
   * No low-pass filter is applied before decimation, so content above 4 kHz
   * can alias into the output.
   * @param {Float32Array} audioBuffer Source samples.
   * @param {number} sourceSampleRate Sample rate of `audioBuffer` in Hz.
   * @returns {Float32Array} Samples at 8000 Hz.
   */
  resampleTo8kHz(audioBuffer, sourceSampleRate) {
    const targetSampleRate = 8000;
    const ratio = sourceSampleRate / targetSampleRate;
    const targetLength = Math.round(audioBuffer.length / ratio);
    const resampled = new Float32Array(targetLength);
    for (let i = 0; i < targetLength; i++) {
      const sourceIndex = i * ratio;
      const sourceIndexFloor = Math.floor(sourceIndex);
      // Clamp the upper neighbour so the last output sample never reads past the end.
      const sourceIndexCeil = Math.min(sourceIndexFloor + 1, audioBuffer.length - 1);
      const weight = sourceIndex - sourceIndexFloor;
      resampled[i] = audioBuffer[sourceIndexFloor] * (1 - weight) +
        audioBuffer[sourceIndexCeil] * weight;
    }
    return resampled;
  }

  /**
   * Acquire the microphone, send the "start" control message and begin
   * streaming 8 kHz 16-bit PCM frames over the open socket.
   * @returns {Promise<void>}
   */
  async startRecording() {
    if (!this.isConnected) {
      alert('Please connect to WebSocket first');
      return;
    }
    if (this.isRecording) {
      return;
    }
    // The permission prompt can stay open for a while; block a second click
    // from creating a second AudioContext and stream in the meantime.
    this.elements.micBtn.disabled = true;
    try {
      // Use the device's native rate rather than forcing one: a forced rate
      // may be rejected on some browsers/devices, and the resampler below
      // adapts to whatever rate the captured buffers actually have.
      const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
      this.audioContext = new AudioContextCtor();

      // Get microphone stream
      this.audioStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: false, // Jambonz typically handles this
          noiseSuppression: false, // Jambonz typically handles this
          autoGainControl: false, // Jambonz typically handles this
          channelCount: 1 // Mono audio
        }
      });

      // The connection may have dropped while the prompt was showing; in that
      // case just release the microphone instead of raising a misleading alert.
      if (!this.isConnected || !this.audioContext || !this.websocket ||
          this.websocket.readyState !== WebSocket.OPEN) {
        this.stopRecording();
        return;
      }

      const source = this.audioContext.createMediaStreamSource(this.audioStream);
      // Create ScriptProcessorNode for audio processing
      // Note: ScriptProcessorNode is deprecated but still widely supported
      // In production, consider using AudioWorklet
      this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
      this.processor.onaudioprocess = (event) => {
        if (!this.isRecording || !this.websocket || this.websocket.readyState !== WebSocket.OPEN) {
          return;
        }
        const inputBuffer = event.inputBuffer;
        const audioData = inputBuffer.getChannelData(0); // Get mono channel
        // Resample to 8kHz using the rate reported by the buffer itself
        const resampled = this.resampleTo8kHz(audioData, inputBuffer.sampleRate);
        // Convert to LINEAR16 PCM
        const pcmData = this.floatTo16BitPCM(resampled);
        // Send binary audio data
        this.websocket.send(pcmData.buffer);
      };

      // Connect audio nodes
      source.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      // Send the START control message before any audio frame: isRecording is
      // still false here, so the processor callback cannot send audio first.
      const startMessage = {
        type: "start",
        language: "en-US",
        format: "raw",
        encoding: "LINEAR16",
        interimResults: true,
        sampleRateHz: 8000,
        options: {
          callSid: Date.now().toString()
        }
      };
      this.websocket.send(JSON.stringify(startMessage));
      this.displayResponse('Sent START Message', startMessage);
      this.isRecording = true;

      // Update UI
      this.elements.micBtn.classList.add('recording');
      this.elements.micBtn.disabled = true;
      this.elements.stopBtn.disabled = false;
      this.elements.visualizer.style.display = 'flex';
      this.updateStatus('Recording - Sending LINEAR16 PCM @ 8kHz', 'recording');
    } catch (error) {
      console.error('Failed to start recording:', error);
      alert('Failed to access microphone. Please check permissions.');
      this.stopRecording();
    }
  }

  /**
   * Stop streaming, send the "stop" control message if a recording was in
   * progress, and release all audio resources. Safe to call repeatedly and
   * when nothing is recording.
   */
  stopRecording() {
    const wasRecording = this.isRecording;
    if (wasRecording) {
      this.isRecording = false;
      // Send Jambonz STOP control message
      if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
        const stopMessage = {
          type: "stop"
        };
        this.websocket.send(JSON.stringify(stopMessage));
        this.displayResponse('Sent STOP Message', stopMessage);
      }
    }

    // Clean up audio resources
    if (this.processor) {
      this.processor.onaudioprocess = null;
      this.processor.disconnect();
      this.processor = null;
    }
    // Detach the context before closing it: a repeated call then cannot close
    // it twice, and a late close() resolution cannot null out a newer context.
    const context = this.audioContext;
    this.audioContext = null;
    if (context && context.state !== 'closed') {
      const closing = context.close();
      if (closing && typeof closing.catch === 'function') {
        closing.catch((error) => console.warn('Failed to close AudioContext:', error));
      }
    }
    if (this.audioStream) {
      this.audioStream.getTracks().forEach(track => track.stop());
      this.audioStream = null;
    }

    // Update UI
    this.elements.micBtn.classList.remove('recording');
    // The mic is only usable while connected; never re-enable it otherwise.
    this.elements.micBtn.disabled = !this.isConnected;
    this.elements.stopBtn.disabled = true;
    this.elements.visualizer.style.display = 'none';
    // Only announce a pending transcript if something was actually recorded.
    if (wasRecording && this.isConnected) {
      this.updateStatus('Connected - Waiting for final transcript...', 'connected');
    }
  }

  /**
   * Escape text for safe insertion into HTML.
   * @param {*} text Value to escape; converted with String().
   * @returns {string} The text with &, <, >, " and ' replaced by entities.
   */
  escapeHtml(text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Append a timestamped entry to the response box.
   * @param {string} messageType Short label for the entry.
   * @param {*} response Payload; objects are pretty-printed as JSON, anything
   *   else is shown as text. May come from the server, so it is escaped.
   */
  displayResponse(messageType, response) {
    const responseBox = this.elements.responseBox;
    const timestamp = new Date().toLocaleTimeString();
    const body = typeof response === 'object'
      ? JSON.stringify(response, null, 2)
      : String(response);
    // The payload is untrusted (it is whatever the remote endpoint sent), so
    // it must never reach innerHTML unescaped.
    const content = `<strong>[${this.escapeHtml(timestamp)}] ${this.escapeHtml(messageType)}:</strong>\n` +
      this.escapeHtml(body);

    // Replace the box only when it holds nothing but a placeholder. An exact
    // match (rather than a substring search) keeps a transcript that happens
    // to contain the placeholder text from wiping the log.
    const current = responseBox.innerHTML.trim();
    if (current === 'Connected. Ready to start ASR session...' ||
        current === 'Processing audio...') {
      responseBox.innerHTML = content;
    } else {
      responseBox.innerHTML += '\n\n' + content;
    }
    // Auto-scroll to bottom
    responseBox.scrollTop = responseBox.scrollHeight;
  }
}
// Construct the client only after the document has been parsed, so that every
// element lookup made by its constructor finds its target.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  new JambonzASRClient();
});
</script>
</body>
</html>