// ---------------------------------------------------------------------------
// Configuration and shared state
// ---------------------------------------------------------------------------

/** Origin of the current page; the /asr and /cache requests below are built on it. */
const { origin: API_BASE_URL } = window.location;

/** Microphone recorder used by the start/stop buttons (AudioRecorder is defined outside this file section). */
const recorder = new AudioRecorder();

/** Shared Web Audio context for TTS playback data; created when the WebSocket opens. */
let audioContext = null;

/** Decoded TTS chunks collected until the server reports completion. */
let audioBuffers = [];

/** File names of TTS chunks that have been announced but not yet decoded. */
let pendingAudioPaths = new Set();

/** Object URL of the most recently generated audio, or null when there is none. */
let currentAudioPath = null;

/** The active TTS WebSocket, or null before the first connection attempt. */
let ws = null;
|
|
|
|
|
// ---------------------------------------------------------------------------
// DOM elements, looked up once by id when the script loads.
// The two lists below are positional: the n-th name receives the n-th id.
// ---------------------------------------------------------------------------
const [
  startRecordingBtn,
  stopRecordingBtn,
  transcriptionArea,
  asrStatus,
  ttsInput,
  generateSpeechBtn,
  ttsStatus,
  audioPlayer,
  downloadAudioBtn,
  audioFileInput,
  uploadAudioBtn
] = [
  'startRecording',
  'stopRecording',
  'transcription',
  'asrStatus',
  'ttsInput',
  'generateSpeech',
  'ttsStatus',
  'audioPlayer',
  'downloadAudio',
  'audioFileInput',
  'uploadAudio'
].map((elementId) => document.getElementById(elementId));
|
|
|
|
|
// Upload flow: decode the selected file in the browser, re-encode it with
// audioBufferToWav (16 kHz, mono, 16-bit PCM), POST it to /asr and show the
// returned transcription.
uploadAudioBtn.addEventListener('click', async () => {
  const file = audioFileInput.files[0];
  if (!file) {
    asrStatus.textContent = 'Please select an audio file';
    asrStatus.className = 'status error';
    return;
  }

  // Short-lived context used only to decode this one file. It gets its own
  // name so it does not shadow the module-level `audioContext` used for TTS.
  let decodeContext = null;

  try {
    asrStatus.textContent = 'Processing audio file...';
    asrStatus.className = 'status';

    decodeContext = new (window.AudioContext || window.webkitAudioContext)();
    const arrayBuffer = await file.arrayBuffer();
    const audioBuffer = await decodeContext.decodeAudioData(arrayBuffer);

    const wavBuffer = await audioBufferToWav(audioBuffer);
    const wavBlob = new Blob([wavBuffer], { type: 'audio/wav' });

    const formData = new FormData();
    formData.append('file', wavBlob, 'recording.wav');

    const response = await fetch(`${API_BASE_URL}/asr`, {
      method: 'POST',
      body: formData
    });

    if (!response.ok) {
      throw new Error('ASR request failed');
    }

    const data = await response.json();
    transcriptionArea.value = data.text;
    asrStatus.textContent = 'Transcription complete!';
    asrStatus.className = 'status success';
  } catch (error) {
    asrStatus.textContent = `Error: ${error.message}`;
    asrStatus.className = 'status error';
  } finally {
    // Release the context on every path. Previously it was closed only after
    // a successful transcription, so each failed decode or request leaked one.
    if (decodeContext) {
      try {
        await decodeContext.close();
      } catch (closeError) {
        console.warn('Failed to close AudioContext:', closeError);
      }
    }
  }
});
|
|
|
|
|
// Start button: begin capturing from the recorder and flip the two buttons
// so that only "stop" can be pressed while recording.
startRecordingBtn.addEventListener('click', async () => {
  try {
    Object.assign(asrStatus, {
      textContent: 'Starting recording...',
      className: 'status'
    });

    await recorder.start();

    startRecordingBtn.disabled = true;
    stopRecordingBtn.disabled = false;
    asrStatus.textContent = 'Recording...';
  } catch (error) {
    // Buttons are left untouched here: they are only flipped after a
    // successful start.
    Object.assign(asrStatus, {
      textContent: `Error starting recording: ${error.message}`,
      className: 'status error'
    });
  }
});
|
|
|
// Stop button: finish the recording, send the resulting blob to /asr and
// display the transcription.
stopRecordingBtn.addEventListener('click', async () => {
  // Puts both buttons back into the idle (not recording) state.
  const showIdleButtons = () => {
    startRecordingBtn.disabled = false;
    stopRecordingBtn.disabled = true;
  };

  try {
    const recording = await recorder.stop();
    showIdleButtons();
    asrStatus.textContent = 'Processing audio...';

    // The blob is attached without an explicit file name.
    const payload = new FormData();
    payload.append('file', recording);

    const asrResponse = await fetch(`${API_BASE_URL}/asr`, {
      method: 'POST',
      body: payload
    });
    if (!asrResponse.ok) {
      throw new Error('ASR request failed');
    }

    const result = await asrResponse.json();
    transcriptionArea.value = result.text;
    asrStatus.textContent = 'Transcription complete!';
    asrStatus.className = 'status success';
  } catch (error) {
    asrStatus.textContent = `Error: ${error.message}`;
    asrStatus.className = 'status error';
    showIdleButtons();
  }
});
|
|
|
|
|
/**
 * Opens the TTS WebSocket at `/tts-ws` on the current host and installs its
 * handlers. Does nothing if a socket is already open or still connecting.
 *
 * Every incoming message is JSON with a `status` field:
 *   - 'partial':  `audioPath` names an audio chunk; its file name is fetched
 *                 from `${API_BASE_URL}/cache/` and decoded.
 *   - 'complete': the decoded chunks are joined, in the order they were
 *                 announced, into one 16 kHz mono WAV that is loaded into the
 *                 audio player and made available for download.
 *   - 'error':    `message` is displayed and any partial audio is discarded.
 *
 * When the socket closes, a reconnect is scheduled after 5 seconds.
 */
function connectWebSocket() {
  // This function is called on page load, by the reconnect timer and by the
  // generate button. Reuse a live socket instead of stacking up duplicates.
  if (ws && (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN)) {
    return;
  }

  const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws';
  const socket = new WebSocket(`${wsProtocol}://${window.location.host}/tts-ws`);
  ws = socket;

  // Promises for the chunk downloads started for the current utterance.
  let chunkLoads = [];

  // Sets the TTS status text and its CSS class together.
  const setTtsStatus = (message, className) => {
    ttsStatus.textContent = message;
    ttsStatus.className = className;
  };

  // Forgets every chunk collected or in flight for the current utterance.
  const discardPartialAudio = () => {
    chunkLoads = [];
    audioBuffers = [];
    pendingAudioPaths.clear();
  };

  // Revokes the object URL of the last generated audio, if there is one.
  const releaseCurrentAudio = () => {
    if (currentAudioPath) {
      URL.revokeObjectURL(currentAudioPath);
      currentAudioPath = null;
    }
  };

  // Downloads and decodes one announced chunk. Never rejects: failures are
  // reported in the status line and leave the chunk's slot empty.
  const loadChunk = async (message) => {
    try {
      const audioPath = message.audioPath.split('/').pop();
      pendingAudioPaths.add(audioPath);

      // Reserve the chunk's position now, before the first await, so the
      // final order follows message arrival rather than download speed.
      // The array reference is captured so a later reset of `audioBuffers`
      // cannot be polluted by a stale download.
      const slots = audioBuffers;
      const slotIndex = slots.push(null) - 1;

      if (audioContext.state === 'suspended') {
        await audioContext.resume();
      }

      const audioResponse = await fetchWithRetry(`${API_BASE_URL}/cache/${audioPath}`);
      const arrayBuffer = await audioResponse.arrayBuffer();
      if (arrayBuffer.byteLength === 0) {
        throw new Error('Empty audio data received');
      }

      slots[slotIndex] = await audioContext.decodeAudioData(arrayBuffer);
      pendingAudioPaths.delete(audioPath);
    } catch (error) {
      console.error('Error loading audio:', error);
      setTtsStatus(`Error loading audio: ${error.message}`, 'status error');
      pendingAudioPaths.clear();
    }
  };

  // Joins all decoded chunks into a single WAV and hands it to the player.
  const finalizeAudio = async () => {
    // Take ownership of this utterance's chunks so later messages start clean.
    const loads = chunkLoads;
    const slots = audioBuffers;
    chunkLoads = [];
    audioBuffers = [];

    if (pendingAudioPaths.size > 0) {
      ttsStatus.textContent = 'Finalizing audio...';
    }

    // Wait for the downloads that are actually in flight instead of sleeping
    // for a fixed interval and hoping they have finished.
    await Promise.all(loads);

    try {
      const targetSampleRate = 16000;

      // Resample first so the combined length is exact. Chunks that failed
      // to load are null and are skipped (best effort, as before).
      const chunks = [];
      let totalLength = 0;
      for (const buffer of slots) {
        if (buffer === null) {
          continue;
        }
        let channelData = buffer.getChannelData(0);
        if (buffer.sampleRate !== targetSampleRate) {
          channelData = resampleAudio(channelData, buffer.sampleRate, targetSampleRate);
        }
        chunks.push(channelData);
        totalLength += channelData.length;
      }

      if (totalLength === 0) {
        throw new Error('No audio data received');
      }

      const combinedBuffer = audioContext.createBuffer(1, totalLength, targetSampleRate);
      let offset = 0;
      for (const chunk of chunks) {
        combinedBuffer.copyToChannel(chunk, 0, offset);
        offset += chunk.length;
      }

      const wavBlob = new Blob([await audioBufferToWav(combinedBuffer)], { type: 'audio/wav' });
      const audioUrl = URL.createObjectURL(wavBlob);

      const previousUrl = currentAudioPath;
      audioPlayer.src = audioUrl;
      audioPlayer.load();
      downloadAudioBtn.disabled = false;
      currentAudioPath = audioUrl;

      // The player now points at the new URL, so the old blob can be freed.
      if (previousUrl) {
        URL.revokeObjectURL(previousUrl);
      }

      setTtsStatus('Audio generated successfully!', 'status success');
    } catch (error) {
      console.error('Error combining audio:', error);
      setTtsStatus(`Error combining audio: ${error.message}`, 'status error');
    } finally {
      pendingAudioPaths.clear();
    }
  };

  socket.onopen = () => {
    console.log('WebSocket connected');
    generateSpeechBtn.disabled = false;
    setTtsStatus('Connected to TTS service', 'status success');

    if (!audioContext) {
      audioContext = new (window.AudioContext || window.webkitAudioContext)();
    } else if (audioContext.state === 'suspended') {
      audioContext.resume().catch((error) => {
        console.error('Error resuming audio context:', error);
      });
    }
  };

  socket.onmessage = async (event) => {
    let response;
    try {
      response = JSON.parse(event.data);
    } catch (error) {
      console.error('Ignoring malformed TTS message:', error);
      return;
    }

    if (response.status === 'partial') {
      setTtsStatus('Generating audio...', 'status');
      // Not awaited: the download runs in the background and is collected
      // when the 'complete' message arrives.
      chunkLoads.push(loadChunk(response));
    } else if (response.status === 'complete') {
      await finalizeAudio();
    } else if (response.status === 'error') {
      setTtsStatus(`Error: ${response.message}`, 'status error');
      discardPartialAudio();
    }
  };

  socket.onclose = () => {
    console.log('WebSocket disconnected');
    generateSpeechBtn.disabled = true;
    setTtsStatus('Disconnected. Trying to reconnect...', 'status error');

    discardPartialAudio();
    releaseCurrentAudio();

    setTimeout(connectWebSocket, 5000);
  };

  socket.onerror = (error) => {
    console.error('WebSocket error:', error);
    setTtsStatus('Connection error. Retrying...', 'status error');

    discardPartialAudio();
    releaseCurrentAudio();
  };
}
|
|
|
|
|
/**
 * Encodes the first channel of an audio buffer as a 16 kHz, mono, 16-bit PCM
 * WAV file.
 *
 * @param {AudioBuffer} buffer - Source audio; only channel 0 is read. It is
 *   resampled with resampleAudio when its sample rate is not 16000.
 * @returns {Promise<ArrayBuffer>} The complete WAV file (44-byte header + samples).
 */
async function audioBufferToWav(buffer) {
  const outputRate = 16000;
  const channelCount = 1;
  const pcmFormatTag = 1;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const headerBytes = 44;

  const firstChannel = buffer.getChannelData(0);
  const samples = buffer.sampleRate === outputRate
    ? firstChannel
    : await resampleAudio(firstChannel, buffer.sampleRate, outputRate);

  const dataBytes = samples.length * bytesPerSample;
  const fileBytes = headerBytes + dataBytes;
  const wav = new ArrayBuffer(fileBytes);
  const out = new DataView(wav);

  // RIFF container header; the size field excludes the first 8 bytes.
  writeString(out, 0, 'RIFF');
  out.setUint32(4, fileBytes - 8, true);
  writeString(out, 8, 'WAVE');

  // "fmt " chunk: 16 bytes describing the PCM layout.
  writeString(out, 12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, pcmFormatTag, true);
  out.setUint16(22, channelCount, true);
  out.setUint32(24, outputRate, true);
  out.setUint32(28, outputRate * channelCount * bytesPerSample, true); // bytes per second
  out.setUint16(32, channelCount * bytesPerSample, true); // bytes per sample frame
  out.setUint16(34, bitsPerSample, true);

  // "data" chunk: the samples themselves.
  writeString(out, 36, 'data');
  out.setUint32(40, dataBytes, true);
  floatTo16BitPCM(out, headerBytes, samples);

  return wav;
}
|
|
|
/**
 * Resamples mono audio by linear interpolation between neighbouring samples.
 *
 * @param {Float32Array} audioData - Input samples.
 * @param {number} originalSampleRate - Sample rate of `audioData`.
 * @param {number} targetSampleRate - Desired sample rate.
 * @returns {Float32Array} A new array of `round(length * target / original)` samples.
 */
function resampleAudio(audioData, originalSampleRate, targetSampleRate) {
  const ratio = targetSampleRate / originalSampleRate;
  const blank = new Float32Array(Math.round(audioData.length * ratio));

  return blank.map((_, outIndex) => {
    // Fractional position of this output sample in the input signal.
    const position = outIndex / ratio;
    const left = Math.floor(position);
    const weight = position - left;

    // Past the last input pair there is nothing to blend with, so the final
    // input sample is repeated.
    return left + 1 < audioData.length
      ? audioData[left] * (1 - weight) + audioData[left + 1] * weight
      : audioData[left];
  });
}
|
|
|
/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} string - Text to write (used here for 4-character WAV chunk tags).
 */
function writeString(view, offset, string) {
  string.split('').forEach((unit, position) => {
    view.setUint8(offset + position, unit.charCodeAt(0));
  });
}
|
|
|
/**
 * Converts float samples to little-endian signed 16-bit PCM.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first sample; each sample takes 2 bytes.
 * @param {Float32Array} input - Samples; values outside [-1, 1] are clamped.
 */
function floatTo16BitPCM(view, offset, input) {
  let writeAt = offset;
  for (let i = 0; i < input.length; i++) {
    const clamped = Math.max(-1, Math.min(1, input[i]));
    // The negative range reaches -32768 while the positive range stops at
    // 32767, so each side gets its own scale factor.
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    view.setInt16(writeAt, clamped * scale, true);
    writeAt += 2;
  }
}
|
|
|
// Generate button: send the entered text to the TTS service over the
// WebSocket, or start a reconnect when the socket is not open.
generateSpeechBtn.addEventListener('click', () => {
  const report = (message, className) => {
    ttsStatus.textContent = message;
    ttsStatus.className = className;
  };

  const text = ttsInput.value.trim();
  if (text === '') {
    report('Please enter some text', 'status error');
    return;
  }

  const socketIsOpen = Boolean(ws) && ws.readyState === WebSocket.OPEN;
  if (!socketIsOpen) {
    report('Connection lost. Reconnecting...', 'status error');
    connectWebSocket();
    return;
  }

  ws.send(JSON.stringify({ text }));
  report('Generating audio...', 'status');
});
|
|
|
// Download button: save the most recently generated audio by clicking a
// temporary link that points at its object URL.
downloadAudioBtn.addEventListener('click', () => {
  if (!currentAudioPath) {
    return;
  }

  const anchor = Object.assign(document.createElement('a'), {
    href: currentAudioPath,
    download: `combined_audio_${Date.now()}.wav`
  });

  // The link is attached only for the duration of the click.
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);
});
|
|
|
|
|
// Page teardown: release the audio context, the socket, the generated
// audio URL and any partially collected TTS data.
window.addEventListener('beforeunload', function releaseResources() {
  if (audioContext) {
    audioContext.close();
  }

  if (ws) {
    ws.close();
  }

  if (currentAudioPath) {
    URL.revokeObjectURL(currentAudioPath);
  }

  audioBuffers = [];
  pendingAudioPaths.clear();
});

// Open the TTS connection as soon as the script has loaded.
connectWebSocket();
|
|
|
/**
 * Fetches a URL, trying again after a fixed delay when the request fails or
 * the response status is not OK.
 *
 * @param {string} url - Resource to fetch.
 * @param {number} [maxRetries=3] - Total number of attempts (not additional
 *   retries). Values below 1 or non-numeric values are treated as 1, and
 *   fractional values are rounded up, so at least one request is always made.
 * @param {number} [retryDelay=1000] - Milliseconds to wait between attempts.
 * @returns {Promise<Response>} The first response whose `ok` flag is true.
 * @throws {Error} The error from the last attempt once all attempts have failed.
 */
async function fetchWithRetry(url, maxRetries = 3, retryDelay = 1000) {
  // Guard against the loop being skipped entirely (or ending without a
  // throw), which used to resolve to `undefined` and crash the caller later.
  const attempts = Math.max(1, Math.ceil(Number(maxRetries)) || 1);
  let lastError;

  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return response;
    } catch (error) {
      lastError = error;
      if (attempt < attempts) {
        await new Promise((resolve) => setTimeout(resolve, retryDelay));
      }
    }
  }

  throw lastError;
}