Spaces:
No application file
No application file
| /** | |
| * LFM2.5-Audio WebGPU Demo | |
| * | |
| * Demonstrates ASR, TTS, and interleaved audio generation using ONNX Runtime Web. | |
| */ | |
| import { AudioModel, loadAudioFile, clearModelCache, getCacheInfo } from './audio-model.js'; | |
// Base URL of the Hugging Face repository the model files are resolved against.
const MODEL_URL = 'https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-ONNX/resolve/main';

// Selectable model configurations, keyed by the value used in the model dropdown.
// Each entry carries the download path, a human-readable label, and the
// quantization level requested for every model component.
const MODELS = {
  'LFM2.5-Audio-1.5B-Q4': {
    path: MODEL_URL,
    label: 'LFM2.5-Audio-1.5B Q4 (~1.6 GB)',
    quantization: Object.fromEntries(
      ['decoder', 'audioEncoder', 'audioEmbedding', 'audioDetokenizer', 'vocoder'].map(
        (component) => [component, 'q4'],
      ),
    ),
  },
};
// DOM element handles, looked up once at module load by element id.
const byId = (id) => document.getElementById(id);

// Model selection / loading controls
const modelSelect = byId('modelSelect');
const loadBtn = byId('loadBtn');
const clearBtn = byId('clearBtn');
const statusEl = byId('status');

// Chat area and text input
const chatContainer = byId('chatContainer');
const userInput = byId('userInput');
const sendBtn = byId('sendBtn');

// Download progress indicator
const progressBar = byId('progressBar');
const progressFill = byId('progressFill');
const progressText = byId('progressText');

// Audio mode, recording and upload controls
const audioModeSelect = byId('audioModeSelect');
const recordBtn = byId('recordBtn');
const audioBtn = byId('audioBtn');
const audioInput = byId('audioInput');
const audioPreview = byId('audioPreview');

// Model cache controls
const clearCacheBtn = byId('clearCacheBtn');
const cacheInfoEl = byId('cacheInfo');

// Drag-and-drop overlay and generation spinner
const dropOverlay = byId('dropOverlay');
const spinner = byId('spinner');
const spinnerText = byId('spinnerText');
const spinnerStats = byId('spinnerStats');
// Mutable module state shared by the handlers below.

// Conversation
let audioModel = null; // loaded AudioModel instance, or null when nothing is loaded
let messages = []; // chat history as { role, content } records
let isGenerating = false; // true while generate() is running

// Audio input
let pendingAudio = null; // { audioData, sampleRate, fileName } waiting to be sent
let audioMode = 'interleaved'; // last value picked in the audio mode selector

// Microphone recording
let isRecording = false;
let mediaRecorder = null;
let audioChunks = [];
| // ============================================================================ | |
| // Audio Helpers | |
| // ============================================================================ | |
/**
 * Encode float samples as a mono, 16-bit PCM WAV file.
 *
 * Samples are clamped to [-1, 1] before conversion. Summary statistics about
 * the input are logged to the console as a debugging aid.
 *
 * @param {ArrayLike<number>} samples - Audio samples.
 * @param {number} sampleRate - Sample rate written to the WAV header, in Hz.
 * @returns {Blob} Blob of type `audio/wav` (44-byte header + 2 bytes per sample).
 */
function createWavBlob(samples, sampleRate) {
  const HEADER_BYTES = 44;
  const CHANNELS = 1;
  const BIT_DEPTH = 16;
  const total = samples.length;

  // Debug: summarise the waveform before encoding.
  const stats = { lo: Infinity, hi: -Infinity, absSum: 0, audible: 0 };
  for (let idx = 0; idx < total; idx += 1) {
    const value = samples[idx];
    const magnitude = Math.abs(value);
    if (value < stats.lo) stats.lo = value;
    if (value > stats.hi) stats.hi = value;
    stats.absSum += magnitude;
    if (magnitude > 0.001) stats.audible += 1;
  }
  console.log('WAV input stats:', {
    length: total,
    min: stats.lo.toFixed(6),
    max: stats.hi.toFixed(6),
    avgAbs: (stats.absSum / total).toFixed(6),
    nonZeroSamples: stats.audible,
    percentNonZero: `${((stats.audible / total) * 100).toFixed(1)}%`,
  });

  const sampleBytes = BIT_DEPTH / 8;
  const frameBytes = CHANNELS * sampleBytes;
  const payloadBytes = total * sampleBytes;
  const fileBytes = HEADER_BYTES + payloadBytes;

  const wav = new ArrayBuffer(fileBytes);
  const out = new DataView(wav);
  const putTag = (at, tag) => {
    [...tag].forEach((ch, k) => out.setUint8(at + k, ch.charCodeAt(0)));
  };

  // RIFF container header (all multi-byte fields are little-endian).
  putTag(0, 'RIFF');
  out.setUint32(4, fileBytes - 8, true);
  putTag(8, 'WAVE');

  // "fmt " chunk: 16-byte body, format tag 1 (PCM).
  putTag(12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, 1, true);
  out.setUint16(22, CHANNELS, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * frameBytes, true);
  out.setUint16(32, frameBytes, true);
  out.setUint16(34, BIT_DEPTH, true);

  // "data" chunk: clamped samples scaled to signed 16-bit.
  putTag(36, 'data');
  out.setUint32(40, payloadBytes, true);
  for (let idx = 0; idx < total; idx += 1) {
    const clamped = Math.max(-1, Math.min(1, samples[idx]));
    // Negative values scale by 0x8000 and non-negative by 0x7FFF so both
    // extremes map onto the full int16 range.
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    out.setInt16(HEADER_BYTES + idx * 2, clamped * scale, true);
  }

  return new Blob([wav], { type: 'audio/wav' });
}
/**
 * Build a WAV blob containing a half-amplitude sine tone; useful for checking
 * that WAV creation works.
 *
 * @param {number} [durationSec=1] - Tone length in seconds.
 * @param {number} [frequency=440] - Tone frequency in Hz.
 * @param {number} [sampleRate=24000] - Sample rate in Hz.
 * @returns {Blob} Result of `createWavBlob` for the generated samples.
 */
function createTestToneBlob(durationSec = 1, frequency = 440, sampleRate = 24000) {
  const sampleCount = Math.floor(durationSec * sampleRate);
  const tone = new Float32Array(sampleCount).map(
    (_, n) => 0.5 * Math.sin(2 * Math.PI * frequency * n / sampleRate),
  );
  return createWavBlob(tone, sampleRate);
}
| // ============================================================================ | |
| // UI Helpers | |
| // ============================================================================ | |
/**
 * Show a message in the status element.
 *
 * @param {string} text - Message to display.
 * @param {string} [type=''] - Value assigned to the status element's class name.
 */
function setStatus(text, type = '') {
  Object.assign(statusEl, { textContent: text, className: type });
}
/**
 * Disable or enable the model-loading controls.
 *
 * @param {boolean} loading - Value assigned to `disabled` on the load button
 *   and the model selector.
 */
function setLoading(loading) {
  for (const control of [loadBtn, modelSelect]) {
    control.disabled = loading;
  }
}
/**
 * Enable or disable the chat input controls (text box, send, upload, record).
 *
 * @param {boolean} ready - When truthy the controls are enabled.
 */
function setReady(ready) {
  const locked = !ready;
  [userInput, sendBtn, audioBtn, recordBtn].forEach((control) => {
    control.disabled = locked;
  });
}
/**
 * Toggle visibility of the progress bar.
 *
 * @param {boolean} show - Truthy displays the bar as a block, falsy hides it.
 */
function showProgress(show) {
  const { style } = progressBar;
  if (show) {
    style.display = 'block';
  } else {
    style.display = 'none';
  }
}
/**
 * Update the progress bar fill and its caption.
 *
 * @param {number} percent - Fill width, applied as a CSS percentage.
 * @param {string} [text] - Caption; when falsy the percentage is shown instead.
 */
function updateProgress(percent, text) {
  const asPercent = `${percent}%`;
  progressFill.style.width = asPercent;
  progressText.textContent = text ? text : asPercent;
}
/**
 * Activate the spinner and set both of its text lines.
 *
 * @param {string} text - Main spinner caption.
 * @param {string} [stats=''] - Secondary statistics line.
 */
function showSpinner(text, stats = '') {
  const captions = [
    [spinnerText, text],
    [spinnerStats, stats],
  ];
  spinner.classList.add('active');
  for (const [node, value] of captions) {
    node.textContent = value;
  }
}
/**
 * Refresh the spinner captions without changing its visibility.
 *
 * @param {?string} text - New main caption; a falsy value leaves the current one.
 * @param {string} [stats=''] - Statistics line (always overwritten).
 */
function updateSpinner(text, stats = '') {
  spinnerStats.textContent = stats;
  if (!text) return;
  spinnerText.textContent = text;
}
/** Deactivate the spinner and blank both of its text lines. */
function hideSpinner() {
  spinner.classList.remove('active');
  [spinnerText, spinnerStats].forEach((node) => {
    node.textContent = '';
  });
}
/**
 * Append a chat message bubble to the chat container and scroll it into view.
 *
 * When `audio` is supplied, a small attachment row (icon, file name, duration)
 * is rendered above the text. The row is built from DOM nodes with
 * `textContent`, never from an HTML string, because `audio.fileName` comes
 * from a user-supplied file and may contain markup.
 *
 * @param {string} role - Message role; becomes part of the element's class name.
 * @param {string} content - Message text.
 * @param {boolean} [isStreaming=false] - Adds the `generating` class when true.
 * @param {?{fileName: string, audioData: ArrayLike<number>, sampleRate: number}} [audio=null]
 *   Audio attached to the message, if any.
 * @returns {{msgEl: HTMLElement, textEl: HTMLElement}} The message element and
 *   the span holding its text.
 */
function addMessage(role, content, isStreaming = false, audio = null) {
  const msgEl = document.createElement('div');
  msgEl.className = `message ${role}${isStreaming ? ' generating' : ''}`;

  if (audio) {
    const audioEl = document.createElement('div');
    audioEl.className = 'audio-preview-item';
    audioEl.style.marginBottom = '0.5rem';

    const seconds = (audio.audioData.length / audio.sampleRate).toFixed(1);
    const parts = [
      ['audio-icon', '🎤'],
      ['audio-name', audio.fileName],
      ['audio-info', `${seconds}s`],
    ];
    for (const [className, text] of parts) {
      const span = document.createElement('span');
      span.className = className;
      span.textContent = text;
      audioEl.appendChild(span);
    }
    msgEl.appendChild(audioEl);
  }

  const textEl = document.createElement('span');
  textEl.textContent = content;
  msgEl.appendChild(textEl);

  chatContainer.appendChild(msgEl);
  chatContainer.scrollTop = chatContainer.scrollHeight;
  return { msgEl, textEl };
}
/**
 * Set the text input placeholder to match the selected audio mode.
 * Falls back to the interleaved-mode hint when the selector is missing or has
 * no value.
 */
function updatePlaceholder() {
  const hintsByMode = new Map([
    ['asr', 'Record or upload audio to transcribe...'],
    ['tts', 'Type text to convert to speech...'],
  ]);
  const mode = audioModeSelect?.value || 'interleaved';
  userInput.placeholder = hintsByMode.get(mode) ?? 'Type a message or record audio...';
}
/** Discard the pending audio attachment and empty its preview, if one exists. */
function clearPendingAudio() {
  pendingAudio = null;
  if (!audioPreview) return;
  audioPreview.innerHTML = '';
}
/**
 * Refresh the cache-size label and the enabled state of the clear-cache button
 * from `getCacheInfo()`.
 *
 * Sizes of 1000 MB or more are shown in GB, sizes of at least 1 MB in MB, and
 * anything smaller is reported as no cached models (with the button disabled).
 * Does nothing when either element is missing from the page.
 *
 * @returns {Promise<void>}
 */
async function updateCacheInfo() {
  if (!cacheInfoEl || !clearCacheBtn) return;

  const info = await getCacheInfo();
  const hasUsage = info && info.used > 0;
  const usedMB = hasUsage ? info.used / 1024 / 1024 : 0;

  let label = 'No models cached';
  if (usedMB >= 1000) {
    label = `${(usedMB / 1024).toFixed(1)} GB cached`;
  } else if (usedMB >= 1) {
    label = `${usedMB.toFixed(0)} MB cached`;
  }

  cacheInfoEl.textContent = label;
  clearCacheBtn.disabled = usedMB < 1;
}
| // ============================================================================ | |
| // Microphone Recording | |
| // ============================================================================ | |
/**
 * Ask for microphone access and start recording.
 *
 * On success the module-level recorder state (`mediaRecorder`, `audioChunks`,
 * `isRecording`) is updated and the record button switches to its "stop"
 * appearance. When the recorder later stops, the microphone tracks are
 * released and the captured data is handed to `processRecordedAudio`.
 *
 * Any failure (insecure context, missing API, permission denied, recorder
 * construction/start error) is logged and shown in the status line; the
 * microphone stream is released if it had already been acquired.
 *
 * @returns {Promise<void>}
 */
async function startRecording() {
  let stream = null;
  try {
    if (!window.isSecureContext) {
      throw new Error('Microphone requires HTTPS. Use localhost or enable HTTPS.');
    }
    if (!navigator.mediaDevices?.getUserMedia) {
      throw new Error('MediaDevices API not available in this browser.');
    }

    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const micStream = stream;
    const recorder = new MediaRecorder(micStream);
    const chunks = [];

    recorder.ondataavailable = (e) => {
      if (e.data.size > 0) {
        chunks.push(e.data);
      }
    };
    recorder.onstop = async () => {
      micStream.getTracks().forEach((track) => track.stop());
      // Label the blob with the container the recorder actually produced;
      // not every browser records WebM.
      const audioBlob = new Blob(chunks, { type: recorder.mimeType || 'audio/webm' });
      await processRecordedAudio(audioBlob);
    };

    recorder.start();

    // Publish shared state only once recording has really started.
    audioChunks = chunks;
    mediaRecorder = recorder;
    isRecording = true;
    recordBtn.classList.add('recording');
    recordBtn.textContent = '⏹️';
    recordBtn.title = 'Stop recording';
    setStatus('Recording... Click to stop', 'success');
  } catch (error) {
    // Release the microphone if setup failed after access was granted;
    // otherwise the capture stays live with no way to stop it.
    stream?.getTracks().forEach((track) => track.stop());
    console.error('Failed to start recording:', error);
    const msg = error instanceof Error ? error.message : String(error);
    setStatus(`Microphone error: ${msg}`, 'error');
  }
}
/**
 * Stop the active recording, if any, and restore the record button to its
 * idle appearance. Does nothing unless a recorder exists and is recording.
 */
function stopRecording() {
  const isActive = mediaRecorder?.state === 'recording';
  if (!isActive) return;

  mediaRecorder.stop();
  isRecording = false;

  recordBtn.classList.remove('recording');
  Object.assign(recordBtn, {
    textContent: '🎤',
    title: 'Record from microphone',
  });
}
/**
 * Decode a recorded audio blob, downmix it to mono, and stage it as the
 * pending audio attachment (including its preview row).
 *
 * All decoded channels are averaged into a single mono track. The decoding
 * `AudioContext` is closed whether or not decoding succeeds. Failures are
 * logged and reported in the status line.
 *
 * @param {Blob} audioBlob - Recorded audio in a format the browser can decode.
 * @returns {Promise<void>}
 */
async function processRecordedAudio(audioBlob) {
  let audioContext = null;
  try {
    setStatus('Processing recording...');
    const arrayBuffer = await audioBlob.arrayBuffer();
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    const channelCount = audioBuffer.numberOfChannels;
    let audioData;
    if (channelCount === 1) {
      audioData = new Float32Array(audioBuffer.getChannelData(0));
    } else {
      // Downmix by averaging every channel, not just the first two.
      const channels = [];
      for (let c = 0; c < channelCount; c++) {
        channels.push(audioBuffer.getChannelData(c));
      }
      const frames = channels[0].length;
      audioData = new Float32Array(frames);
      for (let i = 0; i < frames; i++) {
        let acc = 0;
        for (let c = 0; c < channelCount; c++) {
          acc += channels[c][i];
        }
        audioData[i] = acc / channelCount;
      }
    }

    const sampleRate = audioBuffer.sampleRate;
    const duration = (audioData.length / sampleRate).toFixed(1);
    pendingAudio = { audioData, sampleRate, fileName: `Recording (${duration}s)` };

    if (audioPreview) {
      // Only numeric values are interpolated here, so an HTML string is safe.
      audioPreview.innerHTML = `
        <div class="audio-preview-item">
          <span class="audio-icon">🎤</span>
          <span class="audio-name">Recording</span>
          <span class="audio-info">${duration}s @ ${sampleRate}Hz</span>
          <button class="remove-btn" onclick="window.clearPendingAudio()">×</button>
        </div>
      `;
    }
    setStatus('Recording ready. Click Send to process.', 'success');
  } catch (error) {
    console.error('Failed to process recording:', error);
    const msg = error instanceof Error ? error.message : String(error);
    setStatus(`Error processing recording: ${msg}`, 'error');
  } finally {
    // Always release the context, including when decoding failed.
    if (audioContext) {
      Promise.resolve(audioContext.close()).catch((closeError) => {
        console.warn('Failed to close AudioContext:', closeError);
      });
    }
  }
}
| // ============================================================================ | |
| // Model Loading | |
| // ============================================================================ | |
/**
 * Decide which execution device to request for the model.
 *
 * WebGPU is chosen only when the API exists AND an adapter can actually be
 * obtained; otherwise the WASM (CPU) backend is used.
 *
 * @returns {Promise<'webgpu'|'wasm'>}
 */
async function detectInferenceDevice() {
  if (!navigator.gpu) {
    console.warn('WebGPU not available, falling back to WASM (CPU)');
    return 'wasm';
  }
  try {
    const adapter = await navigator.gpu.requestAdapter();
    if (adapter) return 'webgpu';
    console.warn('WebGPU adapter not found, falling back to WASM (CPU)');
  } catch (error) {
    console.warn('WebGPU adapter request failed, falling back to WASM (CPU):', error);
  }
  return 'wasm';
}

/**
 * Load the model currently selected in the dropdown.
 *
 * Disposes any previously loaded model, clears the conversation and pending
 * audio, then loads the new model while reporting progress. On success the
 * chat controls are enabled; on failure the error is shown in the status line
 * and no model is left loaded.
 *
 * @returns {Promise<void>}
 */
async function loadModel() {
  const modelKey = modelSelect.value;
  // Own-property check so inherited keys (e.g. "constructor") are rejected.
  const modelConfig = Object.hasOwn(MODELS, modelKey) ? MODELS[modelKey] : undefined;
  if (!modelConfig) {
    setStatus('Invalid model selection', 'error');
    return;
  }

  setLoading(true);
  setReady(false);
  showProgress(true);
  updateProgress(0, 'Starting...');
  setStatus(`Loading ${modelConfig.label}...`);

  if (audioModel) {
    console.log('Disposing previous model...');
    audioModel.dispose();
    audioModel = null;
  }
  messages = [];
  pendingAudio = null;
  chatContainer.innerHTML = '';

  try {
    const device = await detectInferenceDevice();
    setStatus(`Loading audio model (${device})...`);

    audioModel = new AudioModel();
    await audioModel.load(modelConfig.path, {
      device,
      quantization: modelConfig.quantization || null,
      progressCallback: (progress) => {
        if (progress.status === 'loading') {
          updateProgress(progress.progress, `Loading ${progress.file}...`);
        } else if (progress.status === 'done') {
          updateProgress(100, 'Done');
        }
      },
    });

    showProgress(false);
    setStatus(`Ready! Audio model loaded on ${device === 'webgpu' ? 'WebGPU' : 'CPU'}`, 'success');
    setReady(true);
    // Cache info is cosmetic: refresh it in the background and never let a
    // failure there turn a successful load into an error.
    updateCacheInfo().catch((error) => {
      console.warn('Failed to refresh cache info:', error);
    });
    updatePlaceholder();
  } catch (error) {
    console.error('Load error:', error);
    showProgress(false);
    const msg = error instanceof Error ? error.message : String(error);
    setStatus(`Error: ${msg}`, 'error');
    audioModel = null;
  } finally {
    setLoading(false);
  }
}
| // ============================================================================ | |
| // Generation | |
| // ============================================================================ | |
/** Sample rate (Hz) used when packaging decoded model audio as WAV. */
const OUTPUT_SAMPLE_RATE = 24000;

/**
 * Wrap a decoded waveform in a WAV blob and append an audio player plus a
 * download link to a chat message.
 *
 * @param {HTMLElement} msgEl - Message element that receives the player.
 * @param {ArrayLike<number>} waveform - Decoded samples at OUTPUT_SAMPLE_RATE.
 * @param {{logLabel?: string, className?: string}} [options]
 *   `logLabel` prefixes the debug log line; `className`, when given, is set on
 *   the wrapper instead of the default inline top margin.
 */
function appendAudioPlayer(msgEl, waveform, { logLabel = 'WAV blob created:', className = '' } = {}) {
  const seconds = waveform.length / OUTPUT_SAMPLE_RATE;
  const wavBlob = createWavBlob(waveform, OUTPUT_SAMPLE_RATE);
  console.log(logLabel, wavBlob.size, 'bytes, duration:', seconds.toFixed(2), 's');
  // NOTE(review): this object URL is never revoked, so the blob lives until
  // the page is unloaded. Revoke it wherever chat messages get removed.
  const audioUrl = URL.createObjectURL(wavBlob);

  const container = document.createElement('div');
  if (className) {
    container.className = className;
  } else {
    container.style.marginTop = '0.75rem';
  }

  const player = document.createElement('audio');
  player.controls = true;
  player.preload = 'auto';
  player.src = audioUrl;
  player.style.cssText = 'width:100%;max-width:360px;display:block;';

  const link = document.createElement('a');
  link.href = audioUrl;
  link.download = 'generated_audio.wav';
  link.style.cssText = 'display:block;font-size:0.7rem;margin-top:0.25rem;color:#666;';
  link.textContent = `Download WAV (${seconds.toFixed(1)}s)`;

  container.appendChild(player);
  container.appendChild(link);
  msgEl.appendChild(container);
  chatContainer.scrollTop = chatContainer.scrollHeight;
}

/**
 * Run one chat turn: show the user's message, call the model in the mode
 * selected in the UI, stream the reply into a new assistant message, and
 * attach any generated audio.
 *
 * Mode dispatch:
 *  - `asr` with pending audio          -> `transcribe`
 *  - `tts`                             -> `generateSpeech`
 *  - `interleaved` with pending audio  -> `generateInterleaved`
 *  - `interleaved` with text only      -> `generateInterleavedFromText`
 *  - anything else with text           -> `generateTextOnly`
 *
 * The pending audio attachment is consumed (state and preview cleared
 * together) as soon as the turn starts, so audio that was already shown in
 * the user's message can never be silently re-sent by a later turn.
 *
 * Does nothing when no model is loaded or a generation is already running.
 * Errors are shown in the assistant message and the status line, and the
 * user's message is removed from the history again.
 *
 * @param {string} userMessage - Text typed by the user (may be empty when
 *   audio is attached).
 * @returns {Promise<void>}
 */
async function generate(userMessage) {
  if (!audioModel || isGenerating) return;
  isGenerating = true;
  setReady(false);

  const audioToSend = pendingAudio;
  if (audioToSend) {
    // Clear state and preview together; previously only the preview was
    // cleared here, leaving stale audio pending in TTS mode and after errors.
    clearPendingAudio();
  }

  messages.push({ role: 'user', content: userMessage });
  addMessage('user', userMessage, false, audioToSend);
  const { msgEl, textEl } = addMessage('assistant', '', true);

  let generatedText = '';
  let tokenCount = 0;
  // What `tokenCount` is counting: per-token callbacks count tokens, while
  // cumulative-text callbacks can only count characters.
  let countUnit = 'tokens';
  const startTime = performance.now();
  const elapsedSeconds = () => (performance.now() - startTime) / 1000;

  const showText = (text) => {
    textEl.textContent = text;
    chatContainer.scrollTop = chatContainer.scrollHeight;
  };

  // Per-token callback (ASR / TTS). Returning true tells the model to stop.
  const onTokenCallback = (token) => {
    if (token.includes('<|im_end|>') || token.includes('<|endoftext|>')) {
      return true;
    }
    generatedText += token;
    tokenCount++;
    showText(generatedText);
    updateSpinner(null, `${tokenCount} tokens · ${elapsedSeconds().toFixed(1)}s`);
    return false;
  };

  // Cumulative-text callback: receives the whole text produced so far.
  const onCumulativeText = (text) => {
    generatedText = text;
    tokenCount = text.length;
    countUnit = 'chars';
    showText(text);
  };
  const onCumulativeTextWithSpinner = (text) => {
    onCumulativeText(text);
    updateSpinner('Generating text...', `${tokenCount} chars · ${elapsedSeconds().toFixed(1)}s`);
  };
  const onAudioFrame = (frame, count) => {
    updateSpinner('Generating audio...', `${count} frames · ${elapsedSeconds().toFixed(1)}s`);
  };

  const decodeWaveform = async (audioCodes, logLabel) => {
    updateSpinner('Decoding audio...', `${audioCodes.length} frames`);
    const waveform = await audioModel.decodeAudioCodes(audioCodes);
    console.log(logLabel, waveform.length, 'samples');
    return waveform;
  };

  // Shared tail of both interleaved paths: final text plus optional audio.
  const renderInterleavedResult = async (result, playerOptions) => {
    generatedText = result.text || '';
    textEl.textContent = generatedText;
    if (!(result.audioCodes?.length > 0)) return;

    const waveform = await decodeWaveform(result.audioCodes, 'Waveform decoded:');
    if (waveform.length === 0) {
      console.warn('Waveform decoding returned empty result');
      return;
    }
    if (!generatedText) {
      generatedText = `Generated ${result.audioCodes.length} audio frames`;
    }
    appendAudioPlayer(msgEl, waveform, playerOptions);
  };

  try {
    const currentMode = audioModeSelect?.value || 'interleaved';

    if (currentMode === 'asr' && audioToSend) {
      showSpinner('Transcribing audio...');
      generatedText = await audioModel.transcribe(
        audioToSend.audioData,
        audioToSend.sampleRate,
        { onToken: onTokenCallback },
      );
    } else if (currentMode === 'tts') {
      showSpinner('Generating speech...');
      const result = await audioModel.generateSpeech(userMessage, {
        onToken: onTokenCallback,
        onAudioFrame,
      });
      if (result.audioCodes?.length > 0) {
        const waveform = await decodeWaveform(result.audioCodes, 'TTS waveform decoded:');
        if (waveform.length > 0) {
          const seconds = (waveform.length / OUTPUT_SAMPLE_RATE).toFixed(2);
          generatedText = result.textOutput
            || `Generated ${result.audioCodes.length} audio frames (${seconds}s)`;
          appendAudioPlayer(msgEl, waveform, { logLabel: 'TTS WAV blob created:' });
        } else {
          generatedText = '[Audio decoding failed - no waveform generated]';
          console.warn('TTS waveform decoding returned empty result');
        }
      } else {
        generatedText = result.textOutput || '[No audio generated]';
      }
    } else if (currentMode === 'interleaved' && audioToSend) {
      showSpinner('Processing audio...');
      const result = await audioModel.generateInterleaved(
        audioToSend.audioData,
        audioToSend.sampleRate,
        userMessage,
        { onToken: onCumulativeTextWithSpinner, onAudioFrame },
      );
      await renderInterleavedResult(result);
    } else if (currentMode === 'interleaved' && userMessage) {
      // Text-only follow-up in interleaved mode (still produces audio)
      showSpinner('Generating response...');
      const result = await audioModel.generateInterleavedFromText(userMessage, {
        onToken: onCumulativeTextWithSpinner,
        onAudioFrame,
      });
      await renderInterleavedResult(result, { className: 'audio-output' });
    } else if (userMessage) {
      // Fallback text-only generation
      showSpinner('Generating response...');
      const result = await audioModel.generateTextOnly(userMessage, {
        maxNewTokens: 256,
        onToken: onCumulativeText,
      });
      generatedText = result.text || '';
    }

    generatedText = generatedText.replace(/<\|im_end\|>$/g, '').trim();

    const elapsed = elapsedSeconds();
    const rate = elapsed > 0 ? tokenCount / elapsed : 0;
    const rateUnit = countUnit === 'tokens' ? 'tok' : 'chars';

    msgEl.classList.remove('generating');
    textEl.textContent = generatedText;

    const statsEl = document.createElement('div');
    statsEl.className = 'stats';
    statsEl.textContent = `${tokenCount} ${countUnit} in ${elapsed.toFixed(1)}s (${rate.toFixed(1)} ${rateUnit}/s)`;
    msgEl.appendChild(statsEl);

    messages.push({ role: 'assistant', content: generatedText });
    setStatus('Ready', 'success');
  } catch (error) {
    console.error('Generation error:', error);
    const msg = error instanceof Error ? error.message : String(error);
    textEl.textContent = `Error: ${msg}`;
    msgEl.classList.remove('generating');
    // Drop the user message pushed above so the history stays consistent.
    messages.pop();
    setStatus(`Error: ${msg}`, 'error');
  } finally {
    hideSpinner();
    isGenerating = false;
    setReady(true);
    userInput.focus();
  }
}
| // ============================================================================ | |
| // Event Handlers | |
| // ============================================================================ | |
// Load the selected model.
loadBtn.addEventListener('click', loadModel);

// Track the chosen audio mode and refresh the input placeholder. The selector
// is treated as optional everywhere else in this file (`audioModeSelect?.`),
// so guard the registration too instead of aborting the module when absent.
audioModeSelect?.addEventListener('change', () => {
  audioMode = audioModeSelect.value;
  updatePlaceholder();
  console.log(`Audio mode changed to: ${audioMode}`);
});

// The record button toggles between starting and stopping a recording.
recordBtn.addEventListener('click', () => {
  if (isRecording) {
    stopRecording();
  } else {
    startRecording();
  }
});
// Clear the conversation: history, rendered messages, pending audio and, when
// a model is loaded, its conversation state.
clearBtn.addEventListener('click', function resetConversation() {
  messages = [];
  chatContainer.innerHTML = '';
  clearPendingAudio();

  // Reset model conversation state (KV cache)
  if (!audioModel) return;
  audioModel.reset();
  setStatus('Conversation reset', 'success');
});
// Delete the downloaded model files after user confirmation.
// The button is optional (updateCacheInfo tolerates its absence), so the
// registration is guarded as well.
clearCacheBtn?.addEventListener('click', async () => {
  if (clearCacheBtn.disabled) return;

  try {
    const info = await getCacheInfo();
    const usedMB = info ? (info.used / 1024 / 1024).toFixed(0) : 0;
    const confirmed = confirm(
      `Delete downloaded model files?\n\n` +
      `This will free up ~${usedMB} MB of storage.\n` +
      `Models will be re-downloaded next time you load them.`
    );
    if (!confirmed) return;

    clearCacheBtn.textContent = 'Deleting...';
    try {
      await clearModelCache();
    } finally {
      // Restore the label even when deletion fails, so the button never
      // stays stuck on "Deleting...".
      clearCacheBtn.textContent = 'Delete Models';
    }
    await updateCacheInfo();
    setStatus('Downloaded models deleted', 'success');
  } catch (error) {
    console.error('Failed to delete cached models:', error);
    const msg = error instanceof Error ? error.message : String(error);
    setStatus(`Error deleting models: ${msg}`, 'error');
  }
});
// Validate the input for the selected mode, then start a generation.
sendBtn.addEventListener('click', () => {
  const text = userInput.value.trim();
  const mode = audioModeSelect?.value || 'interleaved';

  if (mode === 'tts' && !text) {
    setStatus('Please enter text to convert to speech', 'error');
    return;
  }
  if (mode === 'asr' && !pendingAudio && !text) {
    setStatus('Record or upload audio to transcribe', 'error');
    return;
  }
  if (text || pendingAudio) {
    userInput.value = '';
    generate(text);
  }
});

// Enter sends the message; Shift+Enter inserts a newline.
userInput.addEventListener('keydown', (e) => {
  if (e.key !== 'Enter' || e.shiftKey) return;
  // While an IME composition is active, Enter confirms the candidate text and
  // must not submit. keyCode 229 covers browsers that report the confirming
  // keydown without setting isComposing.
  if (e.isComposing || e.keyCode === 229) return;
  e.preventDefault();
  sendBtn.click();
});
/**
 * Render the preview row for the pending audio attachment.
 *
 * Built from DOM nodes with `textContent` because `name` is a user-supplied
 * file name and must never be parsed as HTML.
 *
 * @param {string} name - File name to display.
 * @param {string} info - Duration / sample-rate summary.
 */
function renderPendingAudioPreview(name, info) {
  if (!audioPreview) return;

  const item = document.createElement('div');
  item.className = 'audio-preview-item';

  const parts = [
    ['audio-icon', '🎤'],
    ['audio-name', name],
    ['audio-info', info],
  ];
  for (const [className, text] of parts) {
    const span = document.createElement('span');
    span.className = className;
    span.textContent = text;
    item.appendChild(span);
  }

  const removeBtn = document.createElement('button');
  removeBtn.type = 'button';
  removeBtn.className = 'remove-btn';
  removeBtn.textContent = '×';
  removeBtn.addEventListener('click', clearPendingAudio);
  item.appendChild(removeBtn);

  audioPreview.innerHTML = '';
  audioPreview.appendChild(item);
}

/**
 * Decode an audio file and stage it as the pending attachment.
 * Shared by the file picker and drag-and-drop; failures are logged and shown
 * in the status line.
 *
 * @param {File} file - Audio file chosen or dropped by the user.
 * @returns {Promise<void>}
 */
async function stageAudioFile(file) {
  try {
    setStatus('Loading audio file...');
    const { audioData, sampleRate } = await loadAudioFile(file);
    pendingAudio = { audioData, sampleRate, fileName: file.name };
    renderPendingAudioPreview(
      file.name,
      `${(audioData.length / sampleRate).toFixed(1)}s @ ${sampleRate}Hz`,
    );
    setStatus('Audio loaded. Click Send to process.', 'success');
  } catch (error) {
    console.error('Error loading audio:', error);
    const msg = error instanceof Error ? error.message : String(error);
    setStatus(`Error loading audio: ${msg}`, 'error');
  }
}

// The visible upload button forwards to the hidden file input.
audioBtn.addEventListener('click', () => {
  audioInput.click();
});

audioInput.addEventListener('change', async (e) => {
  const file = e.target.files[0];
  if (file) {
    await stageAudioFile(file);
  }
  // Reset so picking the same file again still fires a change event.
  audioInput.value = '';
});

// Exposed globally for inline `onclick` handlers in preview markup.
window.clearPendingAudio = clearPendingAudio;

// Drag and drop
document.addEventListener('dragenter', (e) => {
  if (isGenerating) return;
  e.preventDefault();
  dropOverlay.classList.add('active');
});

dropOverlay.addEventListener('dragleave', (e) => {
  e.preventDefault();
  dropOverlay.classList.remove('active');
});

dropOverlay.addEventListener('dragover', (e) => {
  e.preventDefault();
});

dropOverlay.addEventListener('drop', async (e) => {
  e.preventDefault();
  dropOverlay.classList.remove('active');
  if (isGenerating) return;

  const files = e.dataTransfer?.files;
  if (!files) return;

  // Only the first audio file in the drop is used.
  const audioFile = Array.from(files).find((file) => file.type.startsWith('audio/'));
  if (audioFile) {
    await stageAudioFile(audioFile);
  }
});
/**
 * Rebuild the model dropdown from `MODELS`: one option per entry (value = key,
 * text = label), with the first entry selected.
 */
function populateModelDropdown() {
  modelSelect.innerHTML = '';

  let leading = null;
  Object.entries(MODELS).forEach(([key, { label }], position) => {
    const option = document.createElement('option');
    option.value = key;
    option.textContent = label;
    modelSelect.appendChild(option);
    if (position === 0) {
      leading = option;
    }
  });

  if (leading) {
    leading.selected = true;
  }
}
/**
 * Probe WebGPU once at startup and describe the outcome in the status line.
 *
 * @returns {Promise<void>}
 */
async function reportWebGPUStatus() {
  if (!navigator.gpu) {
    setStatus('WebGPU not available - will use CPU (WASM). For GPU acceleration, enable chrome://flags/#enable-unsafe-webgpu');
    return;
  }

  let adapter;
  try {
    adapter = await navigator.gpu.requestAdapter();
  } catch (e) {
    setStatus(`WebGPU error: ${e.message} - will use CPU.`);
    return;
  }

  if (!adapter) {
    setStatus('WebGPU adapter not found - will use CPU. Check chrome://gpu for WebGPU status.');
    return;
  }

  try {
    // Use the first non-empty descriptive field the adapter exposes.
    const { description, vendor, architecture } = adapter.info || {};
    const desc = description || vendor || architecture || 'Available';
    setStatus(`WebGPU: ${desc}. Select model and click Load.`);
  } catch (e) {
    setStatus(`WebGPU error: ${e.message} - will use CPU.`);
  }
}

// Initialize
populateModelDropdown();
updateCacheInfo();
updatePlaceholder();

// Check WebGPU on load
reportWebGPUStatus();