// soprano-web-onnx / onnx-streaming.js
// KevinAHM's picture
// Soprano 1.1
// 9b19787
import { PCMPlayerWorklet as PCMPlayer } from './PCMPlayerWorklet.js';
// Configuration
// Sample rate passed as the `sampleRate` option of the AudioContext created in init().
const SAMPLE_RATE = 32000;
/**
 * Main-thread controller for the Soprano streaming text-to-speech page.
 *
 * Responsibilities visible in this class:
 *  - look up the page controls and wire their events;
 *  - create the AudioContext, the PCM player and the inference Web Worker;
 *  - forward text to the worker and hand the audio chunks it posts back to
 *    the player;
 *  - keep the TTFB / real-time-factor read-outs up to date;
 *  - render a bar and a waveform visualisation from `player.analyser`.
 *
 * Messages handled from the worker (by `type`): `status`, `model_status`,
 * `loaded`, `generation_started`, `audio_chunk`, `stream_ended`, `error`.
 * Messages sent to the worker: `load`, `generate`, `stop`.
 */
export class SopranoONNXStreaming {
    /**
     * Caches the DOM elements used by the controller, attaches UI listeners,
     * starts the asynchronous initialisation and the visualisation loop.
     */
    constructor() {
        this.worker = null;
        this.player = null;
        this.audioContext = null;
        this.isGenerating = false;
        this.isWorkerReady = false;
        // Set when the user asks for audio before the worker reported 'loaded'.
        this.pendingGeneration = false;

        // Metrics state (timestamps are performance.now() values, in ms).
        this.generationStartTime = 0;
        this.lastChunkFinishTime = 0;
        this.rtfMovingAverage = 0;

        // Scratch buffer reused by draw() so no array is allocated per frame.
        this.analyserData = null;

        this.elements = {
            textInput: document.getElementById('text-input'),
            generateBtn: document.getElementById('generate-btn'),
            stopBtn: document.getElementById('stop-btn'),
            statusText: document.getElementById('stat-status'),
            statusIndicator: document.getElementById('status-indicator'),
            modelStatusIcon: document.querySelector('#model-status .model-status__dot'),
            modelStatusText: document.querySelector('#model-status .model-status__text'),
            btnLoader: document.getElementById('btn-loader'),
            statTTFB: document.getElementById('stat-ttfb'),
            statRTFx: document.getElementById('stat-rtfx'),
            ttfbBar: document.getElementById('ttfb-bar'),
            rtfxContext: document.getElementById('rtfx-context')
        };

        this.attachEventListeners();
        // init() is async; surface a failure instead of leaving an unhandled
        // rejection and a UI stuck on "Loading Models..." with no explanation.
        this.init().catch((err) => this.handleInitError(err));
        this.setupVisualization();
    }

    /**
     * Creates the audio pipeline and the inference worker, then asks the
     * worker to load its models. The Generate button stays disabled until the
     * worker posts a `loaded` message.
     *
     * @returns {Promise<void>} Rejects if the AudioContext, the worklet module
     *     or the worker cannot be created.
     */
    async init() {
        console.log('Soprano v1.1 - Worker Edition');
        this.updateStatus('Initializing Worker...', 'running');

        // Initial button state
        this.elements.generateBtn.disabled = true;
        const btnText = this.elements.generateBtn.querySelector('.btn__text');
        if (btnText) btnText.textContent = 'Loading Models...';
        this.elements.btnLoader.style.display = 'block';

        // Initialize Audio Context and Player
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: SAMPLE_RATE,
            latencyHint: 'interactive'
        });
        await this.audioContext.audioWorklet.addModule('PCMPlayerWorklet.js');
        this.player = new PCMPlayer(this.audioContext);

        // Initialize Worker
        console.log('Spawning Inference Worker (Classic)...');
        this.worker = new Worker('./inference-worker.js');
        this.worker.onmessage = (e) => this.handleWorkerMessage(e.data);
        // Uncaught exceptions inside the worker arrive here rather than as an
        // 'error' message; treat them the same way so the UI is released.
        this.worker.onerror = (event) => {
            this.handleWorkerError(event && event.message ? event.message : 'Worker failed');
        };

        // Trigger Model Load in Worker
        this.worker.postMessage({ type: 'load' });
    }

    /**
     * Reports a failed init() in the console and, when the status elements
     * exist, in the status read-out.
     *
     * @param {*} err - The rejection reason of init().
     */
    handleInitError(err) {
        console.error('Initialization failed:', err);
        const message = err && err.message ? err.message : String(err);
        const { statusText, statusIndicator } = this.elements;
        if (statusText && statusIndicator) {
            this.updateStatus(`Error: ${message}`, 'error');
        }
    }

    /**
     * Dispatches one message received from the inference worker.
     *
     * @param {object} message - The `data` payload of the worker MessageEvent.
     */
    handleWorkerMessage(message) {
        const { type, data, error, status, state, metrics, text } = message;
        switch (type) {
            case 'status':
                this.updateStatus(status, state);
                break;
            case 'model_status':
                this.updateModelStatus(status, text);
                break;
            case 'loaded':
                this.handleModelsLoaded();
                break;
            case 'generation_started':
                // The main thread already stamps the start time in
                // startGeneration for better precision.
                break;
            case 'audio_chunk':
                this.handleAudioChunk(data, metrics);
                break;
            case 'stream_ended':
                this.handleStreamEnd();
                break;
            case 'error':
                this.handleWorkerError(error);
                break;
            default:
                break;
        }
    }

    /**
     * Marks the worker as ready, re-enables the Generate button and runs a
     * generation that was requested while the models were still loading.
     */
    handleModelsLoaded() {
        console.log('Worker confirmed models loaded.');
        this.isWorkerReady = true;
        this.elements.generateBtn.disabled = false;
        this.elements.btnLoader.style.display = 'none';
        const btnText = this.elements.generateBtn.querySelector('.btn__text');
        if (btnText) btnText.textContent = 'Generate Audio';

        if (this.pendingGeneration) {
            this.pendingGeneration = false;
            // startGeneration handles its own errors, so the promise can be dropped.
            this.startGeneration();
        }
    }

    /**
     * Shows a worker-side error and releases the UI.
     *
     * `isGenerating` must be cleared here: otherwise the Generate button is
     * re-enabled by resetUI() but startGeneration() keeps returning early.
     *
     * @param {*} error - Error description sent by (or raised in) the worker.
     */
    handleWorkerError(error) {
        console.error('Worker Error:', error);
        this.updateStatus(`Error: ${error}`, 'error');
        this.isGenerating = false;
        this.resetUI();
    }

    /**
     * Wires the Generate / Stop buttons, the sample-text buttons, the
     * character counter and the Ctrl/Cmd+Enter shortcut.
     */
    attachEventListeners() {
        const { generateBtn, stopBtn, textInput } = this.elements;

        generateBtn.addEventListener('click', () => this.startGeneration());
        stopBtn.addEventListener('click', () => this.stopGeneration());

        // Sample buttons
        document.querySelectorAll('.sample-btn').forEach((btn) => {
            btn.addEventListener('click', () => {
                textInput.value = btn.getAttribute('data-text');
                // Trigger input event to update character count
                textInput.dispatchEvent(new Event('input'));
            });
        });

        // Character count
        textInput.addEventListener('input', () => {
            const countEl = document.getElementById('char-count');
            if (countEl) countEl.textContent = textInput.value.length;
        });

        textInput.addEventListener('keydown', (e) => {
            if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
                this.startGeneration();
            }
        });
    }

    /**
     * Starts synthesising the text currently in the input box.
     *
     * If the worker is not ready yet the request is remembered and replayed
     * when the `loaded` message arrives. Calls made while a generation is
     * already running, or with empty text, are ignored and leave all state
     * (including the TTFB start stamp) untouched.
     *
     * @returns {Promise<void>} Never rejects; failures are reported via the
     *     status read-out.
     */
    async startGeneration() {
        try {
            if (!this.isWorkerReady) {
                this.pendingGeneration = true;
                const btnText = this.elements.generateBtn.querySelector('.btn__text');
                if (btnText) btnText.textContent = 'Starting soon...';
                return;
            }
            if (this.isGenerating) return;

            const text = this.elements.textInput.value.trim();
            if (!text) return;

            // Claim the generation before the first await so a second trigger
            // (e.g. Ctrl+Enter twice) cannot start a parallel generation.
            this.isGenerating = true;
            this.generationStartTime = performance.now();
            // Baseline for the first chunk's arrival-rate sample; without this
            // it would be measured against the previous run (or time 0).
            this.lastChunkFinishTime = this.generationStartTime;
            this.rtfMovingAverage = 0;

            if (this.audioContext && this.audioContext.state === 'suspended') {
                await this.audioContext.resume();
            }
            // A worker error may have cancelled the generation while resuming.
            if (!this.isGenerating) return;

            this.elements.generateBtn.disabled = true;
            this.elements.generateBtn.classList.add('btn--generating');
            this.elements.stopBtn.disabled = false;
            if (this.player) this.player.reset();

            // Reset metrics
            this.elements.statTTFB.textContent = '--';
            this.elements.statRTFx.textContent = '--';
            if (this.elements.ttfbBar) this.elements.ttfbBar.style.width = '0%';

            this.worker.postMessage({ type: 'generate', data: { text } });
        } catch (err) {
            console.error('Error in startGeneration:', err);
            this.updateStatus(`Error: ${err.message}`, 'error');
            this.isGenerating = false;
            this.resetUI();
        }
    }

    /**
     * Asks the worker to stop and immediately returns the UI to idle.
     *
     * NOTE(review): the worker may still post `stream_ended` for the stopped
     * run afterwards; messages carry no generation id, so that cannot be told
     * apart from the end of a newer run here.
     */
    stopGeneration() {
        if (!this.isGenerating) return;
        this.worker.postMessage({ type: 'stop' });
        // Handle stop immediately in UI
        this.handleStreamEnd();
    }

    /**
     * Plays one audio chunk and updates the TTFB / real-time-factor metrics.
     *
     * Timing is sampled when the chunk arrives; only the DOM writes are
     * deferred to the next animation frame, so frame latency (or a throttled
     * background tab) does not distort the numbers.
     *
     * @param {*} audioData - Chunk payload, passed through to `player.playAudio`.
     * @param {{isFirst?: boolean, chunkDuration?: number}} [metrics] -
     *     `isFirst` marks the first chunk of a generation; `chunkDuration` is
     *     divided by the seconds since the previous chunk (presumably the
     *     chunk's audio length in seconds - confirm against the worker).
     */
    handleAudioChunk(audioData, metrics) {
        if (!this.isGenerating) return;

        // Play audio
        this.player.playAudio(audioData);

        const arrivalTime = performance.now();
        const { isFirst = false, chunkDuration } = metrics || {};

        // TTFB (only on first chunk)
        const ttfb = isFirst ? arrivalTime - this.generationStartTime : null;

        // Real-Time Factor (Output Arrival Rate)
        const elapsedSec = (arrivalTime - this.lastChunkFinishTime) / 1000;
        this.lastChunkFinishTime = arrivalTime;
        let rtf = null;
        if (elapsedSec > 0 && Number.isFinite(chunkDuration)) {
            const instantaneousRTF = chunkDuration / elapsedSec;
            // Exponential moving average, seeded with the first sample.
            this.rtfMovingAverage = this.rtfMovingAverage === 0
                ? instantaneousRTF
                : this.rtfMovingAverage * 0.8 + instantaneousRTF * 0.2;
            rtf = this.rtfMovingAverage;
        }

        if (ttfb === null && rtf === null) return;
        requestAnimationFrame(() => {
            if (ttfb !== null) this.updateTTFB(ttfb);
            if (rtf !== null) this.updateRTFx(rtf);
        });
    }

    /**
     * Ends the current generation: notifies the player (when it exposes
     * `notifyStreamEnded`), restores the idle UI and clears `isGenerating`.
     */
    handleStreamEnd() {
        // The player is created asynchronously in init(), so it may be null.
        if (this.player && this.player.notifyStreamEnded) {
            this.player.notifyStreamEnded();
        }
        this.resetUI();
        this.isGenerating = false;
    }

    /** Restores the idle state of the Generate and Stop buttons. */
    resetUI() {
        const { generateBtn, stopBtn } = this.elements;
        generateBtn.disabled = false;
        generateBtn.classList.remove('btn--generating');
        const btnText = generateBtn.querySelector('.btn__text');
        if (btnText) btnText.textContent = 'Generate Audio';
        stopBtn.disabled = true;
    }

    /**
     * Updates the status read-out.
     *
     * @param {string} text - Text shown in the status element.
     * @param {string} state - Suffix of the `status-<state>` class applied to
     *     the indicator (its whole class list is replaced).
     */
    updateStatus(text, state) {
        this.elements.statusText.textContent = text;
        this.elements.statusIndicator.className = `status-indicator status-${state}`;
    }

    /**
     * Updates the model status read-out.
     *
     * @param {string} state - Suffix of the `status-<state>` class applied to
     *     the icon (its whole class list is replaced).
     * @param {string} text - Text shown next to the icon.
     */
    updateModelStatus(state, text) {
        this.elements.modelStatusText.textContent = text;
        this.elements.modelStatusIcon.className = `status-icon status-${state}`;
    }

    /**
     * Shows the time-to-first-byte and scales / colours its bar.
     * The bar is full at 2000 ms; green below 500 ms, amber below 1000 ms,
     * red otherwise.
     *
     * @param {number} ms - Time to first audio chunk in milliseconds.
     */
    updateTTFB(ms) {
        this.elements.statTTFB.textContent = Math.round(ms);
        const bar = this.elements.ttfbBar;
        // The bar is optional (startGeneration guards it as well).
        if (!bar) return;
        const percentage = Math.min((ms / 2000) * 100, 100);
        bar.style.width = `${percentage}%`;
        bar.style.background = ms < 500 ? '#10b981' : ms < 1000 ? '#f59e0b' : '#ef4444';
    }

    /**
     * Shows the smoothed real-time factor; the context label is green when
     * the value is at least 1.0 and red otherwise.
     *
     * @param {number} val - Real-time factor to display.
     */
    updateRTFx(val) {
        this.elements.statRTFx.textContent = `${val.toFixed(2)}x`;
        const context = this.elements.rtfxContext;
        if (context) context.style.color = val >= 1.0 ? '#10b981' : '#ef4444';
    }

    // -------------------------------------------------------------------------
    // Visualization
    // -------------------------------------------------------------------------

    /**
     * Grabs both visualiser canvases, sizes them, keeps them sized on window
     * resize and starts the animation loop. Does nothing if either canvas is
     * missing from the page.
     */
    setupVisualization() {
        this.waveformCanvas = document.getElementById('visualizer-waveform');
        this.barsCanvas = document.getElementById('visualizer-bars');
        if (!this.waveformCanvas || !this.barsCanvas) return;

        this.waveformCtx = this.waveformCanvas.getContext('2d');
        this.barsCtx = this.barsCanvas.getContext('2d');

        // Initial resize
        this.resizeCanvases();
        window.addEventListener('resize', () => this.resizeCanvases());

        // Start animation loop
        requestAnimationFrame(() => this.draw());
    }

    /**
     * Matches both canvases to the size of the waveform canvas's parent,
     * scaling the backing store by the device pixel ratio so drawing code can
     * keep working in CSS pixels.
     */
    resizeCanvases() {
        if (!this.waveformCanvas || !this.barsCanvas) return;
        const parent = this.waveformCanvas.parentElement;
        const width = parent.clientWidth;
        const height = parent.clientHeight;
        const dpr = window.devicePixelRatio || 1;

        for (const canvas of [this.waveformCanvas, this.barsCanvas]) {
            canvas.width = width * dpr;
            canvas.height = height * dpr;
            canvas.style.width = `${width}px`;
            canvas.style.height = `${height}px`;
            // Absolute transform: same result as scale() on the freshly reset
            // context, but cannot accumulate.
            canvas.getContext('2d').setTransform(dpr, 0, 0, dpr, 0, 0);
        }
    }

    /**
     * Animation-frame callback: reschedules itself, then draws the frequency
     * bars and the waveform from `player.analyser` once the player exists.
     */
    draw() {
        requestAnimationFrame(() => this.draw());
        if (!this.player || !this.player.analyser) return;

        const analyser = this.player.analyser;
        const binCount = analyser.frequencyBinCount;
        if (!this.analyserData || this.analyserData.length !== binCount) {
            this.analyserData = new Uint8Array(binCount);
        }
        const dataArray = this.analyserData;

        // Draw Bars (Frequency)
        analyser.getByteFrequencyData(dataArray);
        this.drawBars(dataArray);

        // Draw Waveform (Time Domain)
        analyser.getByteTimeDomainData(dataArray);
        this.drawWaveform(dataArray);
    }

    /**
     * Draws the time-domain samples as a poly-line across the waveform canvas.
     *
     * @param {Uint8Array} dataArray - Byte samples; 128 maps to the vertical
     *     centre of the canvas.
     */
    drawWaveform(dataArray) {
        const ctx = this.waveformCtx;
        const dpr = window.devicePixelRatio || 1;
        const width = this.waveformCanvas.width / dpr;
        const height = this.waveformCanvas.height / dpr;

        ctx.clearRect(0, 0, width, height);
        ctx.lineWidth = 2;
        ctx.strokeStyle = '#3b82f6'; // Blue primary
        ctx.beginPath();

        const sliceWidth = width / dataArray.length;
        let x = 0;
        for (let i = 0; i < dataArray.length; i++) {
            const v = dataArray[i] / 128.0;
            const y = (v * height) / 2;
            if (i === 0) ctx.moveTo(x, y);
            else ctx.lineTo(x, y);
            x += sliceWidth;
        }
        ctx.lineTo(width, height / 2);
        ctx.stroke();
    }

    /**
     * Draws the frequency data as gradient-filled bars. Each bar shows the
     * average of a run of consecutive bins and is at most 80% of the canvas
     * height.
     *
     * @param {Uint8Array} dataArray - Byte magnitudes (0-255) per frequency bin.
     * @param {number} [barCount=120] - Maximum number of bars. Fewer are drawn
     *     when there are fewer bins than bars, so every bar always covers at
     *     least one bin (otherwise the average would be NaN).
     */
    drawBars(dataArray, barCount = 120) {
        const ctx = this.barsCtx;
        const dpr = window.devicePixelRatio || 1;
        const width = this.barsCanvas.width / dpr;
        const height = this.barsCanvas.height / dpr;

        ctx.clearRect(0, 0, width, height);

        const bars = Math.min(Math.floor(barCount), dataArray.length);
        if (!(bars > 0)) return;

        const barWidth = width / bars;
        const samplesPerBar = Math.floor(dataArray.length / bars);

        for (let i = 0; i < bars; i++) {
            let sum = 0;
            const start = i * samplesPerBar;
            for (let j = 0; j < samplesPerBar; j++) {
                sum += dataArray[start + j];
            }
            const average = sum / samplesPerBar;
            const barHeight = (average / 255) * height * 0.8;

            // Gradient for bar
            const gradient = ctx.createLinearGradient(0, height, 0, height - barHeight);
            gradient.addColorStop(0, '#3b82f644');
            gradient.addColorStop(1, '#8b5cf6cc');
            ctx.fillStyle = gradient;

            const x = i * barWidth;
            const y = height - barHeight;
            ctx.beginPath();
            if (typeof ctx.roundRect === 'function') {
                // Rounded top corners
                ctx.roundRect(x + 1, y, barWidth - 2, barHeight, [2, 2, 0, 0]);
            } else {
                // Engines without roundRect: square bars instead of a throw
                // on every animation frame.
                ctx.rect(x + 1, y, barWidth - 2, barHeight);
            }
            ctx.fill();
        }
    }
}
// Start the app once the DOM is available. If this module is evaluated after
// the document has finished parsing (e.g. loaded via dynamic import),
// DOMContentLoaded has already fired and a listener alone would never run,
// so the current readyState is checked first.
const startApp = () => {
    window.app = new SopranoONNXStreaming();
};
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', startApp, { once: true });
} else {
    startApp();
}