import * as ort from 'onnxruntime-web'; const presetTexts = window.presetTexts || {}; const PLAY_ICON_SVG = ``; const PAUSE_ICON_SVG = ``; const STOP_ICON_SVG = ``; // Lightning background parallax (function initLightningParallax() { if (typeof document === 'undefined') { return; } // Removed scroll-based CSS variable updates for direct scroll response // const updateLightningOffset = () => { // document.body.style.setProperty('--lightning-scroll', `${window.scrollY}px`); // }; // let ticking = false; // const onScroll = () => { // if (!ticking) { // window.requestAnimationFrame(() => { // updateLightningOffset(); // ticking = false; // }); // ticking = true; // } // }; // updateLightningOffset(); // window.addEventListener('scroll', onScroll, { passive: true }); const runBlink = (className, onComplete) => { let remaining = 1 + Math.round(Math.random()); const blink = () => { if (remaining-- <= 0) { if (typeof onComplete === 'function') { onComplete(); } return; } const wait = 20 + Math.random() * 80; document.body.classList.add(className); setTimeout(() => { document.body.classList.remove(className); setTimeout(blink, wait); }, wait); }; blink(); }; const schedule = () => { setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000); }; schedule(); /* const heroSection = document.querySelector('.hero'); if (heroSection) { heroSection.addEventListener('click', (event) => { runBlink('lightning-flash'); }); } */ })(); function escapeHtml(value) { return value.replace(/[&<>"']/g, (match) => { switch (match) { case '&': return '&'; case '<': return '<'; case '>': return '>'; case '"': return '"'; case "'": return '''; default: return match; } }); } function formatStatValueWithSuffix(value, suffix, options = {}) { const { firstLabel = false } = options; if (value === undefined || value === null) { return ''; } if (!suffix) { const raw = `${value}`; return escapeHtml(raw); } const raw = `${value}`.trim(); if (!raw || raw === '--' || raw === '-' || 
raw.toLowerCase() === 'error') { return escapeHtml(raw); } const appendSuffix = (segment, includePrefix = false) => { const trimmed = segment.trim(); if (!trimmed) { return ''; } const escapedValue = `${escapeHtml(trimmed)}`; const suffixSpan = `${escapeHtml(suffix)}`; const prefixSpan = includePrefix && firstLabel ? `First` : ''; const segmentClass = includePrefix && firstLabel ? 'stat-value-segment has-prefix' : 'stat-value-segment'; return `${prefixSpan}${escapedValue}${suffixSpan}`; }; if (raw.includes('/')) { const parts = raw.split('/'); const segments = parts.map((part, index) => appendSuffix(part, index === 0)); return segments.join(' / '); } return appendSuffix(raw); } /** * Unicode text processor */ export class UnicodeProcessor { constructor(indexer) { this.indexer = indexer; } call(textList) { const processedTexts = textList.map(t => preprocessText(t)); const textIdsLengths = processedTexts.map(t => t.length); const maxLen = Math.max(...textIdsLengths); const textIds = []; const unsupportedChars = new Set(); for (let i = 0; i < processedTexts.length; i++) { const row = new Array(maxLen).fill(0); const unicodeVals = textToUnicodeValues(processedTexts[i]); for (let j = 0; j < unicodeVals.length; j++) { const indexValue = this.indexer[unicodeVals[j]]; // Check if character is supported (not -1, undefined, or null) if (indexValue === undefined || indexValue === null || indexValue === -1) { unsupportedChars.add(processedTexts[i][j]); row[j] = 0; // Use 0 as fallback } else { row[j] = indexValue; } } textIds.push(row); } const textMask = getTextMask(textIdsLengths); return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) }; } } export function preprocessText(text) { // Normalize unicode characters text = text.normalize('NFKD'); // Remove emojis text = 
text.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]/gu, ''); // Replace various dashes and symbols text = text.replace(/–/g, "-"); text = text.replace(/‑/g, "-"); text = text.replace(/—/g, "-"); text = text.replace(/¯/g, " "); text = text.replace(/_/g, " "); text = text.replace(/[“”]/g, '"'); text = text.replace(/[‘’´`]/g, "'"); text = text.replace(/\[/g, " "); text = text.replace(/\]/g, " "); text = text.replace(/\|/g, " "); text = text.replace(/[\u0302\u0303\u0304\u0305\u0306\u0307\u0308\u030A\u030B\u030C\u0327\u0328\u0329\u032A\u032B\u032C\u032D\u032E\u032F]/g, ""); text = text.replace(/\//g, " "); // FIXME: slash should be kept (e.g., fraction) text = text.replace(/#/g, " "); // FIXME: hash should be kept (e.g., hashtag) text = text.replace(/→/g, " "); text = text.replace(/←/g, " "); // Remove special symbols text = text.replace(/[♥☆♡©\\]/g, ""); // Replace known expressions text = text.replace(/@/g, " at "); text = text.replace(/&/g, " and "); text = text.replace(/e\.g\.,/g, "for example, "); text = text.replace(/i\.e\.,/g, "that is, "); // Fix spacing around punctuation text = text.replace(/ ,/g, ","); text = text.replace(/ \./g, "."); text = text.replace(/ !/g, "!"); text = text.replace(/ \?/g, "?"); text = text.replace(/ ;/g, ";"); text = text.replace(/ :/g, ":"); text = text.replace(/ '/g, "'"); // Remove duplicate quotes while (text.includes('""')) { text = text.replace(/""/g, '"'); } while (text.includes("''")) { text = text.replace(/''/g, "'"); } while (text.includes("``")) { text = text.replace(/``/g, "`"); } // Remove extra spaces while (text.includes(" ")) { text = text.replace(/ /g, " "); } // Remove first and last spaces text = text.trim(); text = text.replace(/\s+/g, " "); // Remove extra spaces // if text doesn't end with punctuation, quotes, or closing 
brackets, add a period const lastChar = text[text.length - 1]; if (!/[.!?;:,'"'"')\]}…。」』】〉》›»]/.test(lastChar)) { text = text + '.'; } return text; } export function textToUnicodeValues(text) { return Array.from(text).map(char => char.charCodeAt(0)); } export function lengthToMask(lengths, maxLen = null) { maxLen = maxLen || Math.max(...lengths); const mask = []; for (let i = 0; i < lengths.length; i++) { const row = []; for (let j = 0; j < maxLen; j++) { row.push(j < lengths[i] ? 1.0 : 0.0); } mask.push([row]); } return mask; } export function getTextMask(textIdsLengths) { return lengthToMask(textIdsLengths); } export function getLatentMask(wavLengths, cfgs) { const baseChunkSize = cfgs.ae.base_chunk_size; const chunkCompressFactor = cfgs.ttl.chunk_compress_factor; const latentSize = baseChunkSize * chunkCompressFactor; const latentLengths = wavLengths.map(len => Math.floor((len + latentSize - 1) / latentSize) ); return lengthToMask(latentLengths); } export function sampleNoisyLatent(duration, cfgs) { const sampleRate = cfgs.ae.sample_rate; const baseChunkSize = cfgs.ae.base_chunk_size; const chunkCompressFactor = cfgs.ttl.chunk_compress_factor; const ldim = cfgs.ttl.latent_dim; const wavLenMax = Math.max(...duration.map(d => d[0][0])) * sampleRate; const wavLengths = duration.map(d => Math.floor(d[0][0] * sampleRate)); const chunkSize = baseChunkSize * chunkCompressFactor; const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize); const latentDim = ldim * chunkCompressFactor; const noisyLatent = []; for (let b = 0; b < duration.length; b++) { const batch = []; for (let d = 0; d < latentDim; d++) { const row = []; for (let t = 0; t < latentLen; t++) { const u1 = Math.random(); const u2 = Math.random(); const randNormal = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2); row.push(randNormal); } batch.push(row); } noisyLatent.push(batch); } const latentMask = getLatentMask(wavLengths, cfgs); for (let b = 0; b < noisyLatent.length; b++) { 
for (let d = 0; d < noisyLatent[b].length; d++) { for (let t = 0; t < noisyLatent[b][d].length; t++) { noisyLatent[b][d][t] *= latentMask[b][0][t]; } } } return { noisyLatent, latentMask }; } export async function loadOnnx(onnxPath, opts) { return await ort.InferenceSession.create(onnxPath, opts); } export async function loadOnnxAll(basePath, opts, onProgress) { const models = [ { name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' }, { name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' }, { name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' }, { name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' } ]; const result = {}; let loadedCount = 0; // Load all models in parallel const loadPromises = models.map(async (model) => { const session = await loadOnnx(model.path, opts); loadedCount++; if (onProgress) { onProgress(model.name, loadedCount, models.length); } return { key: model.key, session }; }); // Wait for all models to load const loadedModels = await Promise.all(loadPromises); // Organize results loadedModels.forEach(({ key, session }) => { result[key] = session; }); try { // Download counting await fetch('https://huggingface.co/Supertone/supertonic/resolve/main/config.json'); } catch (error) { console.warn('Failed to update download count:', error); } return result; } export async function loadCfgs(basePath) { const response = await fetch(`${basePath}/tts.json`); return await response.json(); } export async function loadProcessors(basePath) { const response = await fetch(`${basePath}/unicode_indexer.json`); const unicodeIndexerData = await response.json(); const textProcessor = new UnicodeProcessor(unicodeIndexerData); return { textProcessor }; } function parseWavFile(buffer) { const view = new DataView(buffer); // Check RIFF header const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3)); if (riff 
!== 'RIFF') { throw new Error('Not a valid WAV file'); } const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11)); if (wave !== 'WAVE') { throw new Error('Not a valid WAV file'); } let offset = 12; let fmtChunk = null; let dataChunk = null; while (offset < buffer.byteLength) { const chunkId = String.fromCharCode( view.getUint8(offset), view.getUint8(offset + 1), view.getUint8(offset + 2), view.getUint8(offset + 3) ); const chunkSize = view.getUint32(offset + 4, true); if (chunkId === 'fmt ') { fmtChunk = { audioFormat: view.getUint16(offset + 8, true), numChannels: view.getUint16(offset + 10, true), sampleRate: view.getUint32(offset + 12, true), bitsPerSample: view.getUint16(offset + 22, true) }; } else if (chunkId === 'data') { dataChunk = { offset: offset + 8, size: chunkSize }; break; } offset += 8 + chunkSize; } if (!fmtChunk || !dataChunk) { throw new Error('Invalid WAV file format'); } const bytesPerSample = fmtChunk.bitsPerSample / 8; const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels)); const audioData = new Float32Array(numSamples); if (fmtChunk.bitsPerSample === 16) { for (let i = 0; i < numSamples; i++) { let sample = 0; for (let ch = 0; ch < fmtChunk.numChannels; ch++) { const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2; sample += view.getInt16(sampleOffset, true); } audioData[i] = (sample / fmtChunk.numChannels) / 32768.0; } } else if (fmtChunk.bitsPerSample === 24) { // Support 24-bit PCM for (let i = 0; i < numSamples; i++) { let sample = 0; for (let ch = 0; ch < fmtChunk.numChannels; ch++) { const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3; // Read 3 bytes and convert to signed 24-bit integer const byte1 = view.getUint8(sampleOffset); const byte2 = view.getUint8(sampleOffset + 1); const byte3 = view.getUint8(sampleOffset + 2); let value = (byte3 << 16) | (byte2 << 8) | byte1; // Convert to signed (two's 
complement) if (value & 0x800000) { value = value - 0x1000000; } sample += value; } audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0; // 2^23 } } else if (fmtChunk.bitsPerSample === 32) { for (let i = 0; i < numSamples; i++) { let sample = 0; for (let ch = 0; ch < fmtChunk.numChannels; ch++) { const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4; sample += view.getFloat32(sampleOffset, true); } audioData[i] = sample / fmtChunk.numChannels; } } else { throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`); } return { sampleRate: fmtChunk.sampleRate, audioData: audioData }; } export function arrayToTensor(array, dims) { const flat = array.flat(Infinity); return new ort.Tensor('float32', Float32Array.from(flat), dims); } export function intArrayToTensor(array, dims) { const flat = array.flat(Infinity); return new ort.Tensor('int64', BigInt64Array.from(flat.map(x => BigInt(x))), dims); } export function writeWavFile(audioData, sampleRate) { const numChannels = 1; const bitsPerSample = 16; const byteRate = sampleRate * numChannels * bitsPerSample / 8; const blockAlign = numChannels * bitsPerSample / 8; const dataSize = audioData.length * bitsPerSample / 8; const buffer = new ArrayBuffer(44 + dataSize); const view = new DataView(buffer); // RIFF header view.setUint8(0, 'R'.charCodeAt(0)); view.setUint8(1, 'I'.charCodeAt(0)); view.setUint8(2, 'F'.charCodeAt(0)); view.setUint8(3, 'F'.charCodeAt(0)); view.setUint32(4, 36 + dataSize, true); view.setUint8(8, 'W'.charCodeAt(0)); view.setUint8(9, 'A'.charCodeAt(0)); view.setUint8(10, 'V'.charCodeAt(0)); view.setUint8(11, 'E'.charCodeAt(0)); // fmt chunk view.setUint8(12, 'f'.charCodeAt(0)); view.setUint8(13, 'm'.charCodeAt(0)); view.setUint8(14, 't'.charCodeAt(0)); view.setUint8(15, ' '.charCodeAt(0)); view.setUint32(16, 16, true); view.setUint16(20, 1, true); // PCM view.setUint16(22, numChannels, true); view.setUint32(24, sampleRate, 
true); view.setUint32(28, byteRate, true); view.setUint16(32, blockAlign, true); view.setUint16(34, bitsPerSample, true); // data chunk view.setUint8(36, 'd'.charCodeAt(0)); view.setUint8(37, 'a'.charCodeAt(0)); view.setUint8(38, 't'.charCodeAt(0)); view.setUint8(39, 'a'.charCodeAt(0)); view.setUint32(40, dataSize, true); // Write audio data for (let i = 0; i < audioData.length; i++) { const sample = Math.max(-1, Math.min(1, audioData[i])); const intSample = Math.floor(sample * 32767); view.setInt16(44 + i * 2, intSample, true); } return buffer; } // Smooth scroll functionality document.addEventListener('DOMContentLoaded', () => { // Smooth scroll for anchor links document.querySelectorAll('a[href^="#"]').forEach(anchor => { anchor.addEventListener('click', function (e) { e.preventDefault(); const href = this.getAttribute('href'); const target = document.querySelector(href); if (target) { // Update URL with anchor if (history.pushState) { history.pushState(null, null, href); } target.scrollIntoView({ behavior: 'smooth', block: 'start' }); } }); }); // Add scroll animation for sections const observerOptions = { threshold: 0.1, rootMargin: '0px 0px -100px 0px' }; const observer = new IntersectionObserver((entries) => { entries.forEach(entry => { if (entry.isIntersecting) { entry.target.style.opacity = '1'; entry.target.style.transform = 'translateY(0)'; } }); }, observerOptions); // Observe language icons and paper cards document.querySelectorAll('.language-icon, .paper-card').forEach(card => { card.style.opacity = '0'; card.style.transform = 'translateY(20px)'; card.style.transition = 'opacity 0.6s ease-out, transform 0.6s ease-out'; observer.observe(card); }); // Add parallax effect to hero background window.addEventListener('scroll', () => { const scrolled = window.pageYOffset; const heroBg = document.querySelector('.hero-bg'); if (heroBg) { heroBg.style.transform = `translateY(${scrolled * 0.5}px)`; } }); const paperCards = 
document.querySelectorAll('.paper-card[data-link]'); paperCards.forEach((card) => { const href = card.dataset.link ? card.dataset.link.trim() : ''; if (!href) { return; } const openLink = () => { window.open(href, '_blank', 'noopener,noreferrer'); }; card.addEventListener('click', (event) => { if (event.defaultPrevented) { return; } openLink(); }); card.addEventListener('keydown', (event) => { if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); openLink(); } }); }); // Active side navigation dot on scroll const sections = document.querySelectorAll('section[id]'); const navDots = document.querySelectorAll('.nav-dot'); const languageSnippetElement = document.querySelector('.languages-placeholder [data-language-snippet]'); const languageTabButtons = Array.from(document.querySelectorAll('.languages-placeholder .language-option')); const languageIconButtons = Array.from(document.querySelectorAll('.languages-icons-container .language-icon')); const languageCopyBtn = document.querySelector('.languages-placeholder .code-copy-btn'); const languageCopyToast = document.querySelector('.languages-placeholder .code-copy-toast'); let copyToastTimeout = null; if (languageSnippetElement && (languageTabButtons.length || languageIconButtons.length)) { const sharedSetupSteps = [ { text: '# Clone the Supertonic repository', type: 'heading' }, { text: 'git clone https://github.com/supertone-inc/supertonic.git', type: 'command' }, { text: 'cd supertonic', type: 'command' }, { text: ' ', type: 'plain' }, { text: '# Download ONNX models (NOTE: Make sure git-lfs is installed)', type: 'heading' }, { text: 'git clone https://huggingface.co/Supertone/supertonic assets', type: 'command' }, { text: ' ', type: 'plain' }, ]; const perLanguageCommands = { python: [ 'cd py', 'uv sync', 'uv run example_onnx.py', ], javascript: [ 'cd nodejs', 'npm install', 'npm start', ], java: [ 'cd java', 'mvn clean install', 'mvn exec:java', ], cpp: [ 'cd cpp', 'mkdir build && cd build', 
'cmake .. && cmake --build . --config Release', './example_onnx', ], csharp: [ 'cd csharp', 'dotnet restore', 'dotnet run', ], go: [ 'cd go', 'go mod download', 'go run example_onnx.go helper.go', ], swift: [ 'cd swift', 'swift build -c release', '.build/release/example_onnx', ], rust: [ 'cd rust', 'cargo build --release', './target/release/example_onnx', ], }; const buildCodeSample = (commands = []) => [ ...sharedSetupSteps, { text: '# Run example', type: 'heading' }, ...commands.map(text => ({ text, type: 'command' })), ]; const codeSamples = Object.fromEntries( Object.entries(perLanguageCommands).map(([language, commands]) => [ language, buildCodeSample(commands), ]), ); const escapeHtml = (value) => value .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); const renderSnippet = (lines) => lines.map((line) => { if (!line || typeof line.text !== 'string') { return ''; } const kind = line.type || (line.highlight ? 'command' : 'plain'); if (kind === 'heading') { return `${escapeHtml(line.text)}`; } if (kind === 'command') { const [command, ...rest] = line.text.trim().split(/\s+/); if (!command) { return ''; } const commandHtml = `${escapeHtml(command)}`; const restHtml = rest.length ? ` ${escapeHtml(rest.join(' '))}` : ''; return `${commandHtml}${restHtml}`; } return `${escapeHtml(line.text)}`; }).join('\n'); const setLanguage = (language) => { const snippet = codeSamples[language]; if (!snippet) { console.warn(`No code sample registered for language "${language}".`); return; } languageTabButtons.forEach((button) => { const isActive = button.dataset.language === language; button.classList.toggle('active', isActive); button.setAttribute('aria-selected', String(isActive)); button.setAttribute('tabindex', isActive ? 
'0' : '-1'); }); languageIconButtons.forEach((button) => { const isActive = button.dataset.language === language; button.classList.toggle('active', isActive); button.setAttribute('aria-pressed', String(isActive)); }); languageSnippetElement.innerHTML = renderSnippet(snippet); }; const interactiveButtons = [...new Set([...languageTabButtons, ...languageIconButtons])]; interactiveButtons.forEach((button) => { const { language } = button.dataset; if (!language || !codeSamples[language]) { return; } button.addEventListener('click', () => setLanguage(language)); button.addEventListener('keydown', (event) => { if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); setLanguage(language); } }); }); const defaultLanguage = (languageTabButtons[0] && languageTabButtons[0].dataset.language) || (languageIconButtons[0] && languageIconButtons[0].dataset.language) || 'python'; setLanguage(defaultLanguage); if (languageCopyBtn) { languageCopyBtn.addEventListener('click', async () => { const codeText = languageSnippetElement ? 
languageSnippetElement.textContent.trim() : ''; if (!codeText) { return; } const showToast = () => { if (!languageCopyToast) return; languageCopyToast.textContent = 'Code copied to clipboard'; languageCopyToast.classList.add('is-visible'); if (copyToastTimeout) { clearTimeout(copyToastTimeout); } copyToastTimeout = setTimeout(() => { languageCopyToast.classList.remove('is-visible'); }, 2000); }; try { if (navigator.clipboard && navigator.clipboard.writeText) { await navigator.clipboard.writeText(codeText); } else { const textArea = document.createElement('textarea'); textArea.value = codeText; textArea.style.position = 'fixed'; textArea.style.top = '-1000px'; textArea.style.left = '-1000px'; document.body.appendChild(textArea); textArea.focus(); textArea.select(); document.execCommand('copy'); document.body.removeChild(textArea); } showToast(); } catch (error) { console.error('Failed to copy code snippet:', error); } }); } } window.addEventListener('scroll', () => { let current = ''; const scrollPosition = window.pageYOffset || window.scrollY; const windowHeight = window.innerHeight; const documentHeight = document.documentElement.scrollHeight; // Check if we're near the bottom of the page (within 100px) const isNearBottom = scrollPosition + windowHeight >= documentHeight - 100; if (isNearBottom && sections.length > 0) { // If near bottom, activate the last section const lastSection = sections[sections.length - 1]; current = lastSection.getAttribute('id'); } else { // Otherwise, find the current section based on scroll position sections.forEach(section => { const sectionTop = section.offsetTop; const sectionHeight = section.clientHeight; if (scrollPosition >= sectionTop - 300) { current = section.getAttribute('id'); } }); } navDots.forEach(dot => { dot.classList.remove('active'); if (dot.getAttribute('href') === `#${current}`) { dot.classList.add('active'); } }); }); }); // Import helper functions and ONNX Runtime at the top // import { // sampleNoisyLatent, // 
loadOnnxAll, // loadCfgs, // loadProcessors, // loadWavRef, // arrayToTensor, // intArrayToTensor, // writeWavFile // } from './helper.js'; // import * as ort from 'onnxruntime-web'; // TTS Demo functionality (async function() { // Check if we're on a page with the TTS demo const demoTextInput = document.getElementById('demoTextInput'); if (!demoTextInput) return; // Configure ONNX Runtime for WebGPU support ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; ort.env.wasm.numThreads = 1; // Configuration const REF_EMBEDDING_PATHS = { 'F': 'assets/voice_styles/F.json', 'M': 'assets/voice_styles/M.json' }; // Global state let models = null; let cfgs = null; let processors = null; let currentVoice = 'F'; // Default to Female voice let refEmbeddingCache = {}; // Cache for embeddings let currentStyleTtlTensor = null; let currentStyleDpTensor = null; // UI Elements const demoStatusBox = document.getElementById('demoStatusBox'); const demoStatusText = document.getElementById('demoStatusText'); const demoBackendBadge = document.getElementById('demoBackendBadge'); const demoGenerateBtn = document.getElementById('demoGenerateBtn'); const demoTotalSteps = document.getElementById('demoTotalSteps'); const demoDurationFactor = document.getElementById('demoDurationFactor'); const demoTotalStepsValue = document.getElementById('demoTotalStepsValue'); const demoDurationFactorValue = document.getElementById('demoDurationFactorValue'); const demoResults = document.getElementById('demoResults'); const demoError = document.getElementById('demoError'); const demoCharCount = document.getElementById('demoCharCount'); const demoCharCounter = document.getElementById('demoCharCounter'); const demoCharStatus = document.getElementById('demoCharStatus'); const demoElevenLabsApiKey = document.getElementById('demoElevenLabsApiKey'); const demoSecondaryApiKey = document.getElementById('demoSecondaryApiKey'); const demoTertiaryApiKey = 
document.getElementById('demoTertiaryApiKey'); const demoComparisonSection = document.getElementById('demoComparisonSection'); // Billing Modal Elements const billingModal = document.getElementById('billingModal'); const billingModalMessage = document.getElementById('billingModalMessage'); const billingCharCount = document.getElementById('billingCharCount'); const billingProviders = document.getElementById('billingProviders'); const billingModalCancel = document.getElementById('billingModalCancel'); const billingModalConfirm = document.getElementById('billingModalConfirm'); // Text validation constants const MIN_CHARS = 10; const MAX_CHUNK_LENGTH = 300; // Maximum length for each chunk // Custom audio player state (shared across generations) let audioContext = null; let scheduledSources = []; let audioChunks = []; let totalDuration = 0; let startTime = 0; let pauseTime = 0; let isPaused = false; let isPlaying = false; let animationFrameId = null; let playPauseBtn = null; let progressBar = null; let currentTimeDisplay = null; let durationDisplay = null; let progressFill = null; let firstChunkGenerationTime = 0; // Processing time for first chunk let totalChunks = 0; let nextScheduledTime = 0; // Next time to schedule audio chunk let currentGenerationTextLength = 0; let supertonicPlayerRecord = null; // Supertonic player record for cross-player pause management let isGenerating = false; // Track if speech generation is in progress // Track all custom audio players (for ElevenLabs, etc.) let customAudioPlayers = []; const textHandlingAudioPlayers = []; const TEXT_HANDLING_CARD_AUDIO_MAP = [1, 2, 3, 4]; let isComparisonMode = false; const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches; const trimDecimalsForMobile = (formatted) => { if (!formatted) return formatted; return isMobileViewport() ? 
formatted.replace(/\.\d{2}$/, '') : formatted; }; function pauseAllPlayersExcept(currentPlayer) { customAudioPlayers.forEach(player => { if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') { player.pausePlayback(); } }); } function pauseTextHandlingPlayersExcept(currentPlayer) { textHandlingAudioPlayers.forEach(player => { if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') { player.pausePlayback(); } }); } /** * Chunk text into smaller pieces based on sentence boundaries * @param {string} text - The text to chunk * @param {number} maxLen - Maximum length for each chunk * @returns {Array} - Array of text chunks */ function chunkText(text, maxLen = MAX_CHUNK_LENGTH) { // Split by paragraph (two or more newlines) const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim()); const chunks = []; for (let paragraph of paragraphs) { paragraph = paragraph.trim(); if (!paragraph) continue; // Split by sentence boundaries (period, question mark, exclamation mark followed by space) // But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F. 
const sentences = paragraph.split(/(?= 0 && progress <= 100) { const clampedProgress = Math.max(0, Math.min(progress, 100)); demoStatusBox.style.setProperty('--status-progress', `${clampedProgress}%`); demoStatusBox.classList.toggle('complete', clampedProgress >= 100); } else if (type === 'success' || type === 'error') { demoStatusBox.style.removeProperty('--status-progress'); demoStatusBox.classList.remove('complete'); } else { demoStatusBox.style.removeProperty('--status-progress'); demoStatusBox.classList.remove('complete'); } } function hideDemoStatus() { demoStatusBox.style.display = 'none'; } function showDemoError(message) { demoError.textContent = message; demoError.classList.add('active'); } function hideDemoError() { demoError.classList.remove('active'); } // Custom billing confirmation modal function showBillingConfirmation(charCount, providers) { return new Promise((resolve) => { // Set modal content billingCharCount.textContent = charCount; billingProviders.textContent = providers.join(', '); billingModalMessage.textContent = 'You are about to generate speech using API services.'; // Show modal billingModal.classList.add('show'); // Handle confirm const handleConfirm = () => { cleanup(); resolve(true); }; // Handle cancel const handleCancel = () => { cleanup(); resolve(false); }; // Handle overlay click const handleOverlayClick = (e) => { if (e.target === billingModal || e.target.classList.contains('billing-modal-overlay')) { cleanup(); resolve(false); } }; // Handle escape key const handleEscape = (e) => { if (e.key === 'Escape') { cleanup(); resolve(false); } }; // Cleanup function const cleanup = () => { billingModal.classList.remove('show'); billingModalConfirm.removeEventListener('click', handleConfirm); billingModalCancel.removeEventListener('click', handleCancel); billingModal.removeEventListener('click', handleOverlayClick); document.removeEventListener('keydown', handleEscape); }; // Add event listeners 
billingModalConfirm.addEventListener('click', handleConfirm); billingModalCancel.addEventListener('click', handleCancel); billingModal.addEventListener('click', handleOverlayClick); document.addEventListener('keydown', handleEscape); }); } function showBackendBadge(backend) { demoBackendBadge.textContent = backend; demoBackendBadge.classList.add('visible'); if (backend === 'WebGPU') { demoBackendBadge.classList.add('webgpu'); } else { demoBackendBadge.classList.add('wasm'); } } // Validate characters in text function validateCharacters(text) { if (!processors || !processors.textProcessor) { return { valid: true, unsupportedChars: [] }; } try { // Extract unique characters to minimize preprocessText calls const uniqueChars = [...new Set(text)]; // Build mapping for unique chars only (much faster for long texts) // For example, Korean '간' -> 'ㄱㅏㄴ', so we map 'ㄱ','ㅏ','ㄴ' -> '간' const processedToOriginal = new Map(); const charToProcessed = new Map(); for (const char of uniqueChars) { const processedChar = preprocessText(char); charToProcessed.set(char, processedChar); // Map each processed character back to its original for (const pc of processedChar) { if (!processedToOriginal.has(pc)) { processedToOriginal.set(pc, new Set()); } processedToOriginal.get(pc).add(char); } } // Build full processed text using cached mappings const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join(''); // Check the entire processed text once (efficient) const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]); // Map unsupported processed chars back to original chars const unsupportedOriginalChars = new Set(); if (unsupportedChars && unsupportedChars.length > 0) { for (const unsupportedChar of unsupportedChars) { const originalChars = processedToOriginal.get(unsupportedChar); if (originalChars) { originalChars.forEach(c => unsupportedOriginalChars.add(c)); } } } const unsupportedCharsArray = Array.from(unsupportedOriginalChars); return { 
valid: unsupportedCharsArray.length === 0, unsupportedChars: unsupportedCharsArray }; } catch (error) { return { valid: true, unsupportedChars: [] }; } } // Update character counter and validate text length function updateCharCounter() { const text = demoTextInput.value; const length = text.length; demoCharCount.textContent = length; // Get the actual width of the textarea const textareaWidth = demoTextInput.offsetWidth; // Max width reference: 1280px (container max-width) / 2 (grid column) - padding/gap ≈ 638px // Using 640px as reference for easier calculation const maxWidthRef = 640; // Calculate font size based on width ratio // Original rem values at max-width (640px): // 5rem = 80px @ 16px base → 80/640 = 12.5% // 4rem = 64px → 64/640 = 10% // 3rem = 48px → 48/640 = 7.5% // 2.5rem = 40px → 40/640 = 6.25% // 2rem = 32px → 32/640 = 5% // 1.5rem = 24px → 24/640 = 3.75% // 1rem = 16px → 16/640 = 2.5% let fontSizeRatio; if (length < 160) { fontSizeRatio = 0.06375; // ~6.375% of width (scaled from 3rem) } else if (length < 240) { fontSizeRatio = 0.053125; // ~5.3125% of width (scaled from 2.5rem) } else if (length < 400) { fontSizeRatio = 0.0425; // ~4.25% of width (scaled from 2rem) } else if (length < 700) { fontSizeRatio = 0.031875; // ~3.1875% of width (scaled from 1.5rem) } else { fontSizeRatio = 0.025; // 2.5% of width (minimum stays the same) } // Calculate font size based on actual width const fontSize = textareaWidth * fontSizeRatio; demoTextInput.style.fontSize = `${fontSize}px`; // Remove all status classes demoCharCounter.classList.remove('error', 'warning', 'valid'); // Check for unsupported characters first (only if models are loaded) let hasUnsupportedChars = false; if (models && processors && length > 0) { const validation = validateCharacters(text); if (!validation.valid && validation.unsupportedChars.length > 0) { hasUnsupportedChars = true; const charList = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', '); const moreChars = 
validation.unsupportedChars.length > 5 ? ` and ${validation.unsupportedChars.length - 5} more` : ''; showDemoError(`Unsupported characters detected: ${charList}${moreChars}. Please remove them before generating speech.`); } else { hideDemoError(); } } // Update status based on length and character validation if (length < MIN_CHARS) { demoCharCounter.classList.add('error'); demoCharStatus.textContent = '✗'; demoGenerateBtn.disabled = true; } else if (hasUnsupportedChars) { demoCharCounter.classList.add('error'); demoCharStatus.textContent = '✗'; demoGenerateBtn.disabled = true; } else { demoCharCounter.classList.add('valid'); demoCharStatus.textContent = '✓'; // Enable only if models are loaded AND not currently generating demoGenerateBtn.disabled = !models || isGenerating; } } // Validate text input function validateTextInput(text) { if (!text || text.trim().length === 0) { return { valid: false, message: 'Please enter some text.' }; } if (text.length < MIN_CHARS) { return { valid: false, message: `Text must be at least ${MIN_CHARS} characters long. 
(Currently ${text.length})` }; } return { valid: true }; } // Load pre-extracted style embeddings from JSON async function loadStyleEmbeddings(voice) { try { // Check if already cached if (refEmbeddingCache[voice]) { return refEmbeddingCache[voice]; } const embeddingPath = REF_EMBEDDING_PATHS[voice]; if (!embeddingPath) { throw new Error(`No embedding path configured for voice: ${voice}`); } const response = await fetch(embeddingPath); if (!response.ok) { throw new Error(`Failed to fetch embedding: ${response.statusText}`); } const embeddingData = await response.json(); // Convert JSON data to ONNX tensors // Flatten nested arrays before creating Float32Array const styleTtlData = embeddingData.style_ttl.data.flat(Infinity); const styleTtlTensor = new ort.Tensor( embeddingData.style_ttl.type || 'float32', Float32Array.from(styleTtlData), embeddingData.style_ttl.dims ); const styleDpData = embeddingData.style_dp.data.flat(Infinity); const styleDpTensor = new ort.Tensor( embeddingData.style_dp.type || 'float32', Float32Array.from(styleDpData), embeddingData.style_dp.dims ); const embeddings = { styleTtl: styleTtlTensor, styleDp: styleDpTensor }; // Cache the embeddings refEmbeddingCache[voice] = embeddings; return embeddings; } catch (error) { throw error; } } // Switch to a different voice async function switchVoice(voice) { try { const embeddings = await loadStyleEmbeddings(voice); currentStyleTtlTensor = embeddings.styleTtl; currentStyleDpTensor = embeddings.styleDp; currentVoice = voice; // Re-validate text after switching voice updateCharCounter(); } catch (error) { showDemoError(`Failed to load ${voice === 'F' ? 
'Female' : 'Male'} voice: ${error.message}`); throw error; } } // Check WebGPU support more thoroughly async function checkWebGPUSupport() { try { // Detect iOS/Safari const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) || (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1); const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent); // iOS and Safari have incomplete WebGPU support if (isIOS) { return { supported: false, reason: 'iOS does not support the required WebGPU features' }; } if (isSafari) { // Desktop Safari might work, but check carefully return { supported: false, reason: 'Safari does not support the required WebGPU features' }; } // Check if WebGPU is available in the browser if (!navigator.gpu) { return { supported: false, reason: 'WebGPU not available in this browser' }; } // Request adapter const adapter = await navigator.gpu.requestAdapter(); if (!adapter) { return { supported: false, reason: 'No WebGPU adapter found' }; } // Check adapter info try { const adapterInfo = await adapter.requestAdapterInfo(); } catch (infoError) { // Ignore adapter info errors } // Request device to test if it actually works const device = await adapter.requestDevice(); if (!device) { return { supported: false, reason: 'Failed to create WebGPU device' }; } return { supported: true, adapter, device }; } catch (error) { // Handle specific iOS/Safari errors const errorMsg = error.message || ''; if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) { return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' }; } return { supported: false, reason: error.message }; } } // Warmup models with dummy inference (no audio playback, no UI updates) async function warmupModels() { try { const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. 
Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. Try it now, and enjoy its speed.'; const totalStep = 5; // Use minimal steps for faster warmup const durationFactor = 1.0; const textList = [dummyText]; const bsz = 1; // Use pre-computed style embeddings const styleTtlTensor = currentStyleTtlTensor; const styleDpTensor = currentStyleDpTensor; // Step 1: Estimate duration const { textIds, textMask } = processors.textProcessor.call(textList); const textIdsShape = [bsz, textIds[0].length]; const textMaskShape = [bsz, 1, textMask[0][0].length]; const textMaskTensor = arrayToTensor(textMask, textMaskShape); const dpResult = await models.dpOrt.run({ text_ids: intArrayToTensor(textIds, textIdsShape), style_dp: styleDpTensor, text_mask: textMaskTensor }); const durOnnx = Array.from(dpResult.duration.data); for (let i = 0; i < durOnnx.length; i++) { durOnnx[i] *= durationFactor; } const durReshaped = []; for (let b = 0; b < bsz; b++) { durReshaped.push([[durOnnx[b]]]); } // Step 2: Encode text const textEncResult = await models.textEncOrt.run({ text_ids: intArrayToTensor(textIds, textIdsShape), style_ttl: styleTtlTensor, text_mask: textMaskTensor }); const textEmbTensor = textEncResult.text_emb; // Step 3: Denoising let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs); const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length]; const latentMaskShape = [bsz, 1, latentMask[0][0].length]; const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape); const totalStepArray = new Array(bsz).fill(totalStep); const scalarShape = [bsz]; const totalStepTensor = arrayToTensor(totalStepArray, scalarShape); for (let step = 0; step < totalStep; step++) { const currentStepArray = new Array(bsz).fill(step); const vectorEstResult = await models.vectorEstOrt.run({ noisy_latent: arrayToTensor(noisyLatent, latentShape), text_emb: textEmbTensor, style_ttl: 
styleTtlTensor, text_mask: textMaskTensor, latent_mask: latentMaskTensor, total_step: totalStepTensor, current_step: arrayToTensor(currentStepArray, scalarShape) }); const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data); // Update latent let idx = 0; for (let b = 0; b < noisyLatent.length; b++) { for (let d = 0; d < noisyLatent[b].length; d++) { for (let t = 0; t < noisyLatent[b][d].length; t++) { noisyLatent[b][d][t] = denoisedLatent[idx++]; } } } } // Step 4: Generate waveform const vocoderResult = await models.vocoderOrt.run({ latent: arrayToTensor(noisyLatent, latentShape) }); // Warmup complete - no need to process the audio further } catch (error) { console.warn('Warmup failed (non-critical):', error.message); // Don't throw - warmup failure shouldn't prevent normal usage } } // Load models on page load async function initializeModels() { try { showDemoStatus('Loading configuration...', 'info', 5); const basePath = 'assets/onnx'; // Load config cfgs = await loadCfgs(basePath); // Check WebGPU support first showDemoStatus('Checking WebGPU support...', 'info', 8); const webgpuCheck = await checkWebGPUSupport(); // If WebGPU is not supported, show message and disable demo if (!webgpuCheck.supported) { // Show specific message for iOS users const errorMessage = webgpuCheck.reason.includes('iOS') || webgpuCheck.reason.includes('Safari') ? `iOS/Safari is not currently supported.
Please use a desktop browser that supports WebGPU (Chrome 113+, Edge 113+).` : `Please use a browser that supports WebGPU (Chrome 113+, Edge 113+, or other WebGPU-enabled browsers).`; showDemoStatus(errorMessage, 'error', 100); showBackendBadge('Not Supported'); // Disable all input elements demoTextInput.disabled = true; demoGenerateBtn.disabled = true; demoTotalSteps.disabled = true; demoDurationFactor.disabled = true; demoElevenLabsApiKey.disabled = true; if (demoSecondaryApiKey) demoSecondaryApiKey.disabled = true; if (demoTertiaryApiKey) demoTertiaryApiKey.disabled = true; // Disable voice toggle const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text'); voiceToggleTexts.forEach(text => { text.classList.add('disabled'); text.style.pointerEvents = 'none'; text.style.opacity = '0.5'; }); return; // Stop initialization } // Load models with WebGPU showDemoStatus('WebGPU detected! Loading models...', 'info', 10); const modelsLoadPromise = loadOnnxAll(basePath, { executionProviders: ['webgpu'], graphOptimizationLevel: 'all' }, (modelName, current, total) => { const progress = 10 + (current / total) * 70; // 10-80% for model loading showDemoStatus(`Loading models with WebGPU (${current}/${total}): ${modelName}...`, 'info', progress); }); // Load processors in parallel with models const [loadedModels, loadedProcessors] = await Promise.all([ modelsLoadPromise, loadProcessors(basePath) ]); models = loadedModels; processors = loadedProcessors; showDemoStatus('Loading reference embeddings...', 'info', 85); // Load pre-extracted embeddings for default voice const embeddings = await loadStyleEmbeddings(currentVoice); currentStyleTtlTensor = embeddings.styleTtl; currentStyleDpTensor = embeddings.styleDp; showDemoStatus('Warming up models...', 'info', 90); // Warmup step: run inference once in background with dummy text await warmupModels(); hideDemoStatus(); demoGenerateBtn.disabled = false; // Enable voice toggle buttons after models are loaded const 
voiceToggleTexts = document.querySelectorAll('.voice-toggle-text'); voiceToggleTexts.forEach(text => text.classList.remove('disabled')); // Validate initial text now that models are loaded updateCharCounter(); } catch (error) { showDemoStatus(`Error: ${error.message}`, 'error'); showDemoError(`Failed to initialize: ${error.message}. Check console for details.`); } } // ElevenLabs API synthesis function async function generateSpeechElevenLabs(text, apiKey) { const startTime = Date.now(); try { const response = await fetch('https://api.elevenlabs.io/v1/text-to-speech/JBFqnCBsd6RMkjVDRZzb', { method: 'POST', headers: { 'Accept': 'audio/mpeg', 'Content-Type': 'application/json', 'xi-api-key': apiKey }, body: JSON.stringify({ text: text, model_id: 'eleven_flash_v2_5', voice_settings: { stability: 0.5, similarity_boost: 0.5 } }) }); if (!response.ok) { throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText}`); } const audioBlob = await response.blob(); const audioBuffer = await audioBlob.arrayBuffer(); // Get audio duration const audioContext = new (window.AudioContext || window.webkitAudioContext)(); const decodedAudio = await audioContext.decodeAudioData(audioBuffer); const audioDuration = decodedAudio.duration; const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: true, audioBlob, audioDuration, processingTime, url: URL.createObjectURL(audioBlob), text: text // 추가: text를 반환에 포함 }; } catch (error) { const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: false, error: error.message, processingTime, text: text // 추가: 에러 시에도 text 포함 }; } } // OpenAI TTS-1 API synthesis function async function generateSpeechOpenAI(text, apiKey) { const startTime = Date.now(); try { const response = await fetch('https://api.openai.com/v1/audio/speech', { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json', }, body: JSON.stringify({ 
model: 'tts-1', input: text, voice: 'alloy', response_format: 'mp3' }) }); if (!response.ok) { throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`); } const audioBlob = await response.blob(); const audioBuffer = await audioBlob.arrayBuffer(); // Get audio duration const audioContext = new (window.AudioContext || window.webkitAudioContext)(); const decodedAudio = await audioContext.decodeAudioData(audioBuffer); const audioDuration = decodedAudio.duration; const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: true, audioBlob, audioDuration, processingTime, url: URL.createObjectURL(audioBlob), text: text }; } catch (error) { const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: false, error: error.message, processingTime, text: text }; } } // Gemini 2.5 Flash TTS API synthesis function async function generateSpeechGemini(text, apiKey) { const startTime = Date.now(); try { const response = await fetch('https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent', { method: 'POST', headers: { 'x-goog-api-key': apiKey, 'Content-Type': 'application/json', }, body: JSON.stringify({ contents: [{ parts: [{ text: text }] }], generationConfig: { responseModalities: ["AUDIO"], speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: "Kore" } } } } }) }); if (!response.ok) { const errorText = await response.text(); throw new Error(`Gemini API error: ${response.status} ${response.statusText}`); } const data = await response.json(); // Extract audio data from Gemini response let audioContent = null; let mimeType = null; if (data.candidates && data.candidates[0]?.content?.parts) { for (const part of data.candidates[0].content.parts) { if (part.inlineData && part.inlineData.data) { audioContent = part.inlineData.data; mimeType = part.inlineData.mimeType; break; } } } if (!audioContent) { throw new Error('No audio content 
found in Gemini response'); } // Decode base64 audio content const binaryString = atob(audioContent); const pcmData = new Uint8Array(binaryString.length); for (let i = 0; i < binaryString.length; i++) { pcmData[i] = binaryString.charCodeAt(i); } // Parse sample rate from mimeType if available (e.g., "audio/pcm;rate=24000") let sampleRate = 24000; // default if (mimeType && mimeType.includes('rate=')) { const match = mimeType.match(/rate=(\d+)/); if (match) { sampleRate = parseInt(match[1]); } } // Gemini returns s16le (signed 16-bit little-endian PCM) const numChannels = 1; // mono const bitsPerSample = 16; const byteRate = sampleRate * numChannels * (bitsPerSample / 8); const blockAlign = numChannels * (bitsPerSample / 8); const dataSize = pcmData.length; // Create WAV header (44 bytes) const wavHeader = new ArrayBuffer(44); const view = new DataView(wavHeader); // RIFF chunk descriptor view.setUint32(0, 0x52494646, false); // "RIFF" view.setUint32(4, 36 + dataSize, true); // File size - 8 view.setUint32(8, 0x57415645, false); // "WAVE" // fmt sub-chunk view.setUint32(12, 0x666d7420, false); // "fmt " view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM) view.setUint16(20, 1, true); // AudioFormat (1 for PCM) view.setUint16(22, numChannels, true); // NumChannels view.setUint32(24, sampleRate, true); // SampleRate view.setUint32(28, byteRate, true); // ByteRate view.setUint16(32, blockAlign, true); // BlockAlign view.setUint16(34, bitsPerSample, true); // BitsPerSample // data sub-chunk view.setUint32(36, 0x64617461, false); // "data" view.setUint32(40, dataSize, true); // Subchunk2Size // Combine header and PCM data const wavData = new Uint8Array(44 + dataSize); wavData.set(new Uint8Array(wavHeader), 0); wavData.set(pcmData, 44); const finalAudioBuffer = wavData.buffer; // Get audio duration const audioContext = new (window.AudioContext || window.webkitAudioContext)(); let decodedAudio; try { decodedAudio = await 
audioContext.decodeAudioData(finalAudioBuffer.slice(0)); // Use slice to create a copy } catch (decodeError) { throw new Error(`Unable to decode Gemini audio: ${decodeError.message}`); } const audioDuration = decodedAudio.duration; await audioContext.close(); // Create blob with WAV format const audioBlob = new Blob([finalAudioBuffer], { type: 'audio/wav' }); const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: true, audioBlob, audioDuration, processingTime, url: URL.createObjectURL(audioBlob), text: text }; } catch (error) { const endTime = Date.now(); const processingTime = (endTime - startTime) / 1000; return { success: false, error: error.message, processingTime, text: text }; } } // Update individual system result in comparison table function updateComparisonRow(system, result) { if (!isComparisonMode) return; const statusEl = document.getElementById(`${system}Status`); const titleStatusEl = document.getElementById(`${system}-status`); const timeEl = document.getElementById(`${system}Time`); const durationEl = document.getElementById(`${system}Duration`); const rtfEl = document.getElementById(`${system}RTF`); if (result.success) { if (statusEl) { statusEl.textContent = ''; statusEl.className = 'demo-comparison-cell'; } if (titleStatusEl) { titleStatusEl.textContent = '✅ Completed'; titleStatusEl.classList.remove('status-error', 'status-running'); titleStatusEl.classList.add('status-success'); } timeEl.textContent = `${result.processingTime.toFixed(2)}s`; durationEl.textContent = `${result.audioDuration.toFixed(2)}s`; const rtfValue = result.processingTime / result.audioDuration; rtfEl.innerHTML = `${rtfValue.toFixed(3)}x`; } else { if (statusEl) { statusEl.textContent = ''; statusEl.className = 'demo-comparison-cell'; } if (titleStatusEl) { titleStatusEl.textContent = '❌ Failed'; titleStatusEl.classList.remove('status-success', 'status-running'); titleStatusEl.classList.add('status-error'); } timeEl.textContent = 
result.error || 'Error'; durationEl.textContent = '-'; rtfEl.textContent = '-'; } } // Highlight winner after all complete (based on RTF) function highlightWinner(results) { if (!isComparisonMode) return; if (!Array.isArray(results) || results.length < 2) return; // Remove all winner classes first const systems = ['supertonic', 'elevenlabs', 'openai', 'gemini']; systems.forEach(system => { const row = document.querySelector(`.${system}-row`); const rtfEl = document.getElementById(`${system}RTF`); if (row) row.classList.remove('winner'); if (rtfEl) rtfEl.classList.remove('fastest'); }); // Calculate RTF for each result and find the best one const systemResults = []; results.forEach((result, index) => { if (result && result.success && result.audioDuration > 0) { const rtfValue = result.processingTime / result.audioDuration; // Determine system by result order in the results array // Results are typically passed in order: [supertonicResult, elevenlabsResult, openaiResult, geminiResult] let system = null; if (index === 0 || result.text === results[0]?.text) { system = 'supertonic'; } else { // Check which system by looking at existing elements const hasElevenlabs = document.querySelector('.elevenlabs-row'); const hasOpenai = document.querySelector('.openai-row'); const hasGemini = document.querySelector('.gemini-row'); if (hasElevenlabs && !systemResults.find(s => s.system === 'elevenlabs')) { system = 'elevenlabs'; } else if (hasOpenai && !systemResults.find(s => s.system === 'openai')) { system = 'openai'; } else if (hasGemini && !systemResults.find(s => s.system === 'gemini')) { system = 'gemini'; } } if (system) { systemResults.push({ system, rtfValue }); } } }); // Find the best (lowest RTF) if (systemResults.length > 0) { const best = systemResults.reduce((prev, curr) => curr.rtfValue < prev.rtfValue ? 
curr : prev ); const row = document.querySelector(`.${best.system}-row`); const rtfEl = document.getElementById(`${best.system}RTF`); if (row) row.classList.add('winner'); if (rtfEl) rtfEl.classList.add('fastest'); } } // Supertonic synthesis function (extracted for parallel execution) async function generateSupertonicSpeech(text, totalStep, durationFactor) { const supertonicStartTime = Date.now(); try { const textList = [text]; const bsz = 1; const sampleRate = cfgs.ae.sample_rate; // Use pre-computed style embeddings const styleTtlTensor = currentStyleTtlTensor; const styleDpTensor = currentStyleDpTensor; // Step 1: Estimate duration const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList); // Check for unsupported characters if (unsupportedChars && unsupportedChars.length > 0) { const charList = unsupportedChars.map(c => `"${c}"`).join(', '); throw new Error(`Unsupported characters: ${charList}`); } const textIdsShape = [bsz, textIds[0].length]; const textMaskShape = [bsz, 1, textMask[0][0].length]; const textMaskTensor = arrayToTensor(textMask, textMaskShape); const dpResult = await models.dpOrt.run({ text_ids: intArrayToTensor(textIds, textIdsShape), style_dp: styleDpTensor, text_mask: textMaskTensor }); const durOnnx = Array.from(dpResult.duration.data); // Apply duration factor to adjust speech length (once) const durationAdjustment = currentVoice === 'F' ? 
0.1 : 0.08; for (let i = 0; i < durOnnx.length; i++) { durOnnx[i] *= (durationFactor - durationAdjustment); } const durReshaped = []; for (let b = 0; b < bsz; b++) { durReshaped.push([[durOnnx[b]]]); } // Step 2: Encode text const textEncResult = await models.textEncOrt.run({ text_ids: intArrayToTensor(textIds, textIdsShape), style_ttl: styleTtlTensor, text_mask: textMaskTensor }); const textEmbTensor = textEncResult.text_emb; // Step 3: Denoising let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs); const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length]; const latentMaskShape = [bsz, 1, latentMask[0][0].length]; const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape); // Prepare constant tensors const totalStepArray = new Array(bsz).fill(totalStep); const scalarShape = [bsz]; const totalStepTensor = arrayToTensor(totalStepArray, scalarShape); for (let step = 0; step < totalStep; step++) { const currentStepArray = new Array(bsz).fill(step); const vectorEstResult = await models.vectorEstOrt.run({ noisy_latent: arrayToTensor(noisyLatent, latentShape), text_emb: textEmbTensor, style_ttl: styleTtlTensor, text_mask: textMaskTensor, latent_mask: latentMaskTensor, total_step: totalStepTensor, current_step: arrayToTensor(currentStepArray, scalarShape) }); const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data); // Update latent let idx = 0; for (let b = 0; b < noisyLatent.length; b++) { for (let d = 0; d < noisyLatent[b].length; d++) { for (let t = 0; t < noisyLatent[b][d].length; t++) { noisyLatent[b][d][t] = denoisedLatent[idx++]; } } } } // Step 4: Generate waveform const vocoderResult = await models.vocoderOrt.run({ latent: arrayToTensor(noisyLatent, latentShape) }); const wavBatch = Array.from(vocoderResult.wav_tts.data); const wavLen = Math.floor(sampleRate * durOnnx[0]); const wavOut = wavBatch.slice(0, wavLen); // Create WAV file const wavBuffer = writeWavFile(wavOut, sampleRate); const blob = new 
Blob([wavBuffer], { type: 'audio/wav' }); const url = URL.createObjectURL(blob); // Calculate times for Supertonic const supertonicEndTime = Date.now(); const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000; const audioDurationSec = durOnnx[0]; return { success: true, processingTime: supertonicProcessingTime, audioDuration: audioDurationSec, url: url, text: text }; } catch (error) { return { success: false, error: error.message, text: text }; } } // Format time: 60초 미만 -> 00.00, 60분 미만 -> 00:00.00, 60분 이상 -> 00:00:00.00 function formatTimeDetailed(seconds) { const hours = Math.floor(seconds / 3600); const mins = Math.floor((seconds % 3600) / 60); const secs = seconds % 60; const ms = Math.floor((secs % 1) * 100); const wholeSecs = Math.floor(secs); if (seconds < 60) { return `${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; } else if (seconds < 3600) { return `${mins.toString().padStart(2, '0')}:${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; } else { return `${hours.toString().padStart(2, '0')}:${mins.toString().padStart(2, '0')}:${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; } } // Render result to UI with custom audio player async function renderResult(system, result, isFirst = false) { const container = document.getElementById('demoResults'); const formatTime = (seconds, { trimMobile = false } = {}) => { const mins = Math.floor(seconds / 60); const secs = seconds % 60; const secString = secs.toFixed(2).padStart(5, '0'); let formatted = `${mins}:${secString}`; if (trimMobile) { formatted = trimDecimalsForMobile(formatted); } return formatted; }; const textLength = result.text ? result.text.length : 0; const isBatch = textLength >= MAX_CHUNK_LENGTH; const successfulResult = result && result.success; const firstChunkTimeValue = result.firstChunkTime; const processingTimeStr = successfulResult ? (isBatch && firstChunkTimeValue ? 
`${formatTimeDetailed(firstChunkTimeValue)} / ${formatTimeDetailed(result.processingTime)}` : formatTimeDetailed(result.processingTime)) : (result.error || 'Error'); const charsPerSec = successfulResult && result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '-'; const rtf = successfulResult && result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-'; const progressValue = successfulResult && textLength > 0 ? 100 : 0; const titleMain = system === 'supertonic' ? 'Supertonic' : (system === 'openai' ? 'OpenAI TTS-1' : (system === 'gemini' ? 'Gemini 2.5 Flash TTS' : 'ElevenLabs Flash v2.5')); const titleSub = system === 'supertonic' ? 'On-Device' : 'Cloud API'; const titleColor = system === 'supertonic' ? 'var(--supertone_blue)' : system === 'elevenlabs' ? 'var(--brand-elevenlabs)' : system === 'openai' ? 'var(--brand-openai)' : system === 'gemini' ? 'var(--brand-gemini)' : '#999'; const titleStatus = isComparisonMode ? `⏳ Running...` : ''; const hasAudio = successfulResult && result.url; const totalDurationDisplay = successfulResult && typeof result.audioDuration === 'number' ? formatTime(result.audioDuration, { trimMobile: true }) : '--'; const downloadActionsHTML = hasAudio ? `
` : ''; /* Markup for the stats row: processing time, chars/sec and real-time factor. */ const infoMarkupSuccess = `
${formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true })}
Processing Time
${charsPerSec}
Chars/sec
${formatStatValueWithSuffix(rtf, 'x')}
RTF
`; const infoMarkupError = `
${result.error || 'Failed'}
`; /* Preferred path: a pre-rendered result shell already exists for this system — update it in place. */ const resultItemEl = document.getElementById(`${system}-result`); const infoContainer = resultItemEl ? resultItemEl.querySelector('.demo-result-info') : null; const playerContainer = resultItemEl ? resultItemEl.querySelector('.custom-audio-player') : null; if (resultItemEl && infoContainer && playerContainer) { resultItemEl.classList.add(`${system}-result-item`); resultItemEl.classList.remove('generating'); resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); resultItemEl.style.setProperty('--provider-color', titleColor); const titleMainEl = resultItemEl.querySelector('.title-main'); if (titleMainEl) { titleMainEl.textContent = titleMain; titleMainEl.style.color = titleColor; } const titleSubEl = resultItemEl.querySelector('.title-sub'); if (titleSubEl) { titleSubEl.textContent = titleSub; } if (!resultItemEl.querySelector('.title-status') && titleStatus) { const titleEl = resultItemEl.querySelector('.demo-result-title'); if (titleEl) { titleEl.insertAdjacentHTML('beforeend', titleStatus); } } infoContainer.classList.toggle('error', !successfulResult); infoContainer.innerHTML = successfulResult ? infoMarkupSuccess : infoMarkupError; if (successfulResult) { playerContainer.style.display = ''; playerContainer.innerHTML = `
0:00.00
${totalDurationDisplay}
${downloadActionsHTML} `; } else { playerContainer.style.display = 'none'; playerContainer.innerHTML = ''; } container.style.display = 'flex'; if (successfulResult && hasAudio) { await setupCustomPlayer(system, result); } else if (successfulResult && !hasAudio) { const playBtnEl = document.getElementById(`${system}-play-pause-btn`); if (playBtnEl) playBtnEl.disabled = true; } return; } /* Fallback path: no shell exists — build and append a fresh result item. */ const infoSection = successfulResult ? `
${infoMarkupSuccess}
` : `
${infoMarkupError}
`; const resultHTML = `
${titleMain} ${titleSub} ${titleStatus}
${infoSection}
0:00.00
${totalDurationDisplay}
${downloadActionsHTML}
`; container.insertAdjacentHTML('beforeend', resultHTML); container.style.display = 'flex'; if (hasAudio) { await setupCustomPlayer(system, result); } else { const playBtnEl = document.getElementById(`${system}-play-pause-btn`); if (playBtnEl) playBtnEl.disabled = true; } }
/* Build a self-contained custom player for one system's finished result: fetches result.url, decodes it into a dedicated AudioContext, and wires play/pause, seeking and progress UI. Registers itself in customAudioPlayers so other players can pause it. */
async function setupCustomPlayer(system, result) { const playPauseBtn = document.getElementById(`${system}-play-pause-btn`); const progressContainer = document.getElementById(`${system}-progress-container`); const currentTimeDisplay = document.getElementById(`${system}-current-time`); const durationDisplay = document.getElementById(`${system}-total-duration`); const progressFill = document.getElementById(`${system}-progress-fill`); if (!playPauseBtn || !progressContainer || !currentTimeDisplay || !durationDisplay || !progressFill) { console.error('Failed to find player elements for', system); return; } /* Create dedicated audio context for this player */ const playerAudioContext = new (window.AudioContext || window.webkitAudioContext)(); let audioBuffer = null; let source = null; let startTime = 0; let pauseTime = 0; let isPlaying = false; let isPaused = false; let animationFrameId = null; let playerRecord = null; /* Minimal "M:SS.ff" formatter (single argument — no mobile trimming here). */ const formatTime = (seconds) => { const mins = Math.floor(seconds / 60); const secs = seconds % 60; const secString = secs.toFixed(2).padStart(5, '0'); return `${mins}:${secString}`; }; /* Fetch and decode audio */ try { const response = await fetch(result.url); const arrayBuffer = await response.arrayBuffer(); audioBuffer = await playerAudioContext.decodeAudioData(arrayBuffer); } catch (error) { console.error('Failed to load audio for', system, error); playPauseBtn.disabled = true; return; } /* rAF loop: repaints the progress bar and time label until playback ends. */ const updateProgress = () => { if (!isPlaying || !playerAudioContext) return; const currentTime = isPaused ? pauseTime : (playerAudioContext.currentTime - startTime); const duration = audioBuffer.duration; const progress = duration > 0 ?
(currentTime / duration) * 100 : 0; progressFill.style.width = `${Math.min(progress, 100)}%`; /* NOTE(review): this local formatTime takes a single argument, so the { trimMobile } option passed here is ignored. */ currentTimeDisplay.textContent = formatTime(Math.min(currentTime, duration), { trimMobile: true }); if (currentTime < duration) { animationFrameId = requestAnimationFrame(updateProgress); } else { /* Playback finished */ isPlaying = false; isPaused = false; playPauseBtn.innerHTML = PLAY_ICON_SVG; progressFill.style.width = '100%'; currentTimeDisplay.textContent = formatTime(duration, { trimMobile: true }); } };
/* Three-state toggle: resume from the paused offset, pause while playing, or start from the beginning (also the branch taken after playback finished). */
const togglePlayPause = () => { if (!audioBuffer) return; if (isPaused) { /* Resume from paused position */ pauseAllPlayersExcept(playerRecord); if (playerAudioContext.state === 'suspended') { playerAudioContext.resume(); } source = playerAudioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(playerAudioContext.destination); source.start(0, pauseTime); startTime = playerAudioContext.currentTime - pauseTime; isPaused = false; isPlaying = true; playPauseBtn.innerHTML = PAUSE_ICON_SVG; updateProgress(); } else if (isPlaying) { /* Pause playback */ pauseTime = playerAudioContext.currentTime - startTime; if (source) { source.stop(); source = null; } playerAudioContext.suspend(); isPaused = true; isPlaying = false; playPauseBtn.innerHTML = PLAY_ICON_SVG; if (animationFrameId) { cancelAnimationFrame(animationFrameId); } } else { /* Start from beginning */ pauseAllPlayersExcept(playerRecord); pauseTime = 0; if (playerAudioContext.state === 'suspended') { playerAudioContext.resume(); } source = playerAudioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(playerAudioContext.destination); source.start(0); startTime = playerAudioContext.currentTime; isPlaying = true; isPaused = false; playPauseBtn.innerHTML = PAUSE_ICON_SVG; updateProgress(); } };
/* Seek to a percentage (0-100) of the clip; playback resumes only if it was actively playing when the seek happened. */
const seekTo = (percentage) => { if (!audioBuffer) return; const seekTime = (percentage / 100) * audioBuffer.duration; const wasPlaying = isPlaying && !isPaused; /* Stop current playback */ if (source)
{ try { source.stop(); } catch (e) { /* Already stopped */ } source = null; } if (animationFrameId) { cancelAnimationFrame(animationFrameId); } pauseTime = seekTime; /* Update UI */ const progress = (seekTime / audioBuffer.duration) * 100; progressFill.style.width = `${Math.min(progress, 100)}%`; currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true }); if (wasPlaying) { /* Resume from new position */ if (playerAudioContext.state === 'suspended') { playerAudioContext.resume(); } source = playerAudioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(playerAudioContext.destination); source.start(0, seekTime); startTime = playerAudioContext.currentTime - seekTime; isPlaying = true; isPaused = false; playPauseBtn.innerHTML = PAUSE_ICON_SVG; updateProgress(); } else { /* Just update position, stay paused. Setting isPaused && isPlaying both true marks "paused with a pending position", so togglePlayPause takes its resume branch from pauseTime. */ isPaused = true; isPlaying = true; playPauseBtn.innerHTML = PLAY_ICON_SVG; } };
/* Pause hook shared through the player record so pauseAllPlayersExcept() can silence this player when another one starts. */ const pausePlayback = () => { if (!playerAudioContext || playerAudioContext.state === 'closed') return; if (isPlaying) { pauseTime = playerAudioContext.currentTime - startTime; if (source) { try { source.stop(); } catch (e) { /* Already stopped */ } source = null; } playerAudioContext.suspend().catch(() => {}); isPaused = true; isPlaying = false; playPauseBtn.innerHTML = PLAY_ICON_SVG; if (animationFrameId) { cancelAnimationFrame(animationFrameId); animationFrameId = null; } } };
/* Full teardown for this player: stop audio, close its AudioContext, and deregister it from customAudioPlayers. */ const cleanup = () => { pausePlayback(); if (playerAudioContext && playerAudioContext.state !== 'closed') { playerAudioContext.close(); } if (playerRecord) { customAudioPlayers = customAudioPlayers.filter(p => p !== playerRecord); } }; playerRecord = { audioContext: playerAudioContext, cleanup, pausePlayback }; customAudioPlayers.push(playerRecord); /* Setup event listeners */ playPauseBtn.addEventListener('click', togglePlayPause); progressContainer.addEventListener('click', (e) => { const rect = progressContainer.getBoundingClientRect(); const
percentage = ((e.clientX - rect.left) / rect.width) * 100; seekTo(percentage); }); }
/* Generate Supertonic speech for long text by splitting it into chunks and synthesizing them in order, with progressive playback via two optional callbacks: onFirstChunkReady(url, durationSoFar, text, numChunks, firstChunkTime, processedChars) fires once the first chunk's WAV is ready; onChunkAdded(url, durationSoFar, chunkIndex, totalChunks, processingTime, processedChars) fires for each later chunk. Resolves with { success, processingTime, audioDuration, url, text, firstChunkTime } for the concatenated audio (0.3 s of silence between chunks), or { success: false, error, text } on failure. */
async function generateSupertonicSpeechChunked(text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded) { const supertonicStartTime = Date.now(); const sampleRate = cfgs.ae.sample_rate; const silenceDuration = 0.3; /* 0.3 seconds of silence between chunks */ try { /* Split text into chunks */ const chunks = chunkText(text); const audioDataArrays = []; const durations = []; const silenceSamples = Math.floor(silenceDuration * sampleRate); let firstChunkEndTime = 0; let firstChunkTime = 0; /* Generate speech for each chunk. NOTE: the loop-local `chunkText` shadows the chunkText() helper called above. */ for (let i = 0; i < chunks.length; i++) { const chunkText = chunks[i]; const result = await generateSupertonicSpeech(chunkText, totalStep, durationFactor); if (!result.success) { throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`); } /* Fetch and parse the WAV file using the existing parseWavFile function */ const response = await fetch(result.url); const arrayBuffer = await response.arrayBuffer(); const { audioData } = parseWavFile(arrayBuffer); audioDataArrays.push(audioData); durations.push(result.audioDuration); /* Clean up the blob URL */ URL.revokeObjectURL(result.url); /* Progressive playback: send each chunk individually for Web Audio API */ if (i === 0 && onFirstChunkReady) { /* First chunk ready - send it immediately */ firstChunkEndTime = Date.now(); firstChunkTime = (firstChunkEndTime - supertonicStartTime) / 1000; const initialWav = writeWavFile(audioData, sampleRate); const initialBlob = new Blob([initialWav], { type: 'audio/wav' }); const initialUrl = URL.createObjectURL(initialBlob); const totalDurationSoFar = result.audioDuration; const processedChars = chunks[0].length; onFirstChunkReady(initialUrl, totalDurationSoFar, text, chunks.length, firstChunkTime, processedChars); } else if (i > 0 && onChunkAdded) { /* Subsequent chunks - send just the new chunk */ const
chunkWav = writeWavFile(audioData, sampleRate); const chunkBlob = new Blob([chunkWav], { type: 'audio/wav' }); const chunkUrl = URL.createObjectURL(chunkBlob); /* Duration so far includes one silence gap per completed chunk boundary. */ const totalDurationSoFar = durations.slice(0, i + 1).reduce((sum, dur) => sum + dur, 0) + silenceDuration * i; const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000; const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0); onChunkAdded(chunkUrl, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, processedChars); } } /* Concatenate all audio chunks with silence for final result */ const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1); /* Calculate total samples needed */ let totalSamples = 0; for (let i = 0; i < audioDataArrays.length; i++) { totalSamples += audioDataArrays[i].length; if (i < audioDataArrays.length - 1) { totalSamples += silenceSamples; } } const wavCat = new Float32Array(totalSamples); let currentIdx = 0; for (let i = 0; i < audioDataArrays.length; i++) { /* Copy audio data */ const audioData = audioDataArrays[i]; wavCat.set(audioData, currentIdx); currentIdx += audioData.length; /* Add silence if not the last chunk */ if (i < audioDataArrays.length - 1) { /* Silence is already zeros in Float32Array, just skip the indices */ currentIdx += silenceSamples; } } /* Create final WAV file */ const wavBuffer = writeWavFile(wavCat, sampleRate); const blob = new Blob([wavBuffer], { type: 'audio/wav' }); const url = URL.createObjectURL(blob); const supertonicEndTime = Date.now(); const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000; return { success: true, processingTime: supertonicProcessingTime, audioDuration: totalDuration, url: url, text: text, firstChunkTime: firstChunkTime }; } catch (error) { return { success: false, error: error.message, text: text }; } } /* Main synthesis entry point: validates input, runs Supertonic (plus optional cloud-API comparisons) and drives the demo UI. */ async function generateSpeech() { const text = demoTextInput.value.trim(); //
Validate text input const validation = validateTextInput(text); if (!validation.valid) { showDemoError(validation.message); return; } if (!models || !cfgs || !processors) { showDemoError('Models are still loading. Please wait.'); return; } if (!currentStyleTtlTensor || !currentStyleDpTensor) { showDemoError('Reference embeddings are not ready. Please wait.'); return; } // Validate characters before generation const charValidation = validateCharacters(text); if (!charValidation.valid && charValidation.unsupportedChars.length > 0) { const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', '); showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`); return; } const elevenlabsApiKey = demoElevenLabsApiKey.value.trim(); const openaiApiKey = demoSecondaryApiKey.value.trim(); const geminiApiKey = demoTertiaryApiKey.value.trim(); const hasComparison = !!elevenlabsApiKey || !!openaiApiKey || !!geminiApiKey; isComparisonMode = hasComparison; document.body.classList.toggle('comparison-mode', hasComparison); currentGenerationTextLength = text.length; // Show billing confirmation if API keys are provided if (hasComparison) { const apiProviders = []; if (elevenlabsApiKey) apiProviders.push('ElevenLabs Flash v2.5'); if (openaiApiKey) apiProviders.push('OpenAI TTS-1'); if (geminiApiKey) apiProviders.push('Gemini 2.5 Flash TTS'); const userConfirmed = await showBillingConfirmation(text.length, apiProviders); if (!userConfirmed) { return; } } if (!hasComparison && demoComparisonSection) { demoComparisonSection.style.display = 'none'; } try { isGenerating = true; demoGenerateBtn.disabled = true; // Disable voice toggle during generation const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text'); voiceToggleTexts.forEach(text => text.classList.add('disabled')); hideDemoError(); hideDemoStatus(); // Hide the status box when starting generation // Clean up previous audio playback if (audioContext) { // Stop all scheduled 
sources scheduledSources.forEach(source => { try { source.stop(); } catch (e) { // Already stopped } }); scheduledSources = []; // Close audio context if (audioContext.state !== 'closed') { audioContext.close(); } audioContext = null; } // Cancel animation frame if (animationFrameId) { cancelAnimationFrame(animationFrameId); animationFrameId = null; } // Clean up all custom audio players (ElevenLabs, etc.) customAudioPlayers.forEach(player => { if (player.cleanup) { player.cleanup(); } }); customAudioPlayers = []; // Reset state audioChunks = []; totalDuration = 0; startTime = 0; pauseTime = 0; isPaused = false; isPlaying = false; firstChunkGenerationTime = 0; // Processing time for first chunk totalChunks = 0; nextScheduledTime = 0; // Next time to schedule audio chunk // Show result shell(s) immediately const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => { const titleStatus = includeStatus ? `⏳ Running...` : ''; return `
${titleMain} ${titleSub} ${titleStatus}
--
Processing Time
--
Chars/sec
--
RTF
Generating speech...
`; }; const supertonicInitial = createInitialResultItem( 'supertonic', 'Supertonic', 'On-Device', 'var(--supertone_blue)', isComparisonMode ); const initialItems = [supertonicInitial]; if (elevenlabsApiKey) { const elevenInitial = createInitialResultItem( 'elevenlabs', 'ElevenLabs Flash v2.5', 'Cloud API', '#999', true ); initialItems.push(elevenInitial); } if (openaiApiKey) { const openaiInitial = createInitialResultItem( 'openai', 'OpenAI TTS-1', 'Cloud API', '#999', true ); initialItems.push(openaiInitial); } if (geminiApiKey) { const geminiInitial = createInitialResultItem( 'gemini', 'Gemini 2.5 Flash TTS', 'Cloud API', '#999', true ); initialItems.push(geminiInitial); } demoResults.style.display = 'flex'; demoResults.innerHTML = initialItems.join(''); // Reset comparison table if (hasComparison) { demoComparisonSection.style.display = 'block'; document.getElementById('supertonicStatus').textContent = '⏳ Running...'; document.getElementById('supertonicStatus').className = 'demo-comparison-cell status-running'; document.getElementById('supertonicTime').textContent = '-'; document.getElementById('supertonicDuration').textContent = '-'; document.getElementById('supertonicRTF').textContent = '-'; if (elevenlabsApiKey) { document.getElementById('elevenlabsStatus').textContent = '⏳ Running...'; document.getElementById('elevenlabsStatus').className = 'demo-comparison-cell status-running'; document.getElementById('elevenlabsTime').textContent = '-'; document.getElementById('elevenlabsDuration').textContent = '-'; document.getElementById('elevenlabsRTF').textContent = '-'; } if (openaiApiKey) { document.getElementById('openaiStatus').textContent = '⏳ Running...'; document.getElementById('openaiStatus').className = 'demo-comparison-cell status-running'; document.getElementById('openaiTime').textContent = '-'; document.getElementById('openaiDuration').textContent = '-'; document.getElementById('openaiRTF').textContent = '-'; } if (geminiApiKey) { 
document.getElementById('geminiStatus').textContent = '⏳ Running...'; document.getElementById('geminiStatus').className = 'demo-comparison-cell status-running'; document.getElementById('geminiTime').textContent = '-'; document.getElementById('geminiDuration').textContent = '-'; document.getElementById('geminiRTF').textContent = '-'; } // Remove winner classes document.querySelector('.supertonic-row').classList.remove('winner'); const elevenlabsRow = document.querySelector('.elevenlabs-row'); const openaiRow = document.querySelector('.openai-row'); const geminiRow = document.querySelector('.gemini-row'); if (elevenlabsRow) elevenlabsRow.classList.remove('winner'); if (openaiRow) openaiRow.classList.remove('winner'); if (geminiRow) geminiRow.classList.remove('winner'); } const totalStep = parseInt(demoTotalSteps.value); const durationFactor = parseFloat(demoDurationFactor.value); // Track which one finishes first let firstFinished = false; let supertonicResult = null; let elevenlabsResult = null; let openaiResult = null; let geminiResult = null; let latestSupertonicProcessedChars = 0; // Helper functions for custom player const formatTime = (seconds, { trimMobile = false } = {}) => { const mins = Math.floor(seconds / 60); const secs = seconds % 60; const secString = secs.toFixed(2).padStart(5, '0'); let formatted = `${mins}:${secString}`; if (trimMobile) { formatted = trimDecimalsForMobile(formatted); } return formatted; }; const updateProgress = () => { if (!isPlaying || !audioContext) return; const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime); const progress = totalDuration > 0 ? 
(currentTime / totalDuration) * 100 : 0; if (progressFill) { progressFill.style.width = `${Math.min(progress, 100)}%`; } if (currentTimeDisplay) { currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true }); } if (currentTime < totalDuration) { animationFrameId = requestAnimationFrame(updateProgress); } else { // Playback finished isPlaying = false; isPaused = false; if (playPauseBtn) { playPauseBtn.innerHTML = PLAY_ICON_SVG; } } }; const togglePlayPause = () => { if (!audioContext || audioChunks.length === 0) return; if (isPaused) { // Resume from paused position pauseAllPlayersExcept(supertonicPlayerRecord); const seekTime = pauseTime; // Find which chunk we should start from let accumulatedTime = 0; let startChunkIndex = 0; let offsetInChunk = seekTime; for (let i = 0; i < audioChunks.length; i++) { const chunkDuration = audioChunks[i].buffer.duration; if (accumulatedTime + chunkDuration > seekTime) { startChunkIndex = i; offsetInChunk = seekTime - accumulatedTime; break; } accumulatedTime += chunkDuration + 0.3; } // Stop any existing sources scheduledSources.forEach(source => { try { source.stop(); } catch (e) { // Already stopped } }); scheduledSources = []; // Resume AudioContext if suspended if (audioContext.state === 'suspended') { audioContext.resume(); } // Reschedule from the pause point startTime = audioContext.currentTime - seekTime; let nextStartTime = audioContext.currentTime; for (let i = startChunkIndex; i < audioChunks.length; i++) { const source = audioContext.createBufferSource(); source.buffer = audioChunks[i].buffer; source.connect(audioContext.destination); if (i === startChunkIndex) { source.start(nextStartTime, offsetInChunk); nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); } else { source.start(nextStartTime); nextStartTime += audioChunks[i].buffer.duration; } if (i < audioChunks.length - 1) { nextStartTime += 0.3; } scheduledSources.push(source); } nextScheduledTime = 
nextStartTime; isPaused = false; isPlaying = true; playPauseBtn.innerHTML = PAUSE_ICON_SVG; updateProgress(); } else if (isPlaying) { // Pause playback pauseTime = audioContext.currentTime - startTime; audioContext.suspend(); isPaused = true; playPauseBtn.innerHTML = PLAY_ICON_SVG; if (animationFrameId) { cancelAnimationFrame(animationFrameId); } } else { // Was finished, restart from beginning pauseAllPlayersExcept(supertonicPlayerRecord); pauseTime = 0; // Resume AudioContext if suspended if (audioContext.state === 'suspended') { audioContext.resume(); } // Stop any existing sources scheduledSources.forEach(source => { try { source.stop(); } catch (e) { // Already stopped } }); scheduledSources = []; // Restart from beginning startTime = audioContext.currentTime; let nextStartTime = audioContext.currentTime; for (let i = 0; i < audioChunks.length; i++) { const source = audioContext.createBufferSource(); source.buffer = audioChunks[i].buffer; source.connect(audioContext.destination); source.start(nextStartTime); nextStartTime += audioChunks[i].buffer.duration; if (i < audioChunks.length - 1) { nextStartTime += 0.3; } scheduledSources.push(source); } nextScheduledTime = nextStartTime; isPlaying = true; isPaused = false; playPauseBtn.innerHTML = PAUSE_ICON_SVG; updateProgress(); } }; const seekTo = (percentage) => { if (!audioContext || audioChunks.length === 0) return; const seekTime = (percentage / 100) * totalDuration; // Remember current playing state const wasPlaying = isPlaying; const wasPaused = isPaused; // Stop all current sources scheduledSources.forEach(source => { try { source.stop(); } catch (e) { // Already stopped } }); scheduledSources = []; // Cancel animation if (animationFrameId) { cancelAnimationFrame(animationFrameId); } // Find which chunk we should start from let accumulatedTime = 0; let startChunkIndex = 0; let offsetInChunk = seekTime; for (let i = 0; i < audioChunks.length; i++) { const chunkDuration = audioChunks[i].buffer.duration; if 
(accumulatedTime + chunkDuration > seekTime) { startChunkIndex = i; offsetInChunk = seekTime - accumulatedTime; break; } accumulatedTime += chunkDuration + 0.3; // Include silence } // If paused or finished, just update the pause position if (wasPaused || !wasPlaying) { pauseTime = seekTime; // Update UI if (progressFill) { const progress = (seekTime / totalDuration) * 100; progressFill.style.width = `${Math.min(progress, 100)}%`; } if (currentTimeDisplay) { currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true }); } // Set to paused state so play button will resume from seek position isPaused = true; isPlaying = true; // Valid state for playback if (playPauseBtn) { playPauseBtn.innerHTML = PLAY_ICON_SVG; } return; } // Resume AudioContext if it was suspended if (audioContext.state === 'suspended') { audioContext.resume(); } // Reschedule from the seek point startTime = audioContext.currentTime - seekTime; let nextStartTime = audioContext.currentTime; for (let i = startChunkIndex; i < audioChunks.length; i++) { const source = audioContext.createBufferSource(); source.buffer = audioChunks[i].buffer; source.connect(audioContext.destination); if (i === startChunkIndex) { // Start from offset source.start(nextStartTime, offsetInChunk); nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); } else { source.start(nextStartTime); nextStartTime += audioChunks[i].buffer.duration; } // Add silence between chunks if (i < audioChunks.length - 1) { nextStartTime += 0.3; } scheduledSources.push(source); } // Update nextScheduledTime for any future chunks nextScheduledTime = nextStartTime; // Resume playing state isPlaying = true; isPaused = false; if (playPauseBtn) { playPauseBtn.innerHTML = PAUSE_ICON_SVG; } // Restart progress animation updateProgress(); }; // Callback for first chunk ready - create custom player and start playback const onFirstChunkReady = async (url, duration, text, numChunks, firstChunkTime, processedChars) => { totalChunks = 
numChunks; firstChunkGenerationTime = firstChunkTime; const container = document.getElementById('demoResults'); if (!firstFinished) { firstFinished = true; } const textLength = currentGenerationTextLength > 0 ? currentGenerationTextLength : (text ? text.length : 0); const isBatch = textLength >= MAX_CHUNK_LENGTH; const processingTimeStr = isBatch && firstChunkTime ? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}` : formatTimeDetailed(firstChunkTime); const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0; const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars; const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0 ? (displayedInitialChars / firstChunkTime).toFixed(1) : '0.0'; const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-'; const progressValue = textLength > 0 ? Math.min(100, (displayedInitialChars / textLength) * 100) : 0; const resultItemEl = document.getElementById('supertonic-result'); if (!resultItemEl) { console.warn('Supertonic result container not found.'); return; } resultItemEl.classList.remove('generating'); resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); const titleMainEl = resultItemEl.querySelector('.title-main'); if (titleMainEl) { titleMainEl.textContent = 'Supertonic'; titleMainEl.style.color = 'var(--supertone_blue)'; } const titleSubEl = resultItemEl.querySelector('.title-sub'); if (titleSubEl) { titleSubEl.textContent = 'On-Device'; } const infoContainer = resultItemEl.querySelector('.demo-result-info'); if (infoContainer) { infoContainer.classList.remove('error'); } const timeElInitial = document.getElementById('supertonic-time'); if (timeElInitial) { timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); } const cpsElInitial = document.getElementById('supertonic-cps'); if (cpsElInitial) { 
cpsElInitial.textContent = charsPerSec; } const rtfElInitial = document.getElementById('supertonic-rtf'); if (rtfElInitial) { rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x'); } const playerContainer = resultItemEl.querySelector('.custom-audio-player'); if (playerContainer) { playerContainer.style.display = ''; playerContainer.innerHTML = `
0:00.00
${formatTime(duration, { trimMobile: true })}
`; } container.style.display = 'flex'; latestSupertonicProcessedChars = displayedInitialChars; // Get UI elements playPauseBtn = document.getElementById('play-pause-btn'); progressBar = document.getElementById('progress-container'); currentTimeDisplay = document.getElementById('current-time'); durationDisplay = document.getElementById('total-duration'); progressFill = document.getElementById('progress-fill'); // Initialize Web Audio API audioContext = new (window.AudioContext || window.webkitAudioContext)(); startTime = audioContext.currentTime; totalDuration = duration; isPlaying = true; isPaused = false; // Create Supertonic player record and register it const pausePlayback = () => { if (!audioContext || audioContext.state === 'closed') return; if (isPlaying) { pauseTime = audioContext.currentTime - startTime; scheduledSources.forEach(source => { try { source.stop(); } catch (e) { // Already stopped } }); scheduledSources = []; audioContext.suspend(); isPaused = true; isPlaying = false; if (playPauseBtn) { playPauseBtn.innerHTML = PLAY_ICON_SVG; } if (animationFrameId) { cancelAnimationFrame(animationFrameId); } } }; supertonicPlayerRecord = { audioContext: audioContext, pausePlayback: pausePlayback }; // Remove old Supertonic player if exists and add new one customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext); customAudioPlayers.push(supertonicPlayerRecord); // Pause all other players before starting Supertonic pauseAllPlayersExcept(supertonicPlayerRecord); // Fetch and decode first chunk const response = await fetch(url); const arrayBuffer = await response.arrayBuffer(); const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration }); // Play first chunk immediately const source = audioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(audioContext.destination); 
source.start(audioContext.currentTime); scheduledSources.push(source); // Set next scheduled time for additional chunks nextScheduledTime = audioContext.currentTime + audioBuffer.duration + 0.3; // Add silence gap // Setup player controls playPauseBtn.addEventListener('click', togglePlayPause); progressBar.addEventListener('click', (e) => { const rect = progressBar.getBoundingClientRect(); const percentage = ((e.clientX - rect.left) / rect.width) * 100; seekTo(percentage); }); // Start progress animation updateProgress(); // Clean up URL URL.revokeObjectURL(url); }; // Callback for each additional chunk - schedule seamlessly const onChunkAdded = async (url, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => { if (!audioContext) return; // Fetch and decode the new chunk const response = await fetch(url); const arrayBuffer = await response.arrayBuffer(); const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); const chunkDuration = audioBuffer.duration; audioChunks.push({ buffer: audioBuffer, duration: chunkDuration }); // Schedule the new chunk at the pre-calculated time const source = audioContext.createBufferSource(); source.buffer = audioBuffer; source.connect(audioContext.destination); source.start(nextScheduledTime); scheduledSources.push(source); // Update next scheduled time for the next chunk nextScheduledTime = nextScheduledTime + audioBuffer.duration + 0.3; // Add silence gap // Update total duration totalDuration = duration; // Update duration display with smooth animation if (durationDisplay) { durationDisplay.textContent = formatTime(duration, { trimMobile: true }); durationDisplay.style.transition = 'color 0.3s'; durationDisplay.style.color = 'var(--supertone_blue)'; setTimeout(() => { durationDisplay.style.color = ''; }, 300); } // Update info display const textLengthCandidate = currentGenerationTextLength > 0 ? 
currentGenerationTextLength : demoTextInput.value.trim().length; const textLength = textLengthCandidate; const isBatch = textLength >= MAX_CHUNK_LENGTH; const timeEl = document.getElementById('supertonic-time'); const durationEl = document.getElementById('supertonic-duration'); const cpsEl = document.getElementById('supertonic-cps'); const rtfEl = document.getElementById('supertonic-rtf'); const effectiveProcessedChars = typeof processedChars === 'number' ? processedChars : latestSupertonicProcessedChars; if (effectiveProcessedChars < latestSupertonicProcessedChars) { URL.revokeObjectURL(url); return; } const clampedProcessedChars = textLength > 0 ? Math.min(effectiveProcessedChars, textLength) : effectiveProcessedChars; const progressValue = textLength > 0 ? Math.min(100, (clampedProcessedChars / textLength) * 100) : 0; if (durationEl) { durationEl.textContent = formatTimeDetailed(duration); } if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) { const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`; timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true }); } if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) { const charsPerSec = (clampedProcessedChars / currentProcessingTime).toFixed(1); cpsEl.textContent = charsPerSec; } if (rtfEl && duration > 0 && currentProcessingTime > 0) { const rtf = (currentProcessingTime / duration).toFixed(3); rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); } const resultItemEl = document.getElementById('supertonic-result'); if (resultItemEl) { resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); } latestSupertonicProcessedChars = clampedProcessedChars; // Clean up URL URL.revokeObjectURL(url); }; // Start all syntheses simultaneously const supertonicPromise = generateSupertonicSpeechChunked( text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded ); const 
elevenlabsPromise = elevenlabsApiKey ? generateSpeechElevenLabs(text, elevenlabsApiKey) : null; const openaiPromise = openaiApiKey ? generateSpeechOpenAI(text, openaiApiKey) : null; const geminiPromise = geminiApiKey ? generateSpeechGemini(text, geminiApiKey) : null; // Handle results as they arrive supertonicPromise.then(result => { supertonicResult = result; if (result.success) { const textLength = result.text ? result.text.length : 0; const isBatch = textLength >= MAX_CHUNK_LENGTH; const processingTimeStr = isBatch && firstChunkGenerationTime > 0 ? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}` : formatTimeDetailed(result.processingTime); const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0'; const progressValue = textLength > 0 ? 100 : 0; const progressDisplay = progressValue.toFixed(1); const timeEl = document.getElementById('supertonic-time'); const durationEl = document.getElementById('supertonic-duration'); const cpsEl = document.getElementById('supertonic-cps'); const rtfEl = document.getElementById('supertonic-rtf'); if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration); latestSupertonicProcessedChars = textLength; if (cpsEl) cpsEl.textContent = charsPerSec; if (rtfEl) { const rtf = result.audioDuration > 0 ? 
(result.processingTime / result.audioDuration).toFixed(3) : '-'; rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); } const resultItemEl = document.getElementById('supertonic-result'); if (resultItemEl) { resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); } latestSupertonicProcessedChars = textLength; // Final duration update (if custom player was used) if (audioContext && audioChunks.length > 0) { totalDuration = result.audioDuration; if (durationDisplay) { durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true }); } } // Always show download button const downloadBtn = document.getElementById('supertonic-download'); if (downloadBtn) { downloadBtn.parentElement.style.display = 'block'; downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav'); } } // Update comparison table immediately if (hasComparison) { updateComparisonRow('supertonic', result); // Highlight winner if all are done const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null); const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult); if (allFinished && allResults.length > 1) { highlightWinner(allResults); } } }); if (elevenlabsPromise) { elevenlabsPromise.then(result => { elevenlabsResult = result; renderResult('elevenlabs', result, !firstFinished); if (!firstFinished) firstFinished = true; // Update comparison table immediately updateComparisonRow('elevenlabs', result); // Highlight winner if all are done const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null); const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult); if (allFinished && allResults.length > 1) { highlightWinner(allResults); } }); } if (openaiPromise) { openaiPromise.then(result => { openaiResult = result; 
renderResult('openai', result, !firstFinished); if (!firstFinished) firstFinished = true; // Update comparison table immediately updateComparisonRow('openai', result); // Highlight winner if all are done const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null); const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult); if (allFinished && allResults.length > 1) { highlightWinner(allResults); } }); } if (geminiPromise) { geminiPromise.then(result => { geminiResult = result; renderResult('gemini', result, !firstFinished); if (!firstFinished) firstFinished = true; // Update comparison table immediately updateComparisonRow('gemini', result); // Highlight winner if all are done const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null); const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult); if (allFinished && allResults.length > 1) { highlightWinner(allResults); } }); } // Wait for all to complete await Promise.allSettled([supertonicPromise, elevenlabsPromise, openaiPromise, geminiPromise].filter(p => p !== null)); // If no API key, mark as skipped if (!elevenlabsApiKey && hasComparison) { const elevenlabsStatus = document.getElementById('elevenlabsStatus'); const elevenlabsTime = document.getElementById('elevenlabsTime'); if (elevenlabsStatus) { elevenlabsStatus.textContent = '⏭️ Skipped'; elevenlabsStatus.className = 'demo-comparison-cell'; } if (elevenlabsTime) { elevenlabsTime.textContent = 'No API key'; } } if (!openaiApiKey && hasComparison) { const openaiStatus = document.getElementById('openaiStatus'); const openaiTime = document.getElementById('openaiTime'); if (openaiStatus) { openaiStatus.textContent = '⏭️ Skipped'; openaiStatus.className = 'demo-comparison-cell'; } if (openaiTime) { openaiTime.textContent = 'No API 
key'; } } if (!geminiApiKey && hasComparison) { const geminiStatus = document.getElementById('geminiStatus'); const geminiTime = document.getElementById('geminiTime'); if (geminiStatus) { geminiStatus.textContent = '⏭️ Skipped'; geminiStatus.className = 'demo-comparison-cell'; } if (geminiTime) { geminiTime.textContent = 'No API key'; } } } catch (error) { showDemoStatus(`Error: ${error.message}`, 'error'); showDemoError(`Error during synthesis: ${error.message}`); console.error('Synthesis error:', error); // Restore placeholder demoResults.style.display = 'none'; demoResults.innerHTML = `
🎙️

Your generated speech will appear here

`;
  } finally {
    // Always re-arm the generate UI, whether synthesis succeeded or failed.
    isGenerating = false;
    demoGenerateBtn.disabled = false;
    // Re-enable voice toggle after generation
    const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
    voiceToggleTexts.forEach(text => text.classList.remove('disabled'));
  }
}

// Download handler (make it global so inline onclick handlers can reach it).
// Triggers a download of the given object/blob URL under `filename`.
window.downloadDemoAudio = function(url, filename) {
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  a.click();
};

/**
 * Mirror the current slider positions into their visible value labels.
 * Relies on demoTotalSteps / demoDurationFactor and their *Value label
 * elements captured earlier in this scope.
 */
function updateSliderValues() {
  demoTotalStepsValue.textContent = demoTotalSteps.value;
  // Remove unnecessary trailing zeros (1.00 -> 1, 0.80 -> 0.8, 0.75 -> 0.75)
  demoDurationFactorValue.textContent = parseFloat(parseFloat(demoDurationFactor.value).toFixed(2));
}

// Attach slider event listeners
demoTotalSteps.addEventListener('input', updateSliderValues);
demoDurationFactor.addEventListener('input', updateSliderValues);
// Initialize slider values
updateSliderValues();
// Attach generate function to button
demoGenerateBtn.addEventListener('click', generateSpeech);

// Preset text buttons (defined before input listener to share scope)
const presetButtons = document.querySelectorAll('[data-preset]');
const freeformBtn = document.getElementById('freeformBtn');
let currentPreset = 'quote'; // Initialize with quote
let isPresetChanging = false; // Flag to track if text change is from preset button

/**
 * Mark the button for `presetType` as the single active preset button,
 * record it in `currentPreset`, and sync the quote-mode styling.
 * Passing a falsy value simply deactivates all buttons.
 */
function updateActiveButton(presetType) {
  // Remove active from all buttons
  presetButtons.forEach(btn => btn.classList.remove('active'));
  // Add active to the specified button
  if (presetType) {
    const targetBtn = document.querySelector(`[data-preset="${presetType}"]`);
    if (targetBtn) {
      targetBtn.classList.add('active');
    }
  }
  currentPreset = presetType;
  updateQuoteModeState(presetType === 'quote');
}

/** Toggle the results panel's quote-mode styling on/off. */
function updateQuoteModeState(isQuote) {
  if (!demoResults) return;
  demoResults.classList.toggle('quote-mode', Boolean(isQuote));
}
// Initialize with quote button active
updateActiveButton('quote');

// Preset buttons: clicking one loads its canned text (or clears it for
// "freeform") and activates the matching button. `isPresetChanging` guards
// the input listener below from treating a programmatic value change as a
// manual user edit.
presetButtons.forEach(btn => {
  btn.addEventListener('click', () => {
    const presetType = btn.getAttribute('data-preset');
    if (presetType === 'freeform') {
      // Freeform button: clear text
      isPresetChanging = true;
      demoTextInput.value = '';
      updateCharCounter();
      updateActiveButton('freeform');
      isPresetChanging = false;
    } else {
      // Other preset buttons: set text
      const text = presetTexts[presetType];
      if (text) {
        isPresetChanging = true;
        demoTextInput.value = text;
        updateCharCounter();
        updateActiveButton(presetType);
        isPresetChanging = false;
      }
    }
  });
});

// Update character counter on input; any genuine user edit switches the
// active preset to "freeform".
let previousTextValue = demoTextInput.value;
demoTextInput.addEventListener('input', () => {
  updateCharCounter();
  // If text was modified by user (not from preset button), switch to freeform
  if (!isPresetChanging && demoTextInput.value !== previousTextValue) {
    updateActiveButton('freeform');
  }
  previousTextValue = demoTextInput.value;
});

// Update font size when window is resized (for responsive width-based font
// sizing), debounced to at most one call per 100 ms.
let resizeTimeout;
window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    updateCharCounter();
  }, 100);
});

// Initialize character counter
updateCharCounter();

// Voice toggle button handlers
const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
// Disable voice toggle texts initially (re-enabled once models are loaded)
voiceToggleTexts.forEach(text => text.classList.add('disabled'));
voiceToggleTexts.forEach(text => {
  text.addEventListener('click', async () => {
    if (text.classList.contains('disabled')) return;
    const selectedVoice = text.getAttribute('data-voice');
    // Don't reload if already selected
    if (selectedVoice === currentVoice) {
      return;
    }
    // Update UI
    voiceToggleTexts.forEach(t => t.classList.remove('active'));
    text.classList.add('active');
    // Disable all controls while loading
    const wasDisabled = demoGenerateBtn.disabled;
    demoGenerateBtn.disabled = true;
    voiceToggleTexts.forEach(t => t.classList.add('disabled'));
    try {
      await switchVoice(selectedVoice);
      // Re-enable texts if models are loaded
      if (models && cfgs && processors) {
        demoGenerateBtn.disabled = false;
        voiceToggleTexts.forEach(t => t.classList.remove('disabled'));
      }
    } catch (error) {
      console.error('Failed to switch voice:', error);
      // Revert UI on error
      // NOTE(review): assumes a toggle for `currentVoice` always exists —
      // querySelector returning null here would throw. Confirm.
      voiceToggleTexts.forEach(t => t.classList.remove('active'));
      document.querySelector(`[data-voice="${currentVoice}"]`).classList.add('active');
      // Re-enable texts
      voiceToggleTexts.forEach(t => t.classList.remove('disabled'));
      if (!wasDisabled) demoGenerateBtn.disabled = false;
    }
  });
});

// Title animation setup
const demoTitleLeft = document.querySelector('.demo-title-left');
const demoTitleRight = document.querySelector('.demo-title-right');
const demoInputSection = document.querySelector('.demo-input-section');
const demoOutputSection = document.querySelector('.demo-output-section');

// Initialize Text with letters wrapped in spans so each letter can be
// revealed individually by the click animation below.
// NOTE(review): the per-letter markup appears to have been stripped from
// this template literal by extraction — verify against the deployed file.
if (demoTitleLeft) {
  const text = demoTitleLeft.textContent.trim();
  demoTitleLeft.innerHTML = text.split('').map(char =>
    char === ' ' ? ' ' : `${char}`
  ).join('');
}

// Text animation on demo-input-section click
if (demoInputSection && demoTitleLeft) {
  demoInputSection.addEventListener('click', () => {
    const letters = demoTitleLeft.querySelectorAll('.letter');
    // Reset all letters
    letters.forEach(letter => {
      letter.classList.remove('visible');
    });
    // Show letters one by one (total 0.25s = 0.125s / 2)
    letters.forEach((letter, index) => {
      setTimeout(() => {
        letter.classList.add('visible');
      }, index * 0.0625 * 1000); // 0.0625s delay between each letter
    });
  });
}

// Speech animation on demo-output-section click
if (demoOutputSection && demoTitleRight) {
  demoOutputSection.addEventListener('click', (event) => {
    // Ignore clicks that land on the Generate button itself.
    if (event.target.closest('#demoGenerateBtn')) {
      return;
    }
    demoTitleRight.classList.remove('animate-speech');
    // Trigger reflow so re-adding the class restarts the CSS animation
    void demoTitleRight.offsetWidth;
    demoTitleRight.classList.add('animate-speech');
  });
}

/**
 * Map a display provider name to the slug used in sample-audio filenames.
 * Unknown providers fall through to their lowercased name; falsy input
 * yields null.
 * @param {string|null|undefined} providerName
 * @returns {string|null}
 */
function getProviderSlugForAudio(providerName) {
  if (!providerName) return null;
  const normalized = providerName.toLowerCase();
  switch (normalized) {
    case 'supertone':
    case 'supertonic':
      return 'supertone';
    case 'elevenlabs':
      return 'elevenlabs';
    case 'openai':
      return 'openai';
    case 'gemini':
      return 'gemini';
    case 'microsoft':
      return 'microsoft';
    default:
      return normalized;
  }
}

/**
 * Sync a card's play/pause button visuals and ARIA attributes with its
 * playing state. No-op when the card has no play button.
 */
function updateTextHandlingPlayButtonState(cardState, isPlaying) {
  if (!cardState.playButton) return;
  cardState.playButton.classList.toggle('is-playing', isPlaying);
  cardState.playButton.setAttribute('aria-pressed', String(isPlaying));
  const action = isPlaying ? 'Pause' : 'Play';
  cardState.playButton.setAttribute('aria-label', `${action} ${cardState.sampleTitle} sample`);
}

/**
 * 'ended' handler for a card's sample audio: rewind and reset play state,
 * but only if that clip is still the card's current audio (stale events
 * from a previously selected provider are ignored).
 */
function handleTextHandlingAudioEnded(cardState, audioEl) {
  if (cardState.currentAudio !== audioEl) {
    return;
  }
  cardState.isPlaying = false;
  cardState.isPaused = false;
  audioEl.currentTime = 0;
  updateTextHandlingPlayButtonState(cardState, false);
}

/**
 * Lazily create and cache (per card, keyed by provider slug) the Audio
 * element for that provider's sample clip.
 */
function getOrCreateTextHandlingAudio(cardState, providerSlug) {
  if (!cardState.audioElements.has(providerSlug)) {
    // Microsoft uses .wav files, others use .mp3
    const audioExtension = providerSlug === 'microsoft' ? 'wav' : 'mp3';
    const audioPath = `audio/${providerSlug}_speech-${cardState.audioNumber}.${audioExtension}`;
    const audioEl = new Audio(audioPath);
    audioEl.preload = 'auto';
    audioEl.addEventListener('ended', () => handleTextHandlingAudioEnded(cardState, audioEl));
    cardState.audioElements.set(providerSlug, audioEl);
  }
  return cardState.audioElements.get(providerSlug);
}

/**
 * Pause the card's current audio. With reset=true the clip is rewound and
 * detached from the card; otherwise the position is kept so playback can
 * resume, and isPaused records whether we stopped mid-clip.
 */
function pauseTextHandlingAudio(cardState, { reset = false } = {}) {
  const audioEl = cardState.currentAudio;
  if (!audioEl) {
    cardState.isPlaying = false;
    if (reset) {
      cardState.isPaused = false;
    }
    return;
  }
  try {
    audioEl.pause();
  } catch (error) {
    console.warn('Failed to pause audio', error);
  }
  if (reset) {
    audioEl.currentTime = 0;
    cardState.currentAudio = null;
    cardState.isPaused = false;
  } else {
    // Only count as "paused" when stopped somewhere inside the clip.
    cardState.isPaused = audioEl.currentTime > 0 && audioEl.currentTime < audioEl.duration;
  }
  cardState.isPlaying = false;
  updateTextHandlingPlayButtonState(cardState, false);
}

/**
 * Start (or restart) playback of the card's currently selected provider
 * sample, stopping any other card's audio first. Play failures (e.g.
 * autoplay restrictions) are logged, not thrown.
 */
function playTextHandlingAudio(cardState, { restart = false } = {}) {
  const providerName = cardState.currentProvider;
  const providerSlug = getProviderSlugForAudio(providerName);
  if (!providerSlug) {
    return;
  }
  const audioEl = getOrCreateTextHandlingAudio(cardState, providerSlug);
  if (!audioEl) {
    return;
  }
  // Switching providers: fully stop the previous clip first.
  if (cardState.currentAudio && cardState.currentAudio !== audioEl) {
    pauseTextHandlingAudio(cardState, { reset: true });
  }
  cardState.currentAudio = audioEl;
  if (restart || audioEl.ended) {
    audioEl.currentTime = 0;
  }
  pauseTextHandlingPlayersExcept(cardState.playerRecord);
  const playPromise = audioEl.play();
  if (playPromise && typeof playPromise.catch === 'function') {
    playPromise.catch(error => {
      console.warn('Failed to play text-handling audio', error);
    });
  }
  cardState.isPlaying = true;
  cardState.isPaused = false;
  updateTextHandlingPlayButtonState(cardState, true);
}

/**
 * Provider pill clicked: activate it (ARIA included), update the model
 * label / data attribute / brand color, and restart playback with that
 * provider's sample.
 */
function handleTextHandlingProviderSelection(cardState, option) {
  cardState.providerOptions.forEach(btn => {
    const isActive = btn === option;
    btn.classList.toggle('active', isActive);
    btn.setAttribute('aria-pressed', String(isActive));
  });
  const providerName = option.dataset.provider || option.textContent.trim();
  cardState.currentProvider = providerName;
  if (cardState.textModelLabel) {
    cardState.textModelLabel.textContent = getProviderLabel(providerName);
  }
  if (cardState.textModel) {
    cardState.textModel.setAttribute('data-selected-provider', providerName);
  }
  cardState.card.style.setProperty('--provider-color', getProviderColor(providerName));
  playTextHandlingAudio(cardState, { restart: true });
}

/**
 * Play/pause button clicked: toggle playback, resuming from a pause when
 * possible. If no provider is selected yet, click the first pill (which
 * also starts playback) and return.
 */
function handleTextHandlingPlayClick(cardState) {
  const activeOption = cardState.card.querySelector('.provider-option.active');
  if (!activeOption) {
    const defaultOption = cardState.providerOptions[0];
    if (defaultOption) {
      defaultOption.click();
    }
    return;
  }
  if (cardState.isPlaying) {
    pauseTextHandlingAudio(cardState);
    return;
  }
  if (cardState.isPaused && cardState.currentAudio) {
    // Resume the existing clip from where it was paused.
    pauseTextHandlingPlayersExcept(cardState.playerRecord);
    const resumePromise = cardState.currentAudio.play();
    if (resumePromise && typeof resumePromise.catch === 'function') {
      resumePromise.catch(error => console.warn('Failed to resume audio', error));
    }
    cardState.isPlaying = true;
    cardState.isPaused = false;
    updateTextHandlingPlayButtonState(cardState, true);
    return;
  }
  playTextHandlingAudio(cardState, { restart: false });
}

/**
 * Wire up every .text-handling-card: build its state record, attach the
 * provider pills and play button, and register the card in the global
 * textHandlingAudioPlayers list so players can pause one another.
 */
function initTextHandlingCards() {
  const cards = document.querySelectorAll('.text-handling-card');
  if (!cards.length) {
    return;
  }
  cards.forEach((card, index) => {
    const providerOptions = Array.from(card.querySelectorAll('.provider-option'));
    const textModel = card.querySelector('.text-model');
    const textModelLabel = card.querySelector('.text-model-label');
    const playButton = card.querySelector('.text-handling-player');
    const sampleTitle = card.querySelector('.text-handling-label')?.textContent?.trim() || 'sample';
    const cardState = {
      card,
      providerOptions,
      textModel,
      textModelLabel,
      playButton,
      sampleTitle,
      // Sample file number for this card; falls back to the 1-based index.
      audioNumber: TEXT_HANDLING_CARD_AUDIO_MAP[index] || index + 1,
      audioElements: new Map(),
      currentProvider: null,
      currentAudio: null,
      isPlaying: false,
      isPaused: false,
      playerRecord: null
    };
    const playerRecord = { pausePlayback: () => pauseTextHandlingAudio(cardState) };
    cardState.playerRecord = playerRecord;
    textHandlingAudioPlayers.push(playerRecord);
    providerOptions.forEach(option => {
      option.addEventListener('click', () => handleTextHandlingProviderSelection(cardState, option));
    });
    if (playButton) {
      playButton.addEventListener('click', () => handleTextHandlingPlayClick(cardState));
      updateTextHandlingPlayButtonState(cardState, false);
    }
    card.style.setProperty('--provider-color', getProviderColor('Supertone'));
  });
}

/**
 * Resolve a provider's brand color from CSS custom properties, with
 * hard-coded hex fallbacks.
 * NOTE(review): getPropertyValue may return a value with leading
 * whitespace, which is still truthy — the `||` fallback only fires when
 * the variable is absent entirely. Confirm a .trim() isn't needed.
 */
function getProviderColor(provider) {
  switch (provider) {
    case 'Supertone':
    case 'supertone':
      return getComputedStyle(document.documentElement).getPropertyValue('--supertone_blue') || '#227CFF';
    case 'ElevenLabs':
      return getComputedStyle(document.documentElement).getPropertyValue('--brand-elevenlabs') || '#999999';
    case 'OpenAI':
      return getComputedStyle(document.documentElement).getPropertyValue('--brand-openai') || '#52a584';
    case 'Gemini':
      return getComputedStyle(document.documentElement).getPropertyValue('--brand-gemini') || '#887eca';
    case 'Microsoft':
      return getComputedStyle(document.documentElement).getPropertyValue('--brand-microsoft') || '#00A4EF';
    default:
      return getComputedStyle(document.documentElement).getPropertyValue('--primary') || '#227CFF';
  }
}

/**
 * Map a provider name (case-insensitive) to the model label shown in the
 * card UI; unrecognized names are echoed back, empty input yields
 * 'Supertonic'.
 */
function getProviderLabel(provider) {
  switch ((provider || '').toLowerCase()) {
    case 'supertone':
      return 'Supertonic';
    case 'elevenlabs':
      return 'Flash v2.5';
    case 'openai':
      return 'TTS-1';
    case 'gemini':
      return '2.5 Flash TTS';
    case 'microsoft':
      return 'VibeVoice Realtime 0.5B';
    default:
      return provider || 'Supertonic';
  }
}

initTextHandlingCards();

// Initialize models
initializeModels();
})();