diff --git "a/script.js" "b/script.js" new file mode 100644--- /dev/null +++ "b/script.js" @@ -0,0 +1,3073 @@ +import * as ort from 'onnxruntime-web'; +const presetTexts = window.presetTexts || {}; + +const PLAY_ICON_SVG = ``; +const PAUSE_ICON_SVG = ``; +const STOP_ICON_SVG = ``; + +// Lightning background parallax +(function initLightningParallax() { + if (typeof document === 'undefined') { + return; + } + + const runBlink = (className, onComplete) => { + let remaining = 1 + Math.round(Math.random()); + const blink = () => { + if (remaining-- <= 0) { + if (typeof onComplete === 'function') { + onComplete(); + } + return; + } + const wait = 20 + Math.random() * 80; + document.body.classList.add(className); + setTimeout(() => { + document.body.classList.remove(className); + setTimeout(blink, wait); + }, wait); + }; + blink(); + }; + + const schedule = () => { + setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000); + }; + schedule(); +})(); + +function escapeHtml(value) { + return value.replace(/[&<>"']/g, (match) => { + switch (match) { + case '&': return '&'; + case '<': return '<'; + case '>': return '>'; + case '"': return '"'; + case "'": return '''; + default: return match; + } + }); +} + +function formatStatValueWithSuffix(value, suffix, options = {}) { + const { firstLabel = false } = options; + if (value === undefined || value === null) { + return ''; + } + if (!suffix) { + const raw = `${value}`; + return escapeHtml(raw); + } + const raw = `${value}`.trim(); + if (!raw || raw === '--' || raw === '-' || raw.toLowerCase() === 'error') { + return escapeHtml(raw); + } + const appendSuffix = (segment, includePrefix = false) => { + const trimmed = segment.trim(); + if (!trimmed) { + return ''; + } + const escapedValue = `${escapeHtml(trimmed)}`; + const suffixSpan = `${escapeHtml(suffix)}`; + const prefixSpan = includePrefix && firstLabel + ? `First` + : ''; + const segmentClass = includePrefix && firstLabel + ? 
'stat-value-segment has-prefix'
      : 'stat-value-segment';
    return `${prefixSpan}${escapedValue}${suffixSpan}`;
  };
  // "a/b" values are formatted segment by segment, suffix on each part
  if (raw.includes('/')) {
    const parts = raw.split('/');
    const segments = parts.map((part, index) => appendSuffix(part, index === 0));
    return segments.join(' / ');
  }
  return appendSuffix(raw);
}

/**
 * Unicode text processor: maps preprocessed text to integer id rows for the
 * ONNX text encoder, using a {codePoint -> index} lookup table.
 */
export class UnicodeProcessor {
  constructor(indexer) {
    // indexer: object mapping unicode values to model vocabulary indices
    this.indexer = indexer;
  }

  /**
   * Convert a batch of raw texts to padded id matrices plus a mask.
   * @param {string[]} textList - Raw input texts (preprocessed internally).
   * @param {?string} lang - Optional language tag forwarded to preprocessText.
   * @returns {{textIds: number[][], textMask: number[][][], unsupportedChars: string[]}}
   */
  call(textList, lang = null) {
    const processedTexts = textList.map(t => preprocessText(t, lang));
    const textIdsLengths = processedTexts.map(t => t.length);
    const maxLen = Math.max(...textIdsLengths);

    const textIds = [];
    const unsupportedChars = new Set();

    for (let i = 0; i < processedTexts.length; i++) {
      // Rows are zero-padded up to the longest text in the batch
      const row = new Array(maxLen).fill(0);
      const unicodeVals = textToUnicodeValues(processedTexts[i]);
      for (let j = 0; j < unicodeVals.length; j++) {
        const indexValue = this.indexer[unicodeVals[j]];
        // Check if character is supported (not -1, undefined, or null)
        if (indexValue === undefined || indexValue === null || indexValue === -1) {
          unsupportedChars.add(processedTexts[i][j]);
          row[j] = 0; // Use 0 as fallback
        } else {
          row[j] = indexValue;
        }
      }
      textIds.push(row);
    }

    const textMask = getTextMask(textIdsLengths);
    return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) };
  }
}

const AVAILABLE_LANGS = ["en", "ko", "es", "pt", "fr"];

/**
 * Language detection based on character patterns and language-specific markers
 * Returns the detected language code or null if uncertain
 */
export function detectLanguage(text) {
  // Too little signal in fewer than 3 non-space characters
  if (!text || text.trim().length < 3) {
    return null;
  }

  // Only consider last 100 characters for efficiency
  const sampleText = text.length > 100 ?
text.substring(text.length - 100) : text;

  // Normalize text for analysis
  const normalizedText = sampleText.normalize('NFC').toLowerCase();

  // Korean detection: Hangul characters (most reliable)
  const koreanRegex = /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uD7B0-\uD7FF]/g;
  const koreanMatches = normalizedText.match(koreanRegex) || [];
  if (koreanMatches.length >= 2) {
    return 'ko';
  }

  // Scoring system for Latin-based languages
  const scores = { en: 0, es: 0, fr: 0, pt: 0 };

  // 1. Highly distinctive characters (definitive markers)
  if (/ñ/.test(normalizedText)) scores.es += 15;
  if (/[¿¡]/.test(normalizedText)) scores.es += 12;
  if (/ã/.test(normalizedText)) scores.pt += 15;
  if (/õ/.test(normalizedText)) scores.pt += 15;
  if (/œ/.test(normalizedText)) scores.fr += 15;
  if (/[ùû]/.test(normalizedText)) scores.fr += 10;

  // ç is shared between French and Portuguese
  if (/ç/.test(normalizedText)) {
    scores.fr += 4;
    scores.pt += 4;
  }

  // French-specific accent patterns
  if (/[èêë]/.test(normalizedText)) scores.fr += 5;
  if (/[àâ]/.test(normalizedText)) scores.fr += 3;
  if (/[îï]/.test(normalizedText)) scores.fr += 4;
  if (/ô/.test(normalizedText)) scores.fr += 3;

  // 2. Exclusive stopwords (words unique to one language)
  const exclusiveWords = {
    en: ['the', 'is', 'are', 'was', 'were', 'have', 'has', 'been', 'will', 'would', 'could', 'should', 'this', 'that', 'with', 'from', 'they', 'what', 'which', 'there', 'their', 'about', 'these', 'other', 'into', 'just', 'your', 'some', 'than', 'them', 'then', 'only', 'being', 'through', 'after', 'before'],
    es: ['el', 'los', 'las', 'es', 'está', 'están', 'porque', 'pero', 'muy', 'también', 'más', 'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'yo', 'tú', 'nosotros', 'ellos', 'ellas', 'hola', 'gracias', 'buenos', 'buenas', 'ahora', 'siempre', 'nunca', 'todo', 'nada', 'algo', 'alguien'],
    fr: ['le', 'les', 'est', 'sont', 'dans', 'ce', 'cette', 'ces', 'il', 'elle', 'ils', 'elles', 'je', 'tu', 'nous', 'vous', 'avec', 'sur', 'ne', 'pas', 'plus', 'tout', 'bien', 'fait', 'être', 'avoir', 'donc', 'car', 'ni', 'jamais', 'toujours', 'rien', 'quelque', 'encore', 'aussi', 'très', 'peu', 'ici'],
    pt: ['os', 'as', 'é', 'são', 'está', 'estão', 'não', 'na', 'no', 'da', 'do', 'das', 'dos', 'ao', 'aos', 'ele', 'ela', 'eles', 'elas', 'eu', 'nós', 'você', 'vocês', 'seu', 'sua', 'seus', 'suas', 'muito', 'também', 'já', 'foi', 'só', 'mesmo', 'ter', 'até', 'isso', 'olá', 'obrigado', 'obrigada', 'bom', 'boa', 'agora', 'sempre', 'nunca', 'tudo', 'nada', 'algo', 'alguém']
  };

  // Extract words from text
  const words = normalizedText.match(/[a-záàâãäåçéèêëíìîïñóòôõöúùûüýÿœæ]+/g) || [];

  // Each stopword hit adds 3 points; a word may score for multiple languages
  // (e.g. "está" is listed for both es and pt)
  for (const word of words) {
    for (const [lang, wordList] of Object.entries(exclusiveWords)) {
      if (wordList.includes(word)) {
        scores[lang] += 3;
      }
    }
  }

  // 3. Common n-grams (character patterns)
  const ngramPatterns = {
    en: [/th/g, /ing/g, /tion/g, /ight/g, /ould/g],
    es: [/ción/g, /mente/g, /ado/g, /ido/g],
    fr: [/tion/g, /ment/g, /eau/g, /aux/g, /eux/g, /oir/g, /ais/g, /ait/g, /ont/g],
    pt: [/ção/g, /ões/g, /mente/g, /ado/g, /ido/g, /nh/g, /lh/g]
  };

  // Each n-gram occurrence is worth 2 points
  for (const [lang, patterns] of Object.entries(ngramPatterns)) {
    for (const pattern of patterns) {
      const matches = normalizedText.match(pattern) || [];
      scores[lang] += matches.length * 2;
    }
  }

  // 4. French contractions and apostrophes
  const frenchContractions = /[cdjlmnst]'[aeiouéèêàâîïôûù]/g;
  const frenchContractionMatches = normalizedText.match(frenchContractions) || [];
  scores.fr += frenchContractionMatches.length * 5;

  // 5. Article patterns that help distinguish
  // "the" is very English, "el/la" Spanish, "le/la" French, "o/a" Portuguese
  if (/\bthe\b/.test(normalizedText)) scores.en += 5;
  if (/\b(el|los)\b/.test(normalizedText)) scores.es += 4;
  if (/\b(le|les)\b/.test(normalizedText)) scores.fr += 4;
  if (/\b(o|os)\b/.test(normalizedText)) scores.pt += 3;

  // Find the language with the highest score
  let maxScore = 0;
  let detectedLang = null;

  for (const [lang, score] of Object.entries(scores)) {
    if (score > maxScore) {
      maxScore = score;
      detectedLang = lang;
    }
  }

  // Only return if we have enough confidence (minimum threshold)
  if (maxScore >= 4) {
    return detectedLang;
  }

  return null;
}

// Language display names for toast notification
const LANGUAGE_NAMES = {
  'en': 'English',
  'ko': 'Korean',
  'es': 'Spanish',
  'pt': 'Portuguese',
  'fr': 'French'
};

/**
 * Normalize and clean raw text for the TTS front-end: strips emojis and odd
 * symbols, canonicalizes quotes/dashes, tidies punctuation spacing, ensures a
 * terminal punctuation mark, and wraps the text in language tags.
 * @param {string} text - Raw user text.
 * @param {?string} lang - One of AVAILABLE_LANGS, or null for the default tag.
 * @throws {Error} If lang is given but not in AVAILABLE_LANGS.
 */
export function preprocessText(text, lang = null) {
  // Normalize unicode characters
  text = text.normalize('NFKD');

  // Remove emojis
  text = 
text.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]+/gu, '');

  // Replace various dashes and symbols
  const replacements = {
    "–": "-",
    "‑": "-",
    "—": "-",
    "_": " ",
    "\u201C": '"', // "
    "\u201D": '"', // "
    "\u2018": "'", // '
    "\u2019": "'", // '
    "´": "'",
    "`": "'",
    "[": " ",
    "]": " ",
    "|": " ",
    "/": " ", // FIXME: `/` should be pronounced.
    "#": " ", // FIXME: `#` should be pronounced.
    "→": " ",
    "←": " ",
  };

  for (const [k, v] of Object.entries(replacements)) {
    text = text.replaceAll(k, v);
  }

  // Remove special symbols
  text = text.replace(/[♥☆♡©\\]/g, "");

  // Replace known expressions
  const exprReplacements = {
    "@": " at ",
    "e.g.,": "for example,",
    "i.e.,": "that is,",
  };

  for (const [k, v] of Object.entries(exprReplacements)) {
    text = text.replaceAll(k, v);
  }

  // Fix spacing around punctuation
  text = text.replace(/ ,/g, ",");
  text = text.replace(/ \./g, ".");
  text = text.replace(/ !/g, "!");
  text = text.replace(/ \?/g, "?");
  text = text.replace(/ ;/g, ";");
  text = text.replace(/ :/g, ":");
  text = text.replace(/ '/g, "'");

  // Remove duplicate quotes
  while (text.includes('""')) {
    text = text.replace(/""/g, '"');
  }
  while (text.includes("''")) {
    text = text.replace(/''/g, "'");
  }
  while (text.includes("``")) {
    text = text.replace(/``/g, "`");
  }

  // Remove extra spaces
  text = text.replace(/\s+/g, " ").trim();

  // If text doesn't end with punctuation, quotes, or closing brackets, add a period
  if (!/[.!?;:,'"')\]}…。」』】〉》›»]$/.test(text)) {
    text += ".";
  }

  // Add language tags
  // NOTE(review): the tag literals below look stripped by extraction — the
  // empty template strings were presumably the closing tag `</${lang}>` and a
  // default tag pair in the else branch. Confirm against the upstream source;
  // left byte-identical here on purpose.
  if (lang !== null) {
    if (!AVAILABLE_LANGS.includes(lang)) {
      throw new Error(`Invalid language: ${lang}`);
    }
    text = `<${lang}>` + text + ``;
  } else {
    text = `` + text + ``;
  }
+ + return text; +} + +export function textToUnicodeValues(text) { + return Array.from(text).map(char => char.charCodeAt(0)); +} + +export function lengthToMask(lengths, maxLen = null) { + maxLen = maxLen || Math.max(...lengths); + const mask = []; + for (let i = 0; i < lengths.length; i++) { + const row = []; + for (let j = 0; j < maxLen; j++) { + row.push(j < lengths[i] ? 1.0 : 0.0); + } + mask.push([row]); + } + return mask; +} + +export function getTextMask(textIdsLengths) { + return lengthToMask(textIdsLengths); +} + +export function getLatentMask(wavLengths, cfgs) { + const baseChunkSize = cfgs.ae.base_chunk_size; + const chunkCompressFactor = cfgs.ttl.chunk_compress_factor; + const latentSize = baseChunkSize * chunkCompressFactor; + const latentLengths = wavLengths.map(len => + Math.floor((len + latentSize - 1) / latentSize) + ); + return lengthToMask(latentLengths); +} + +export function sampleNoisyLatent(duration, cfgs) { + const sampleRate = cfgs.ae.sample_rate; + const baseChunkSize = cfgs.ae.base_chunk_size; + const chunkCompressFactor = cfgs.ttl.chunk_compress_factor; + const ldim = cfgs.ttl.latent_dim; + + const wavLenMax = Math.max(...duration.map(d => d[0][0])) * sampleRate; + const wavLengths = duration.map(d => Math.floor(d[0][0] * sampleRate)); + const chunkSize = baseChunkSize * chunkCompressFactor; + const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize); + const latentDim = ldim * chunkCompressFactor; + + const noisyLatent = []; + for (let b = 0; b < duration.length; b++) { + const batch = []; + for (let d = 0; d < latentDim; d++) { + const row = []; + for (let t = 0; t < latentLen; t++) { + const u1 = Math.random(); + const u2 = Math.random(); + const randNormal = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2); + row.push(randNormal); + } + batch.push(row); + } + noisyLatent.push(batch); + } + + const latentMask = getLatentMask(wavLengths, cfgs); + + for (let b = 0; b < noisyLatent.length; b++) { + for (let d = 
0; d < noisyLatent[b].length; d++) { + for (let t = 0; t < noisyLatent[b][d].length; t++) { + noisyLatent[b][d][t] *= latentMask[b][0][t]; + } + } + } + + return { noisyLatent, latentMask }; +} + +export async function loadOnnx(onnxPath, opts) { + return await ort.InferenceSession.create(onnxPath, opts); +} + +export async function loadOnnxAll(basePath, opts, onProgress) { + const models = [ + { name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' }, + { name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' }, + { name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' }, + { name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' } + ]; + + const result = {}; + let loadedCount = 0; + + // Load all models in parallel + const loadPromises = models.map(async (model) => { + const session = await loadOnnx(model.path, opts); + loadedCount++; + if (onProgress) { + onProgress(model.name, loadedCount, models.length); + } + return { key: model.key, session }; + }); + + // Wait for all models to load + const loadedModels = await Promise.all(loadPromises); + + // Organize results + loadedModels.forEach(({ key, session }) => { + result[key] = session; + }); + + try { + // Download counting + await fetch('https://huggingface.co/Supertone/supertonic-2/resolve/main/config.json'); + } catch (error) { + console.warn('Failed to update download count:', error); + } + return result; +} + +export async function loadCfgs(basePath) { + const response = await fetch(`${basePath}/tts.json`); + return await response.json(); +} + +export async function loadProcessors(basePath) { + const response = await fetch(`${basePath}/unicode_indexer.json`); + const unicodeIndexerData = await response.json(); + const textProcessor = new UnicodeProcessor(unicodeIndexerData); + + return { textProcessor }; +} + +function parseWavFile(buffer) { + const view = new DataView(buffer); + + // Check RIFF header + 
const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3)); + if (riff !== 'RIFF') { + throw new Error('Not a valid WAV file'); + } + + const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11)); + if (wave !== 'WAVE') { + throw new Error('Not a valid WAV file'); + } + + let offset = 12; + let fmtChunk = null; + let dataChunk = null; + + while (offset < buffer.byteLength) { + const chunkId = String.fromCharCode( + view.getUint8(offset), + view.getUint8(offset + 1), + view.getUint8(offset + 2), + view.getUint8(offset + 3) + ); + const chunkSize = view.getUint32(offset + 4, true); + + if (chunkId === 'fmt ') { + fmtChunk = { + audioFormat: view.getUint16(offset + 8, true), + numChannels: view.getUint16(offset + 10, true), + sampleRate: view.getUint32(offset + 12, true), + bitsPerSample: view.getUint16(offset + 22, true) + }; + } else if (chunkId === 'data') { + dataChunk = { + offset: offset + 8, + size: chunkSize + }; + break; + } + + offset += 8 + chunkSize; + } + + if (!fmtChunk || !dataChunk) { + throw new Error('Invalid WAV file format'); + } + + const bytesPerSample = fmtChunk.bitsPerSample / 8; + const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels)); + const audioData = new Float32Array(numSamples); + + if (fmtChunk.bitsPerSample === 16) { + for (let i = 0; i < numSamples; i++) { + let sample = 0; + for (let ch = 0; ch < fmtChunk.numChannels; ch++) { + const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2; + sample += view.getInt16(sampleOffset, true); + } + audioData[i] = (sample / fmtChunk.numChannels) / 32768.0; + } + } else if (fmtChunk.bitsPerSample === 24) { + // Support 24-bit PCM + for (let i = 0; i < numSamples; i++) { + let sample = 0; + for (let ch = 0; ch < fmtChunk.numChannels; ch++) { + const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3; + // Read 3 bytes and convert to 
signed 24-bit integer (little-endian)
        const byte1 = view.getUint8(sampleOffset);
        const byte2 = view.getUint8(sampleOffset + 1);
        const byte3 = view.getUint8(sampleOffset + 2);
        let value = (byte3 << 16) | (byte2 << 8) | byte1;
        // Convert to signed (two's complement)
        if (value & 0x800000) {
          value = value - 0x1000000;
        }
        sample += value;
      }
      audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0; // 2^23
    }
  } else if (fmtChunk.bitsPerSample === 32) {
    // NOTE(review): 32-bit samples are read as IEEE float regardless of
    // fmtChunk.audioFormat — 32-bit integer PCM would be misread. Confirm
    // whether int32 inputs need supporting.
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4;
        sample += view.getFloat32(sampleOffset, true);
      }
      audioData[i] = sample / fmtChunk.numChannels;
    }
  } else {
    throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`);
  }

  return {
    sampleRate: fmtChunk.sampleRate,
    audioData: audioData
  };
}

// Flatten nested number arrays into a float32 ONNX tensor of the given dims.
export function arrayToTensor(array, dims) {
  const flat = array.flat(Infinity);
  return new ort.Tensor('float32', Float32Array.from(flat), dims);
}

// Flatten nested integer arrays into an int64 ONNX tensor of the given dims.
export function intArrayToTensor(array, dims) {
  const flat = array.flat(Infinity);
  return new ort.Tensor('int64', BigInt64Array.from(flat.map(x => BigInt(x))), dims);
}

/**
 * Serialize mono float samples as a 16-bit PCM WAV file.
 * @param {Float32Array|number[]} audioData - Samples in [-1, 1] (clamped).
 * @param {number} sampleRate - Output sample rate in Hz.
 * @returns {ArrayBuffer} Complete RIFF/WAVE byte buffer (44-byte header + data).
 */
export function writeWavFile(audioData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
  const blockAlign = numChannels * bitsPerSample / 8;
  const dataSize = audioData.length * bitsPerSample / 8;

  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);

  // RIFF header
  view.setUint8(0, 'R'.charCodeAt(0));
  view.setUint8(1, 'I'.charCodeAt(0));
  view.setUint8(2, 'F'.charCodeAt(0));
  view.setUint8(3, 'F'.charCodeAt(0));
  view.setUint32(4, 36 + dataSize, true);
  view.setUint8(8, 'W'.charCodeAt(0));
  view.setUint8(9, 'A'.charCodeAt(0));
  view.setUint8(10, 'V'.charCodeAt(0));
  view.setUint8(11, 'E'.charCodeAt(0));

  // fmt chunk
  view.setUint8(12, 'f'.charCodeAt(0));
  view.setUint8(13, 'm'.charCodeAt(0));
  view.setUint8(14, 't'.charCodeAt(0));
  view.setUint8(15, ' '.charCodeAt(0));
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true); // PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  // data chunk
  view.setUint8(36, 'd'.charCodeAt(0));
  view.setUint8(37, 'a'.charCodeAt(0));
  view.setUint8(38, 't'.charCodeAt(0));
  view.setUint8(39, 'a'.charCodeAt(0));
  view.setUint32(40, dataSize, true);

  // Write audio data
  // NOTE(review): Math.floor quantizes negative values asymmetrically
  // (Math.round is the more common choice) — confirm intended.
  for (let i = 0; i < audioData.length; i++) {
    const sample = Math.max(-1, Math.min(1, audioData[i]));
    const intSample = Math.floor(sample * 32767);
    view.setInt16(44 + i * 2, intSample, true);
  }

  return buffer;
}



// Smooth scroll functionality
document.addEventListener('DOMContentLoaded', () => {
  // Smooth scroll for anchor links
  document.querySelectorAll('a[href^="#"]').forEach(anchor => {
    anchor.addEventListener('click', function (e) {
      e.preventDefault();
      const href = this.getAttribute('href');
      const target = document.querySelector(href);
      if (target) {
        // Update URL with anchor
        if (history.pushState) {
          history.pushState(null, null, href);
        }
        target.scrollIntoView({
          behavior: 'smooth',
          block: 'start'
        });
      }
    });
  });

  // Add scroll animation for sections
  const observerOptions = {
    threshold: 0.1,
    rootMargin: '0px 0px -100px 0px'
  };

  // NOTE(review): this observer is created but observer.observe() is never
  // called anywhere in this handler, so the fade-in callback is dead code —
  // either wire it to the intended sections or remove it.
  const observer = new IntersectionObserver((entries) => {
    entries.forEach(entry => {
      if (entry.isIntersecting) {
        entry.target.style.opacity = '1';
        entry.target.style.transform = 'translateY(0)';
      }
    });
  }, observerOptions);

});

// TTS Demo functionality
(async function() {
  // Check if we're on a page with the TTS demo 
+ const demoTextInput = document.getElementById('demoTextInput'); + if (!demoTextInput) return; + + // Configure ONNX Runtime for WebGPU support + ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; + ort.env.wasm.numThreads = 1; + + + // Configuration + const REF_EMBEDDING_PATHS = { + 'F1': 'assets/voice_styles/F1.json', + 'F2': 'assets/voice_styles/F2.json', + 'F3': 'assets/voice_styles/F3.json', + 'F4': 'assets/voice_styles/F4.json', + 'F5': 'assets/voice_styles/F5.json', + 'M1': 'assets/voice_styles/M1.json', + 'M2': 'assets/voice_styles/M2.json', + 'M3': 'assets/voice_styles/M3.json', + 'M4': 'assets/voice_styles/M4.json', + 'M5': 'assets/voice_styles/M5.json' + }; + + // Voice descriptions + const VOICE_DESCRIPTIONS = { + 'F1': 'Sarah - A calm female voice with a slightly low tone; steady and composed.', + 'F2': 'Lily - A bright, cheerful female voice; lively, playful, and youthful with spirited energy.', + 'F3': 'Jessica - A clear, professional announcer-style female voice; articulate and broadcast-ready.', + 'F4': 'Olivia - A crisp, confident female voice; distinct and expressive with strong delivery.', + 'F5': 'Emily - A kind, gentle female voice; soft-spoken, calm, and naturally soothing.', + 'M1': 'Alex - A lively, upbeat male voice with confident energy and a standard, clear tone.', + 'M2': 'James - A deep, robust male voice; calm, composed, and serious with a grounded presence.', + 'M3': 'Robert - A polished, authoritative male voice; confident and trustworthy with strong presentation quality.', + 'M4': 'Sam - A soft, neutral-toned male voice; gentle and approachable with a youthful, friendly quality.', + 'M5': 'Daniel - A warm, soft-spoken male voice; calm and soothing with a natural storytelling quality.' 
+ }; + + // Global state + let models = null; + let cfgs = null; + let processors = null; + let currentVoice = 'M3'; // Default to Robert voice + + // Detect browser language and set initial language + function detectBrowserLanguage() { + // Get browser language (works in Chrome, Safari, Edge, Firefox, Opera, Samsung Internet) + const browserLang = navigator.language || navigator.userLanguage || 'en'; + + // Extract language code (e.g., 'en-US' -> 'en', 'ko-KR' -> 'ko') + const langCode = browserLang.split('-')[0].toLowerCase(); + + // Supported languages + const supportedLangs = ['en', 'es', 'pt', 'fr', 'ko']; + + // Return detected language if supported, otherwise default to English + return supportedLangs.includes(langCode) ? langCode : 'en'; + } + + let currentLanguage = detectBrowserLanguage(); // Auto-detect from browser + let refEmbeddingCache = {}; // Cache for embeddings + let currentStyleTtlTensor = null; + let currentStyleDpTensor = null; + let modelsLoading = false; // Track if models are currently loading + let modelsLoaded = false; // Track if models are fully loaded + let modelsLoadPromise = null; // Promise for model loading + + // UI Elements + const demoStatusBox = document.getElementById('demoStatusBox'); + const demoStatusText = document.getElementById('demoStatusText'); + const wasmWarningBanner = document.getElementById('wasmWarningBanner'); + const demoGenerateBtn = document.getElementById('demoGenerateBtn'); + const demoTotalSteps = document.getElementById('demoTotalSteps'); + const demoSpeed = document.getElementById('demoSpeed'); + const demoTotalStepsValue = document.getElementById('demoTotalStepsValue'); + const demoSpeedValue = document.getElementById('demoSpeedValue'); + const demoResults = document.getElementById('demoResults'); + const demoError = document.getElementById('demoError'); + const demoCharCount = document.getElementById('demoCharCount'); + const demoCharCounter = document.getElementById('demoCharCounter'); + const 
demoCharWarning = document.getElementById('demoCharWarning'); + + // Text validation constants + const MIN_CHARS = 10; + const MAX_CHUNK_LENGTH_DEFAULT = 300; // Maximum length for each chunk (default) + const MAX_CHUNK_LENGTH_KO = 120; // Maximum length for Korean + function getMaxChunkLength() { + return currentLanguage === 'ko' ? MAX_CHUNK_LENGTH_KO : MAX_CHUNK_LENGTH_DEFAULT; + } + + // Custom audio player state (shared across generations) + let audioContext = null; + let scheduledSources = []; + let audioChunks = []; + let totalDuration = 0; + let startTime = 0; + let pauseTime = 0; + let isPaused = false; + let isPlaying = false; + let animationFrameId = null; + let playPauseBtn = null; + let progressBar = null; + let currentTimeDisplay = null; + let durationDisplay = null; + let progressFill = null; + let firstChunkGenerationTime = 0; // Processing time for first chunk + let totalChunks = 0; + let nextScheduledTime = 0; // Next time to schedule audio chunk + let currentGenerationTextLength = 0; + let supertonicPlayerRecord = null; // Supertonic player record for cross-player pause management + let isGenerating = false; // Track if speech generation is in progress + + // Track all custom audio players + let customAudioPlayers = []; + + const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches; + // Check if device actually supports touch (not just viewport size) + const isTouchDevice = () => 'ontouchstart' in window || navigator.maxTouchPoints > 0; + const trimDecimalsForMobile = (formatted) => { + if (!formatted) return formatted; + return isMobileViewport() ? 
formatted.replace(/\.\d{2}$/, '') : formatted; + }; + + function pauseAllPlayersExcept(currentPlayer) { + customAudioPlayers.forEach(player => { + if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') { + player.pausePlayback(); + } + }); + } + + + /** + * Chunk text into smaller pieces based on sentence boundaries + * @param {string} text - The text to chunk + * @param {number} maxLen - Maximum length for each chunk + * @returns {Array} - Array of text chunks + */ + function chunkText(text, maxLen = getMaxChunkLength()) { + // Split by paragraph (two or more newlines) + const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim()); + + const chunks = []; + + for (let paragraph of paragraphs) { + paragraph = paragraph.trim(); + if (!paragraph) continue; + + // Split by sentence boundaries (period, question mark, exclamation mark followed by space) + // But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F. + const sentences = paragraph.split(/(?= 0 && progress <= 100) { + const clampedProgress = Math.max(0, Math.min(progress, 100)); + demoStatusBox.style.setProperty('--status-progress', `${clampedProgress}%`); + demoStatusBox.classList.toggle('complete', clampedProgress >= 100); + } else if (type === 'success' || type === 'error') { + demoStatusBox.style.removeProperty('--status-progress'); + demoStatusBox.classList.remove('complete'); + } else { + demoStatusBox.style.removeProperty('--status-progress'); + demoStatusBox.classList.remove('complete'); + } + } + + function hideDemoStatus() { + demoStatusBox.style.display = 'none'; + } + + function showDemoError(message) { + demoError.textContent = message; + demoError.classList.add('active'); + } + + function hideDemoError() { + demoError.classList.remove('active'); + } + + // Language toast notification + const languageToast = document.getElementById('languageToast'); + const languageToastMessage = 
document.getElementById('languageToastMessage'); + let languageToastTimeout = null; + + function showLanguageToast(fromLang, toLang) { + if (!languageToast || !languageToastMessage) return; + + const fromName = LANGUAGE_NAMES[fromLang] || fromLang; + const toName = LANGUAGE_NAMES[toLang] || toLang; + + languageToastMessage.innerHTML = `Language auto-detected: ${toName}`; + + // Clear any existing timeout + if (languageToastTimeout) { + clearTimeout(languageToastTimeout); + } + + // Show toast + languageToast.classList.add('show'); + + // Hide after 3 seconds + languageToastTimeout = setTimeout(() => { + languageToast.classList.remove('show'); + }, 3000); + } + + function showWasmWarning() { + if (wasmWarningBanner) { + wasmWarningBanner.style.display = 'flex'; + } + } + + // Validate characters in text + function validateCharacters(text) { + if (!processors || !processors.textProcessor) { + return { valid: true, unsupportedChars: [] }; + } + + try { + // Extract unique characters to minimize preprocessText calls + const uniqueChars = [...new Set(text)]; + + // Build mapping for unique chars only (much faster for long texts) + // For example, Korean '간' -> 'ㄱㅏㄴ', so we map 'ㄱ','ㅏ','ㄴ' -> '간' + const processedToOriginal = new Map(); + const charToProcessed = new Map(); + + for (const char of uniqueChars) { + const processedChar = preprocessText(char); + charToProcessed.set(char, processedChar); + + // Map each processed character back to its original + for (const pc of processedChar) { + if (!processedToOriginal.has(pc)) { + processedToOriginal.set(pc, new Set()); + } + processedToOriginal.get(pc).add(char); + } + } + + // Build full processed text using cached mappings + const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join(''); + + // Check the entire processed text once (efficient) + const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]); + + // Map unsupported processed chars back to original chars + const 
/* --- Tail of validateCharacters(): maps each unsupported *processed* character back to the original user-typed character(s) via processedToOriginal so error messages show what the user actually entered; the catch treats any internal failure as "valid" (best-effort check). --- */
unsupportedOriginalChars = new Set(); + if (unsupportedChars && unsupportedChars.length > 0) { + for (const unsupportedChar of unsupportedChars) { + const originalChars = processedToOriginal.get(unsupportedChar); + if (originalChars) { + originalChars.forEach(c => unsupportedOriginalChars.add(c)); + } + } + } + + const unsupportedCharsArray = Array.from(unsupportedOriginalChars); + return { + valid: unsupportedCharsArray.length === 0, + unsupportedChars: unsupportedCharsArray + }; + } catch (error) { + return { valid: true, unsupportedChars: [] }; + } + } + + // Update character counter and validate text length + function updateCharCounter() { + const rawText = demoTextInput.textContent || demoTextInput.innerText || ''; + const text = rawText.replace(/\n$/g, ''); // Remove trailing newline that browsers may add + const length = text.length; + + demoCharCount.textContent = length; + + // Get the actual width of the textarea + const textareaWidth = demoTextInput.offsetWidth; + // Max width reference: 1280px (container max-width) / 2 (grid column) - padding/gap ≈ 638px + // Using 640px as reference for easier calculation + const maxWidthRef = 640; /* NOTE(review): maxWidthRef appears unused in the visible code — the font size below is computed from textareaWidth directly; confirm before deleting. */ + + // Calculate font size based on width ratio + // Original rem values at max-width (640px): + // 5rem = 80px @ 16px base → 80/640 = 12.5% + // 4rem = 64px → 64/640 = 10% + // 3rem = 48px → 48/640 = 7.5% + // 2.5rem = 40px → 40/640 = 6.25% + // 2rem = 32px → 32/640 = 5% + // 1.5rem = 24px → 24/640 = 3.75% + // 1rem = 16px → 16/640 = 2.5% + + // Check if mobile (572px or less) for 2x font size scaling + const isMobile = window.innerWidth <= 572; + const mobileMultiplier = isMobile ? 
2 : 1; + + let fontSizeRatio; + if (length <= 100) { + fontSizeRatio = 0.055 * mobileMultiplier; // 5.5% of width + } else if (length <= 200) { + fontSizeRatio = 0.04 * mobileMultiplier; // 4% of width + } else if (length < 240) { + fontSizeRatio = 0.053125 * mobileMultiplier; // ~5.3125% of width (scaled from 2.5rem) -- NOTE(review): 0.053125 > 0.04, so the font grows again for 201-239 chars; non-monotonic, and the table above puts 2.5rem at 6.25% -- confirm intended. + } else if (length < 400) { + fontSizeRatio = 0.0425 * mobileMultiplier; // ~4.25% of width (scaled from 2rem) + } else if (length < 700) { + fontSizeRatio = 0.031875 * mobileMultiplier; // ~3.1875% of width (scaled from 1.5rem) + } else { + fontSizeRatio = 0.025 * mobileMultiplier; // 2.5% of width (minimum stays the same) + } + + // Calculate font size based on actual width + const fontSize = textareaWidth * fontSizeRatio; + demoTextInput.style.fontSize = `${fontSize}px`; + + // Remove all status classes + demoCharCounter.classList.remove('error', 'warning', 'valid'); + + // Check for unsupported characters first (only if models are loaded) + let hasUnsupportedChars = false; + if (models && processors && length > 0) { + const validation = validateCharacters(text); + if (!validation.valid && validation.unsupportedChars.length > 0) { + hasUnsupportedChars = true; + const charList = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', '); + const moreChars = validation.unsupportedChars.length > 5 ? ` and ${validation.unsupportedChars.length - 5} more` : ''; + showDemoError(`Unsupported characters detected: ${charList}${moreChars}. 
Please remove them before generating speech.`); + } else { + hideDemoError(); + } + } + + // Update status based on length and character validation + if (length < MIN_CHARS) { + demoCharCounter.classList.add('error'); + demoCharWarning.textContent = '(At least 10 characters)'; /* NOTE(review): message hard-codes 10 — keep in sync with MIN_CHARS. */ + demoGenerateBtn.disabled = true; + } else if (hasUnsupportedChars) { + demoCharCounter.classList.add('error'); + demoCharWarning.textContent = '(Unsupported characters)'; + demoGenerateBtn.disabled = true; + } else { + demoCharCounter.classList.add('valid'); + demoCharWarning.textContent = ''; + // Enable only if models are loaded AND not currently generating + demoGenerateBtn.disabled = !models || isGenerating; + } + } + + // Validate text input + function validateTextInput(text) { /* Returns { valid: true } or { valid: false, message } for empty or too-short input. */ + if (!text || text.trim().length === 0) { + return { valid: false, message: 'Please enter some text.' }; + } + if (text.length < MIN_CHARS) { + return { valid: false, message: `Text must be at least ${MIN_CHARS} characters long. (Currently ${text.length})` }; + } + return { valid: true }; + } + + // Load pre-extracted style embeddings from JSON + async function loadStyleEmbeddings(voice) { /* Fetches REF_EMBEDDING_PATHS[voice], converts the JSON arrays into an ort.Tensor pair { styleTtl, styleDp }, and memoizes the result in refEmbeddingCache. Throws on unknown voice or fetch failure. */ + try { + // Check if already cached + if (refEmbeddingCache[voice]) { + return refEmbeddingCache[voice]; + } + + const embeddingPath = REF_EMBEDDING_PATHS[voice]; + if (!embeddingPath) { + throw new Error(`No embedding path configured for voice: ${voice}`); + } + + const response = await fetch(embeddingPath); + if (!response.ok) { + throw new Error(`Failed to fetch embedding: ${response.statusText}`); + } + + const embeddingData = await response.json(); + + // Convert JSON data to ONNX tensors + // Flatten nested arrays before creating Float32Array + const styleTtlData = embeddingData.style_ttl.data.flat(Infinity); + const styleTtlTensor = new ort.Tensor( + embeddingData.style_ttl.type || 'float32', + Float32Array.from(styleTtlData), + embeddingData.style_ttl.dims + ); + + const styleDpData = 
/* --- loadStyleEmbeddings() tail (style_dp tensor + caching), switchVoice(), checkWebGPUSupport(), and the opening of warmupModels(). --- */
embeddingData.style_dp.data.flat(Infinity); + const styleDpTensor = new ort.Tensor( + embeddingData.style_dp.type || 'float32', + Float32Array.from(styleDpData), + embeddingData.style_dp.dims + ); + + const embeddings = { + styleTtl: styleTtlTensor, + styleDp: styleDpTensor + }; + + // Cache the embeddings + refEmbeddingCache[voice] = embeddings; + + return embeddings; + } catch (error) { + throw error; /* NOTE(review): catch-and-rethrow adds nothing — the try/catch could be dropped. */ + } + } + + // Switch to a different voice + async function switchVoice(voice) { /* Loads the voice's embeddings, updates module state (currentStyleTtlTensor / currentStyleDpTensor / currentVoice), refreshes the speaker UI, and re-validates the text. Shows a demo error and rethrows on failure. */ + try { + const embeddings = await loadStyleEmbeddings(voice); + + currentStyleTtlTensor = embeddings.styleTtl; + currentStyleDpTensor = embeddings.styleDp; + currentVoice = voice; + + // Update active speaker in UI + if (typeof window.updateActiveSpeaker === 'function') { + window.updateActiveSpeaker(voice); + } + + // Re-validate text after switching voice + updateCharCounter(); + } catch (error) { + showDemoError(`Failed to load voice ${voice}: ${error.message}`); + throw error; + } + } + + // Check WebGPU support more thoroughly + async function checkWebGPUSupport() { /* Returns { supported: true, adapter, device } or { supported: false, reason }. iOS and Safari are rejected up front, before probing navigator.gpu. */ + try { + // Detect iOS/Safari + const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) || + (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1); + const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent); + + // iOS and Safari have incomplete WebGPU support + if (isIOS) { + return { supported: false, reason: 'iOS does not support the required WebGPU features' }; + } + if (isSafari) { + // Desktop Safari might work, but check carefully + return { supported: false, reason: 'Safari does not support the required WebGPU features' }; + } + + // Check if WebGPU is available in the browser + if (!navigator.gpu) { + return { supported: false, reason: 'WebGPU not available in this browser' }; + } + + // Request adapter + const adapter = await navigator.gpu.requestAdapter(); + if (!adapter) { + return { supported: false, reason: 'No WebGPU adapter found' }; + } + + // 
Check adapter info + try { + const adapterInfo = await adapter.requestAdapterInfo(); /* NOTE(review): requestAdapterInfo() was removed from the WebGPU spec in favor of the GPUAdapter.info attribute, and adapterInfo is unused here anyway — verify against current browsers. */ + } catch (infoError) { + // Ignore adapter info errors + } + + // Request device to test if it actually works + const device = await adapter.requestDevice(); + if (!device) { + return { supported: false, reason: 'Failed to create WebGPU device' }; + } + + return { supported: true, adapter, device }; + } catch (error) { + // Handle specific iOS/Safari errors + const errorMsg = error.message || ''; + if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) { + return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' }; + } + return { supported: false, reason: error.message }; + } + } + + // Warmup models with dummy inference (no audio playback, no UI updates) + async function warmupModels() { + try { + const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. 
/* --- warmupModels() body: runs the whole pipeline once (duration predictor -> text encoder -> iterative denoiser -> vocoder) on dummy text so the first user-triggered request avoids cold-start latency; followed by the head of initializeModels(). --- */
Try it now, and enjoy its speed.'; + const totalStep = 5; // Use minimal steps for faster warmup + const durationFactor = 1.0; + + const textList = [dummyText]; + const bsz = 1; + + // Use pre-computed style embeddings + const styleTtlTensor = currentStyleTtlTensor; + const styleDpTensor = currentStyleDpTensor; + + // Step 1: Estimate duration + const { textIds, textMask } = processors.textProcessor.call(textList, currentLanguage); + + const textIdsShape = [bsz, textIds[0].length]; + const textMaskShape = [bsz, 1, textMask[0][0].length]; + const textMaskTensor = arrayToTensor(textMask, textMaskShape); + + const dpResult = await models.dpOrt.run({ + text_ids: intArrayToTensor(textIds, textIdsShape), + style_dp: styleDpTensor, + text_mask: textMaskTensor + }); + + const durOnnx = Array.from(dpResult.duration.data); + for (let i = 0; i < durOnnx.length; i++) { + durOnnx[i] *= durationFactor; + } + const durReshaped = []; + for (let b = 0; b < bsz; b++) { + durReshaped.push([[durOnnx[b]]]); + } + + // Step 2: Encode text + const textEncResult = await models.textEncOrt.run({ + text_ids: intArrayToTensor(textIds, textIdsShape), + style_ttl: styleTtlTensor, + text_mask: textMaskTensor + }); + + const textEmbTensor = textEncResult.text_emb; + + // Step 3: Denoising + let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs); + const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length]; + const latentMaskShape = [bsz, 1, latentMask[0][0].length]; + const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape); + + const totalStepArray = new Array(bsz).fill(totalStep); + const scalarShape = [bsz]; + const totalStepTensor = arrayToTensor(totalStepArray, scalarShape); + + for (let step = 0; step < totalStep; step++) { + const currentStepArray = new Array(bsz).fill(step); + + const vectorEstResult = await models.vectorEstOrt.run({ + noisy_latent: arrayToTensor(noisyLatent, latentShape), + text_emb: textEmbTensor, + style_ttl: styleTtlTensor, + 
text_mask: textMaskTensor, + latent_mask: latentMaskTensor, + total_step: totalStepTensor, + current_step: arrayToTensor(currentStepArray, scalarShape) + }); + + const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data); + + // Update latent + let idx = 0; + for (let b = 0; b < noisyLatent.length; b++) { + for (let d = 0; d < noisyLatent[b].length; d++) { + for (let t = 0; t < noisyLatent[b][d].length; t++) { + noisyLatent[b][d][t] = denoisedLatent[idx++]; + } + } + } + } + + // Step 4: Generate waveform + const vocoderResult = await models.vocoderOrt.run({ + latent: arrayToTensor(noisyLatent, latentShape) + }); + + // Warmup complete - no need to process the audio further + } catch (error) { + console.warn('Warmup failed (non-critical):', error.message); + // Don't throw - warmup failure shouldn't prevent normal usage + } + } + + // Load models on page load + async function initializeModels() { /* Idempotent loader: a load already in flight returns the shared modelsLoadPromise; a completed load returns immediately. Speaker/language pickers are disabled for the duration (re-enabled on both success and error). */ + // If models are already loading, return the existing promise + if (modelsLoading && modelsLoadPromise) { + return modelsLoadPromise; + } + + // If models are already loaded, return immediately + if (modelsLoaded && models) { + return; + } + + modelsLoading = true; + // Disable speaker selection during model loading + const speakerItemsForLoading = document.querySelectorAll('.speaker-item[data-voice]'); + speakerItemsForLoading.forEach(item => item.classList.add('disabled')); + + // Disable language selection during model loading + const languageItemsForLoading = document.querySelectorAll('.speaker-item[data-language]'); + languageItemsForLoading.forEach(item => item.classList.add('disabled')); + + modelsLoadPromise = (async () => { + try { + showDemoStatus('Loading configuration...', 'info', 5); + + const basePath = 'assets/onnx'; + + // Load config + cfgs = await loadCfgs(basePath); + + // Check WebGPU support first + showDemoStatus('Checking WebGPU support...', 'info', 8); + const webgpuCheck = await checkWebGPUSupport(); + + // Determine execution 
/* --- initializeModels() tail: choose WebGPU vs WASM backend, load models and processors in parallel, load default-voice embeddings, warm up, then re-enable the UI (also re-enabled in the catch); followed by the head of generateSupertonicSpeech(). --- */
provider based on WebGPU support + const useWebGPU = webgpuCheck.supported; + const executionProvider = useWebGPU ? 'webgpu' : 'wasm'; + + // If WebGPU is not supported, show subtle warning banner + if (!useWebGPU) { + showWasmWarning(); + } + + // Load models with appropriate backend + const backendName = useWebGPU ? 'WebGPU' : 'WASM'; + showDemoStatus(`${backendName} detected! Loading models...`, 'info', 10); + + /* NOTE(review): this local constant shadows the module-level modelsLoadPromise assigned just above — rename one of them to avoid confusion. */ const modelsLoadPromise = loadOnnxAll(basePath, { + executionProviders: [executionProvider], + graphOptimizationLevel: 'all' + }, (modelName, current, total) => { + const progress = 10 + (current / total) * 70; // 10-80% for model loading + showDemoStatus(`Loading models with ${backendName} (${current}/${total}): ${modelName}...`, 'info', progress); + }); + + // Load processors in parallel with models + const [loadedModels, loadedProcessors] = await Promise.all([ + modelsLoadPromise, + loadProcessors(basePath) + ]); + + models = loadedModels; + processors = loadedProcessors; + showDemoStatus('Loading reference embeddings...', 'info', 85); + + // Load pre-extracted embeddings for default voice + const embeddings = await loadStyleEmbeddings(currentVoice); + currentStyleTtlTensor = embeddings.styleTtl; + currentStyleDpTensor = embeddings.styleDp; + + showDemoStatus('Warming up models...', 'info', 90); + + // Warmup step: run inference once in background with dummy text + await warmupModels(); + + hideDemoStatus(); + + demoGenerateBtn.disabled = false; + demoTotalSteps.disabled = false; + demoSpeed.disabled = false; + + // Enable voice toggle buttons after models are loaded + const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text'); + voiceToggleTexts.forEach(text => text.classList.remove('disabled')); + + // Validate initial text now that models are loaded + updateCharCounter(); + + // Mark models as loaded + modelsLoaded = true; + modelsLoading = false; + + // Re-enable speaker selection after model loading + 
speakerItemsForLoading.forEach(item => item.classList.remove('disabled')); + + // Re-enable language selection after model loading + languageItemsForLoading.forEach(item => item.classList.remove('disabled')); + + } catch (error) { + modelsLoading = false; + // Re-enable speaker selection on error too + speakerItemsForLoading.forEach(item => item.classList.remove('disabled')); + + // Re-enable language selection on error too + languageItemsForLoading.forEach(item => item.classList.remove('disabled')); + showDemoStatus(`Error: ${error.message}`, 'error'); + showDemoError(`Failed to initialize: ${error.message}. Check console for details.`); + throw error; + } + })(); + + return modelsLoadPromise; + } + + + // Supertonic synthesis function (extracted for parallel execution) + async function generateSupertonicSpeech(text, totalStep, durationFactor) { /* Synthesizes a single chunk of text. Resolves to { success: true, processingTime, audioDuration, audioData, sampleRate, text } or { success: false, error, text } — it never rejects. */ + const supertonicStartTime = Date.now(); + + try { + const textList = [text]; + const bsz = 1; + const sampleRate = cfgs.ae.sample_rate; + + // Use pre-computed style embeddings + const styleTtlTensor = currentStyleTtlTensor; + const styleDpTensor = currentStyleDpTensor; + + // Step 1: Estimate duration + const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList, currentLanguage); + + // Check for unsupported characters + if (unsupportedChars && unsupportedChars.length > 0) { + const charList = unsupportedChars.map(c => `"${c}"`).join(', '); + throw new Error(`Unsupported characters: ${charList}`); + } + + const textIdsShape = [bsz, textIds[0].length]; + const textMaskShape = [bsz, 1, textMask[0][0].length]; + const textMaskTensor = arrayToTensor(textMask, textMaskShape); + + const dpResult = await models.dpOrt.run({ + text_ids: intArrayToTensor(textIds, textIdsShape), + style_dp: styleDpTensor, + text_mask: textMaskTensor + }); + + const durOnnx = Array.from(dpResult.duration.data); + // Apply duration factor to adjust speech length (once) + for (let i = 0; i < durOnnx.length; i++) { + 
/* --- generateSupertonicSpeech() tail: denoising loop over a pre-allocated flat Float32Array latent buffer (avoids per-step re-allocation), vocoder call, timing stats; then formatTimeDetailed() and the head of generateSupertonicSpeechChunked(). --- */
durOnnx[i] *= durationFactor; + } + const durReshaped = []; + for (let b = 0; b < bsz; b++) { + durReshaped.push([[durOnnx[b]]]); + } + + // Step 2: Encode text + const textEncResult = await models.textEncOrt.run({ + text_ids: intArrayToTensor(textIds, textIdsShape), + style_ttl: styleTtlTensor, + text_mask: textMaskTensor + }); + + const textEmbTensor = textEncResult.text_emb; + + // Step 3: Denoising + let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs); + const latentDim = noisyLatent[0].length; + const latentLen = noisyLatent[0][0].length; + const latentShape = [bsz, latentDim, latentLen]; + const latentMaskShape = [bsz, 1, latentMask[0][0].length]; + const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape); + + // Pre-allocate flat buffer for latent data to avoid repeated allocations + const latentBufferSize = bsz * latentDim * latentLen; + const latentBuffer = new Float32Array(latentBufferSize); + + // Initialize latent buffer from noisyLatent + let initIdx = 0; + for (let b = 0; b < bsz; b++) { + for (let d = 0; d < latentDim; d++) { + for (let t = 0; t < latentLen; t++) { + latentBuffer[initIdx++] = noisyLatent[b][d][t]; + } + } + } + + // Prepare constant tensors + const scalarShape = [bsz]; + const totalStepTensor = arrayToTensor(new Array(bsz).fill(totalStep), scalarShape); + + // Pre-create all step tensors to avoid repeated allocations + const stepTensors = []; + for (let step = 0; step < totalStep; step++) { + stepTensors.push(arrayToTensor(new Array(bsz).fill(step), scalarShape)); + } + + for (let step = 0; step < totalStep; step++) { + // Create tensor from pre-allocated buffer + const noisyLatentTensor = new ort.Tensor('float32', latentBuffer, latentShape); + + const vectorEstResult = await models.vectorEstOrt.run({ + noisy_latent: noisyLatentTensor, + text_emb: textEmbTensor, + style_ttl: styleTtlTensor, + text_mask: textMaskTensor, + latent_mask: latentMaskTensor, + total_step: totalStepTensor, + current_step: 
stepTensors[step] + }); + + // Copy denoised result directly into pre-allocated buffer + const denoisedData = vectorEstResult.denoised_latent.data; + latentBuffer.set(denoisedData); /* assumes denoised_latent has exactly latentBufferSize elements — TODO confirm model output shape */ + } + + // Step 4: Generate waveform - use latentBuffer directly + const vocoderResult = await models.vocoderOrt.run({ + latent: new ort.Tensor('float32', latentBuffer, latentShape) + }); + + const wavBatch = vocoderResult.wav_tts.data; + const wavLen = Math.floor(sampleRate * durOnnx[0]); + // Create a copy of the audio data (not a view) to prevent buffer reuse issues + const audioData = wavBatch.slice(0, wavLen); + + // Calculate times for Supertonic + const supertonicEndTime = Date.now(); + const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000; + const audioDurationSec = durOnnx[0]; + + return { + success: true, + processingTime: supertonicProcessingTime, + audioDuration: audioDurationSec, + audioData: audioData, + sampleRate: sampleRate, + text: text + }; + } catch (error) { + return { + success: false, + error: error.message, + text: text + }; + } + } + + // Format time: under 60 s -> 00.00, under 60 min -> 00:00.00, 60 min or more -> 00:00:00.00 + function formatTimeDetailed(seconds) { + const hours = Math.floor(seconds / 3600); + const mins = Math.floor((seconds % 3600) / 60); + const secs = seconds % 60; + const ms = Math.floor((secs % 1) * 100); /* hundredths of a second, not milliseconds */ + const wholeSecs = Math.floor(secs); + + if (seconds < 60) { + return `${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; + } else if (seconds < 3600) { + return `${mins.toString().padStart(2, '0')}:${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; + } else { + return `${hours.toString().padStart(2, '0')}:${mins.toString().padStart(2, '0')}:${wholeSecs.toString().padStart(2, '0')}.${ms.toString().padStart(2, '0')}`; + } + } + + // Generate Supertonic speech with chunking support and progressive playback + async function generateSupertonicSpeechChunked(text, totalStep, 
/* --- generateSupertonicSpeechChunked() body: synthesizes each text chunk sequentially, fires onFirstChunkReady / onChunkAdded for progressive playback, joins chunks with 0.3 s of silence into one WAV blob URL; then the head of generateSpeech(). Resolves to { success, ... } and never rejects. --- */
durationFactor, onFirstChunkReady, onChunkAdded) { + const supertonicStartTime = Date.now(); + const sampleRate = cfgs.ae.sample_rate; + const silenceDuration = 0.3; // 0.3 seconds of silence between chunks + + try { + // Split text into chunks + const chunks = chunkText(text); + + const audioDataArrays = []; + const durations = []; + const silenceSamples = Math.floor(silenceDuration * sampleRate); + let firstChunkEndTime = 0; + let firstChunkTime = 0; + + // Generate speech for each chunk + for (let i = 0; i < chunks.length; i++) { + const chunkText = chunks[i]; /* NOTE(review): shadows the chunkText() helper called above — legal (block scope) but confusing; consider renaming. */ + + const result = await generateSupertonicSpeech(chunkText, totalStep, durationFactor); + + if (!result.success) { + throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`); + } + + // Use raw Float32Array directly - no WAV encode/decode round-trip + const audioData = result.audioData; + + audioDataArrays.push(audioData); + durations.push(result.audioDuration); + + // Progressive playback: pass raw Float32Array directly to callbacks + if (i === 0 && onFirstChunkReady) { + // First chunk ready - send it immediately + firstChunkEndTime = Date.now(); + firstChunkTime = (firstChunkEndTime - supertonicStartTime) / 1000; + + const totalDurationSoFar = result.audioDuration; + const processedChars = chunks[0].length; + // Pass raw audio data and sample rate directly + onFirstChunkReady(audioData, sampleRate, totalDurationSoFar, text, chunks.length, firstChunkTime, processedChars); + } else if (i > 0 && onChunkAdded) { + // Subsequent chunks - send just the new chunk + const totalDurationSoFar = durations.slice(0, i + 1).reduce((sum, dur) => sum + dur, 0) + silenceDuration * i; + const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000; + const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0); + // Pass raw audio data and sample rate directly + onChunkAdded(audioData, sampleRate, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, 
processedChars); + } + } + + // Concatenate all audio chunks with silence for final result + const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1); + + // Calculate total samples needed + let totalSamples = 0; + for (let i = 0; i < audioDataArrays.length; i++) { + totalSamples += audioDataArrays[i].length; + if (i < audioDataArrays.length - 1) { + totalSamples += silenceSamples; + } + } + + const wavCat = new Float32Array(totalSamples); + + let currentIdx = 0; + for (let i = 0; i < audioDataArrays.length; i++) { + // Copy audio data + const audioData = audioDataArrays[i]; + wavCat.set(audioData, currentIdx); + currentIdx += audioData.length; + + // Add silence if not the last chunk + if (i < audioDataArrays.length - 1) { + // Silence is already zeros in Float32Array, just skip the indices + currentIdx += silenceSamples; + } + } + + // Create final WAV file + const wavBuffer = writeWavFile(wavCat, sampleRate); + const blob = new Blob([wavBuffer], { type: 'audio/wav' }); + const url = URL.createObjectURL(blob); /* NOTE(review): callers must URL.revokeObjectURL() this when done, or the blob lives for the page lifetime. */ + + const supertonicEndTime = Date.now(); + const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000; + + return { + success: true, + processingTime: supertonicProcessingTime, + audioDuration: totalDuration, + url: url, + text: text, + firstChunkTime: firstChunkTime + }; + } catch (error) { + return { + success: false, + error: error.message, + text: text + }; + } + } + + // Main synthesis function + async function generateSpeech() { + let text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); + + // Validate text input + const validation = validateTextInput(text); + if (!validation.valid) { + showDemoError(validation.message); + return; + } + + if (!models || !cfgs || !processors) { + showDemoError('Models are still loading. 
Please wait.'); + return; + } + + if (!currentStyleTtlTensor || !currentStyleDpTensor) { + showDemoError('Reference embeddings are not ready. Please wait.'); + return; + } + + // Validate characters before generation + const charValidation = validateCharacters(text); + if (!charValidation.valid && charValidation.unsupportedChars.length > 0) { + const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', '); + showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`); + return; + } + + currentGenerationTextLength = text.length; + + try { + isGenerating = true; + demoGenerateBtn.disabled = true; + + // Disable speaker selection during generation + const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); + speakerItemsForGeneration.forEach(item => item.classList.add('disabled')); + + // Disable language selection during generation + const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); + languageItemsForGeneration.forEach(item => item.classList.add('disabled')); + + hideDemoError(); + hideDemoStatus(); // Hide the status box when starting generation + + // Clean up previous audio playback + if (audioContext) { + // Stop all scheduled sources + scheduledSources.forEach(source => { + try { + source.stop(); + } catch (e) { + // Already stopped + } + }); + scheduledSources = []; + + // Close audio context + if (audioContext.state !== 'closed') { + audioContext.close(); + } + audioContext = null; + } + + // Cancel animation frame + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + animationFrameId = null; + } + + // Clean up all custom audio players + customAudioPlayers.forEach(player => { + if (player.cleanup) { + player.cleanup(); + } + }); + customAudioPlayers = []; + + // Reset state + audioChunks = []; + totalDuration = 0; + startTime = 0; + pauseTime = 0; + isPaused = false; + isPlaying = false; + firstChunkGenerationTime = 0; // 
Processing time for first chunk + totalChunks = 0; + nextScheduledTime = 0; // Next time to schedule audio chunk + + // Show result shell(s) immediately + const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => { + const titleStatus = includeStatus + ? `⏳ Running...` + : ''; + return ` +
+
+ ${titleMain} + ${titleSub} + ${titleStatus} +
+
+ +
+
--
+
Processing Time
+
+
+
--
+
Chars/sec
+
+
+
--
+
RTF
+
+
+
+
Generating speech...
+
+
+ `; + }; + const supertonicInitial = createInitialResultItem( + 'supertonic', + 'Supertonic', + 'On-Device', + 'var(--accent-yellow)', + false + ); + demoResults.style.display = 'flex'; + demoResults.innerHTML = supertonicInitial; + + const totalStep = parseInt(demoTotalSteps.value); + const speed = parseFloat(demoSpeed.value); + const durationFactor = speedToDurationFactor(speed); + + // Track which one finishes first + let latestSupertonicProcessedChars = 0; + + // Helper functions for custom player + const formatTime = (seconds, { trimMobile = false } = {}) => { + const mins = Math.floor(seconds / 60); + const secs = seconds % 60; + const secString = secs.toFixed(2).padStart(5, '0'); + let formatted = `${mins}:${secString}`; + if (trimMobile) { + formatted = trimDecimalsForMobile(formatted); + } + return formatted; + }; + + const updateProgress = () => { + if (!isPlaying || !audioContext) return; + + const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime); + const progress = totalDuration > 0 ? 
(currentTime / totalDuration) * 100 : 0; + + if (progressFill) { + progressFill.style.width = `${Math.min(progress, 100)}%`; + } + if (currentTimeDisplay) { + currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true }); + } + + if (currentTime < totalDuration) { + animationFrameId = requestAnimationFrame(updateProgress); + } else { + // Playback finished + isPlaying = false; + isPaused = false; + if (playPauseBtn) { + playPauseBtn.innerHTML = PLAY_ICON_SVG; + } + } + }; + + const togglePlayPause = () => { + if (!audioContext || audioChunks.length === 0) return; + + if (isPaused) { + // Resume from paused position + pauseAllPlayersExcept(supertonicPlayerRecord); + + const seekTime = pauseTime; + + // Find which chunk we should start from + let accumulatedTime = 0; + let startChunkIndex = 0; + let offsetInChunk = seekTime; + + for (let i = 0; i < audioChunks.length; i++) { + const chunkDuration = audioChunks[i].buffer.duration; + if (accumulatedTime + chunkDuration > seekTime) { + startChunkIndex = i; + offsetInChunk = seekTime - accumulatedTime; + break; + } + accumulatedTime += chunkDuration + 0.3; + } + + // Stop any existing sources + scheduledSources.forEach(source => { + try { + source.stop(); + } catch (e) { + // Already stopped + } + }); + scheduledSources = []; + + // Resume AudioContext if suspended + if (audioContext.state === 'suspended') { + audioContext.resume(); + } + + // Reschedule from the pause point + startTime = audioContext.currentTime - seekTime; + let nextStartTime = audioContext.currentTime; + + for (let i = startChunkIndex; i < audioChunks.length; i++) { + const source = audioContext.createBufferSource(); + source.buffer = audioChunks[i].buffer; + source.connect(audioContext.destination); + + if (i === startChunkIndex) { + source.start(nextStartTime, offsetInChunk); + nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); + } else { + source.start(nextStartTime); + nextStartTime += 
audioChunks[i].buffer.duration; + } + + if (i < audioChunks.length - 1) { + nextStartTime += 0.3; + } + + scheduledSources.push(source); + } + + nextScheduledTime = nextStartTime; + + isPaused = false; + isPlaying = true; + playPauseBtn.innerHTML = PAUSE_ICON_SVG; + updateProgress(); + } else if (isPlaying) { + // Pause playback + pauseTime = audioContext.currentTime - startTime; + audioContext.suspend(); + isPaused = true; + playPauseBtn.innerHTML = PLAY_ICON_SVG; + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + } + } else { + // Was finished, restart from beginning + pauseAllPlayersExcept(supertonicPlayerRecord); + + pauseTime = 0; + + // Resume AudioContext if suspended + if (audioContext.state === 'suspended') { + audioContext.resume(); + } + + // Stop any existing sources + scheduledSources.forEach(source => { + try { + source.stop(); + } catch (e) { + // Already stopped + } + }); + scheduledSources = []; + + // Restart from beginning + startTime = audioContext.currentTime; + let nextStartTime = audioContext.currentTime; + + for (let i = 0; i < audioChunks.length; i++) { + const source = audioContext.createBufferSource(); + source.buffer = audioChunks[i].buffer; + source.connect(audioContext.destination); + source.start(nextStartTime); + nextStartTime += audioChunks[i].buffer.duration; + + if (i < audioChunks.length - 1) { + nextStartTime += 0.3; + } + + scheduledSources.push(source); + } + + nextScheduledTime = nextStartTime; + + isPlaying = true; + isPaused = false; + playPauseBtn.innerHTML = PAUSE_ICON_SVG; + updateProgress(); + } + }; + + const seekTo = (percentage) => { + if (!audioContext || audioChunks.length === 0) return; + + const seekTime = (percentage / 100) * totalDuration; + + // Remember current playing state + const wasPlaying = isPlaying; + const wasPaused = isPaused; + + // Stop all current sources + scheduledSources.forEach(source => { + try { + source.stop(); + } catch (e) { + // Already stopped + } + }); + 
scheduledSources = []; + + // Cancel animation + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + } + + // Find which chunk we should start from + let accumulatedTime = 0; + let startChunkIndex = 0; + let offsetInChunk = seekTime; + + for (let i = 0; i < audioChunks.length; i++) { + const chunkDuration = audioChunks[i].buffer.duration; + if (accumulatedTime + chunkDuration > seekTime) { + startChunkIndex = i; + offsetInChunk = seekTime - accumulatedTime; + break; + } + accumulatedTime += chunkDuration + 0.3; // Include silence + } + + // If paused or finished, just update the pause position + if (wasPaused || !wasPlaying) { + pauseTime = seekTime; + + // Update UI + if (progressFill) { + const progress = (seekTime / totalDuration) * 100; + progressFill.style.width = `${Math.min(progress, 100)}%`; + } + if (currentTimeDisplay) { + currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true }); + } + + // Set to paused state so play button will resume from seek position + isPaused = true; + isPlaying = true; // Valid state for playback + + if (playPauseBtn) { + playPauseBtn.innerHTML = PLAY_ICON_SVG; + } + + return; + } + + // Resume AudioContext if it was suspended + if (audioContext.state === 'suspended') { + audioContext.resume(); + } + + // Reschedule from the seek point + startTime = audioContext.currentTime - seekTime; + let nextStartTime = audioContext.currentTime; + + for (let i = startChunkIndex; i < audioChunks.length; i++) { + const source = audioContext.createBufferSource(); + source.buffer = audioChunks[i].buffer; + source.connect(audioContext.destination); + + if (i === startChunkIndex) { + // Start from offset + source.start(nextStartTime, offsetInChunk); + nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); + } else { + source.start(nextStartTime); + nextStartTime += audioChunks[i].buffer.duration; + } + + // Add silence between chunks + if (i < audioChunks.length - 1) { + nextStartTime += 0.3; + } + + 
scheduledSources.push(source); + } + + // Update nextScheduledTime for any future chunks + nextScheduledTime = nextStartTime; + + // Resume playing state + isPlaying = true; + isPaused = false; + if (playPauseBtn) { + playPauseBtn.innerHTML = PAUSE_ICON_SVG; + } + + // Restart progress animation + updateProgress(); + }; + + // Callback for first chunk ready - create custom player and start playback + // Helper function to create AudioBuffer directly from Float32Array + const createAudioBufferFromFloat32 = (audioData, sampleRate) => { + const audioBuffer = audioContext.createBuffer(1, audioData.length, sampleRate); + audioBuffer.getChannelData(0).set(audioData); + return audioBuffer; + }; + + const onFirstChunkReady = async (audioData, sampleRate, duration, text, numChunks, firstChunkTime, processedChars) => { + totalChunks = numChunks; + firstChunkGenerationTime = firstChunkTime; + + const container = document.getElementById('demoResults'); + + + const textLength = currentGenerationTextLength > 0 + ? currentGenerationTextLength + : (text ? text.length : 0); + const isBatch = textLength >= getMaxChunkLength(); + const processingTimeStr = isBatch && firstChunkTime + ? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}` + : formatTimeDetailed(firstChunkTime); + const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0; + const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars; + const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0 + ? (displayedInitialChars / firstChunkTime).toFixed(1) + : '0.0'; + const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-'; + const progressValue = textLength > 0 ? 
Math.min(100, (displayedInitialChars / textLength) * 100) : 0; + + const resultItemEl = document.getElementById('supertonic-result'); + if (!resultItemEl) { + console.warn('Supertonic result container not found.'); + return; + } + + resultItemEl.classList.remove('generating'); + resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); + + const titleMainEl = resultItemEl.querySelector('.title-main'); + if (titleMainEl) { + titleMainEl.textContent = 'Supertonic'; + titleMainEl.style.color = 'var(--accent-yellow)'; + } + const titleSubEl = resultItemEl.querySelector('.title-sub'); + if (titleSubEl) { + titleSubEl.textContent = 'On-Device'; + } + + const infoContainer = resultItemEl.querySelector('.demo-result-info'); + if (infoContainer) { + infoContainer.classList.remove('error'); + } + const timeElInitial = document.getElementById('supertonic-time'); + if (timeElInitial) { + timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); + } + const cpsElInitial = document.getElementById('supertonic-cps'); + if (cpsElInitial) { + cpsElInitial.textContent = charsPerSec; + } + const rtfElInitial = document.getElementById('supertonic-rtf'); + if (rtfElInitial) { + rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x'); + } + + const playerContainer = resultItemEl.querySelector('.custom-audio-player'); + if (playerContainer) { + playerContainer.style.display = ''; + playerContainer.innerHTML = ` + +
0:00.00
+
+
+
+
+
+
${formatTime(duration, { trimMobile: true })}
+ + `; + } + + container.style.display = 'flex'; + latestSupertonicProcessedChars = displayedInitialChars; + + // Get UI elements + playPauseBtn = document.getElementById('play-pause-btn'); + progressBar = document.getElementById('progress-container'); + currentTimeDisplay = document.getElementById('current-time'); + durationDisplay = document.getElementById('total-duration'); + progressFill = document.getElementById('progress-fill'); + + // Initialize Web Audio API + audioContext = new (window.AudioContext || window.webkitAudioContext)(); + startTime = audioContext.currentTime; + totalDuration = duration; + isPlaying = true; + isPaused = false; + + // Create Supertonic player record and register it + const pausePlayback = () => { + if (!audioContext || audioContext.state === 'closed') return; + if (isPlaying) { + pauseTime = audioContext.currentTime - startTime; + scheduledSources.forEach(source => { + try { + source.stop(); + } catch (e) { + // Already stopped + } + }); + scheduledSources = []; + audioContext.suspend(); + isPaused = true; + isPlaying = false; + if (playPauseBtn) { + playPauseBtn.innerHTML = PLAY_ICON_SVG; + } + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + } + } + }; + + supertonicPlayerRecord = { + audioContext: audioContext, + pausePlayback: pausePlayback + }; + + // Remove old Supertonic player if exists and add new one + customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext); + customAudioPlayers.push(supertonicPlayerRecord); + + // Pause all other players before starting Supertonic + pauseAllPlayersExcept(supertonicPlayerRecord); + + // Create AudioBuffer directly from Float32Array - no WAV encode/decode + const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate); + + audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration }); + + // Play first chunk immediately + const source = audioContext.createBufferSource(); + source.buffer = 
// Streaming-synthesis callback: every chunk after the first is scheduled
// seamlessly onto the Web Audio timeline (with a 0.3 s silence gap) and the
// Supertonic stats readout (duration / chars-per-sec / RTF / progress) is
// refreshed. No-op if playback was never initialized.
const onChunkAdded = async (audioData, sampleRate, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => {
  if (!audioContext) return;

  // Raw Float32 PCM -> AudioBuffer, skipping any WAV encode/decode round-trip.
  const chunkBuffer = createAudioBufferFromFloat32(audioData, sampleRate);
  audioChunks.push({ buffer: chunkBuffer, duration: chunkBuffer.duration });

  // Queue this chunk at the pre-computed position on the shared timeline.
  const node = audioContext.createBufferSource();
  node.buffer = chunkBuffer;
  node.connect(audioContext.destination);
  node.start(nextScheduledTime);
  scheduledSources.push(node);
  nextScheduledTime += chunkBuffer.duration + 0.3; // 0.3 s silence between chunks

  // `duration` is the cumulative audio length so far.
  totalDuration = duration;

  // Briefly flash the duration label so the user notices it growing.
  if (durationDisplay) {
    durationDisplay.textContent = formatTime(duration, { trimMobile: true });
    durationDisplay.style.transition = 'color 0.3s';
    durationDisplay.style.color = '#ffffff';
    setTimeout(() => {
      durationDisplay.style.color = '';
    }, 300);
  }

  // Prefer the length captured when generation started; fall back to the live input.
  const textLength = currentGenerationTextLength > 0
    ? currentGenerationTextLength
    : (demoTextInput.textContent || demoTextInput.innerText || '').trim().length;
  const isBatch = textLength >= getMaxChunkLength();

  const timeEl = document.getElementById('supertonic-time');
  const durationEl = document.getElementById('supertonic-duration');
  const cpsEl = document.getElementById('supertonic-cps');
  const rtfEl = document.getElementById('supertonic-rtf');

  const effectiveProcessedChars = typeof processedChars === 'number'
    ? processedChars
    : latestSupertonicProcessedChars;
  // Updates can arrive out of order; never let the progress readout move backwards.
  if (effectiveProcessedChars < latestSupertonicProcessedChars) {
    return;
  }

  const clampedProcessedChars = textLength > 0
    ? Math.min(effectiveProcessedChars, textLength)
    : effectiveProcessedChars;
  const progressValue = textLength > 0
    ? Math.min(100, (clampedProcessedChars / textLength) * 100)
    : 0;

  if (durationEl) {
    durationEl.textContent = formatTimeDetailed(duration);
  }
  if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) {
    // Batch mode shows "first-chunk time / total time so far".
    const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`;
    timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true });
  }
  if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) {
    cpsEl.textContent = (clampedProcessedChars / currentProcessingTime).toFixed(1);
  }
  if (rtfEl && duration > 0 && currentProcessingTime > 0) {
    rtfEl.innerHTML = formatStatValueWithSuffix((currentProcessingTime / duration).toFixed(3), 'x');
  }

  const resultItemEl = document.getElementById('supertonic-result');
  if (resultItemEl) {
    resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
  }
  latestSupertonicProcessedChars = clampedProcessedChars;
};
durationFactor, + onFirstChunkReady, + onChunkAdded + ); + + if (result.success) { + const textLength = result.text ? result.text.length : 0; + const isBatch = textLength >= getMaxChunkLength(); + const processingTimeStr = isBatch && firstChunkGenerationTime > 0 + ? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}` + : formatTimeDetailed(result.processingTime); + const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0'; + const progressValue = textLength > 0 ? 100 : 0; + + const timeEl = document.getElementById('supertonic-time'); + const durationEl = document.getElementById('supertonic-duration'); + const cpsEl = document.getElementById('supertonic-cps'); + const rtfEl = document.getElementById('supertonic-rtf'); + + if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); + if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration); + latestSupertonicProcessedChars = textLength; + if (cpsEl) cpsEl.textContent = charsPerSec; + if (rtfEl) { + const rtf = result.audioDuration > 0 ? 
(result.processingTime / result.audioDuration).toFixed(3) : '-'; + rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); + } + const resultItemEl = document.getElementById('supertonic-result'); + if (resultItemEl) { + resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); + } + + // Final duration update (if custom player was used) + if (audioContext && audioChunks.length > 0) { + totalDuration = result.audioDuration; + if (durationDisplay) { + durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true }); + } + } + + // Always show download button + const downloadBtn = document.getElementById('supertonic-download'); + if (downloadBtn) { + downloadBtn.parentElement.style.display = 'block'; + downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav'); + } + } + + } catch (error) { + showDemoStatus(`Error: ${error.message}`, 'error'); + showDemoError(`Error during synthesis: ${error.message}`); + console.error('Synthesis error:', error); + + // Restore placeholder + demoResults.style.display = 'none'; + demoResults.innerHTML = ` +
+
🎙️
+

Your generated speech will appear here

+
/**
 * Convert a user-facing playback-speed multiplier into the synthesis model's
 * duration factor. Speed and duration are inversely related; the small offset
 * biases the mapping so that speed 0.95 lands on a duration factor of ~1.0.
 *
 * @param {number} speed  Playback speed multiplier (e.g. 1.0 = normal).
 * @param {number} [offset=0.05]  Calibration offset added before inversion.
 * @returns {number} Duration factor passed to the synthesizer.
 */
function speedToDurationFactor(speed, offset = 0.05) {
  const adjustedSpeed = speed + offset;
  return 1 / adjustedSpeed;
}

// Mirror the current slider positions into their value labels
// (e.g. "8 Steps" and "1.00x").
function updateSliderValues() {
  demoTotalStepsValue.textContent = `${demoTotalSteps.value} Steps`;
  const speed = parseFloat(demoSpeed.value);
  demoSpeedValue.textContent = `${speed.toFixed(2)}x`;
}
// Mark the given preset button as active (deactivating all others), remember
// it as the current preset, and sync the quote-mode styling of the results
// panel. Passing a falsy presetType simply clears every active state.
function updateActiveButton(presetType) {
  presetItems.forEach((item) => item.classList.remove('active'));

  if (presetType) {
    const target = document.querySelector(`.preset-item[data-preset="${presetType}"]`);
    if (target) {
      target.classList.add('active');
    }
  }

  currentPreset = presetType;
  updateQuoteModeState(presetType === 'quote');
}

// Toggle the results panel's quote-specific styling on or off.
function updateQuoteModeState(isQuote) {
  if (!demoResults) return;
  demoResults.classList.toggle('quote-mode', Boolean(isQuote));
}
// Keep the decorative left border line as tall as the input section by
// publishing its current height as a CSS custom property.
function updateLeftBorderHeight() {
  if (!demoInputSection) return;
  demoInputSection.style.setProperty('--demo-text-input-height', `${demoInputSection.offsetHeight}px`);
}

// Size the text input so the whole demo fits the viewport on desktop.
// On narrow (<= 768px) viewports the CSS default height is restored instead.
// The computation subtracts everything above/below the input (paddings, gaps,
// header, controls, label, preset row, output section) plus a fixed 275px
// reserve, then clamps to a 200px minimum with a 20px buffer.
function calculateTextInputHeight() {
  if (window.innerWidth <= 768) {
    // Mobile layout: let the stylesheet decide.
    demoTextInput.style.height = '';
    return;
  }

  const demoEl = document.querySelector('.interactive-demo');
  const containerEl = document.querySelector('.container');
  const headerEl = document.querySelector('.demo-header-wrapper');
  const controlsEl = document.querySelector('.demo-controls');
  const labelEl = document.querySelector('.demo-input-label');
  const presetsEl = document.querySelector('#presetControlsRow');
  const outputEl = document.querySelector('.demo-output-section');
  const contentEl = document.querySelector('.demo-content');

  // Fall back to <body> styles when an element is missing so parseFloat
  // still has something to chew on.
  const styleOf = (el) => window.getComputedStyle(el || document.body);
  const px = (value) => parseFloat(value) || 0;

  const demoStyle = styleOf(demoEl);
  const containerStyle = styleOf(containerEl);
  const contentGap = px(styleOf(contentEl).gap);

  // Everything that competes with the text input for vertical space.
  const reservedHeight =
    px(demoStyle.paddingTop) + px(demoStyle.paddingBottom) +
    px(containerStyle.paddingTop) + px(containerStyle.paddingBottom) +
    (headerEl ? headerEl.offsetHeight : 0) +
    (controlsEl ? controlsEl.offsetHeight : 0) +
    (labelEl ? labelEl.offsetHeight : 0) +
    (presetsEl ? presetsEl.offsetHeight : 0) +
    (outputEl ? outputEl.offsetHeight : 0) +
    contentGap * 2; // gap appears above and below the input area

  const availableHeight = window.innerHeight - reservedHeight - 275;
  const minHeight = 200;

  if (availableHeight > minHeight) {
    // 20px breathing room below the computed available space.
    demoTextInput.style.height = `${Math.max(minHeight, availableHeight - 20)}px`;
  } else {
    demoTextInput.style.height = `${minHeight}px`;
  }
}
// Highlight the speaker card matching `voice` and clear the highlight from
// every other card. Exposed on window so switchVoice() elsewhere in the app
// can keep the list in sync. No-op when the speaker list is absent.
window.updateActiveSpeaker = function (voice) {
  if (!speakerList || !speakerItems) return;
  speakerItems.forEach((item) => {
    // toggle(force) adds when true, removes when false — same effect as the
    // explicit add/remove branches.
    item.classList.toggle('active', item.dataset.voice === voice);
  });
};
!clickFromTouch) { + return; + } + + // Reset flag + clickFromTouch = false; + + if (voiceSelectDisabled || modelsLoading || isGenerating) return; + + const selectedVoice = item.dataset.voice; + + // If already selected, just auto-generate and play + if (selectedVoice === currentVoice) { + const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); + if (text.length >= 10 && !isGenerating && models && cfgs && processors) { + generateSpeech(); + } + return; + } + + // Disable all controls while loading + const wasDisabled = demoGenerateBtn.disabled; + demoGenerateBtn.disabled = true; + voiceSelectDisabled = true; + + // Update UI immediately + window.updateActiveSpeaker(selectedVoice); + + try { + await switchVoice(selectedVoice); + // Re-enable if models are loaded + if (models && cfgs && processors) { + demoGenerateBtn.disabled = false; + voiceSelectDisabled = false; + + // Auto-generate and play after voice change + const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); + if (text.length >= 10 && !isGenerating) { + generateSpeech(); + } + } + } catch (error) { + console.error('Failed to switch voice:', error); + // Revert selection on error + window.updateActiveSpeaker(currentVoice); + voiceSelectDisabled = false; + if (!wasDisabled) demoGenerateBtn.disabled = false; + } + }); + + // Hover handler for tooltip + if (speakerTooltip) { + // Desktop hover events + item.addEventListener('mouseenter', (e) => { + if (isTouchDevice() && isMobileViewport()) return; // Skip on touch devices with mobile viewport + const voice = item.dataset.voice; + if (voice && VOICE_DESCRIPTIONS[voice]) { + speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice]; + speakerTooltip.style.display = 'block'; + updateTooltipPosition(e, speakerTooltip); + } + }); + + item.addEventListener('mousemove', (e) => { + if (isTouchDevice() && isMobileViewport()) return; // Skip on touch devices with mobile viewport + if (speakerTooltip.style.display === 
'block') { + updateTooltipPosition(e, speakerTooltip); + } + }); + + item.addEventListener('mouseleave', () => { + if (isTouchDevice() && isMobileViewport()) return; // Skip on touch devices with mobile viewport + speakerTooltip.style.display = 'none'; + }); + + // Mobile touch events + let touchStartTime = 0; + let touchHandled = false; + let touchStartY = 0; + const TOUCH_MOVE_THRESHOLD = 10; // pixels + + item.addEventListener('touchstart', (e) => { + if (!isTouchDevice() || !isMobileViewport()) return; + + touchHandled = false; + const touch = e.touches[0]; + touchStartTime = Date.now(); + touchStartY = touch.clientY; + + const voice = item.dataset.voice; + if (voice && VOICE_DESCRIPTIONS[voice]) { + // Prevent default to block text selection + e.preventDefault(); + + // Show tooltip with mobile styling + speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice]; + speakerTooltip.style.display = 'block'; + updateTooltipPositionMobile(speakerTooltip, touch.clientY); + } + }, { passive: false }); + + item.addEventListener('touchmove', (e) => { + if (!isTouchDevice() || !isMobileViewport()) return; + + const touch = e.touches[0]; + const deltaY = Math.abs(touch.clientY - touchStartY); + + // Check if touch moved significantly + if (deltaY > TOUCH_MOVE_THRESHOLD) { + touchHandled = true; + // Hide tooltip if user moves finger + speakerTooltip.style.display = 'none'; + } + + // Prevent default to avoid scrolling while showing tooltip + e.preventDefault(); + }, { passive: false }); + + item.addEventListener('touchend', (e) => { + if (!isTouchDevice() || !isMobileViewport()) return; + + const touchEndTime = Date.now(); + const touchDuration = touchEndTime - touchStartTime; + + // Hide tooltip + speakerTooltip.style.display = 'none'; + + // Always prevent default to avoid text selection + e.preventDefault(); + + // Only allow click if it was a short tap without movement + if (!touchHandled && touchDuration < 500) { + // Short tap - trigger click event manually after a 
// Position the hover tooltip 40px above the pointer, then nudge it back
// on-screen if any edge overflows the viewport. The rect is measured once,
// right after the initial placement.
function updateTooltipPosition(event, tooltip) {
  const anchorX = event.clientX;
  const anchorY = event.clientY - 40; // 40px above mouse pointer
  tooltip.style.left = `${anchorX}px`;
  tooltip.style.top = `${anchorY}px`;

  const rect = tooltip.getBoundingClientRect();
  const viewportW = window.innerWidth;
  const viewportH = window.innerHeight;

  if (rect.right > viewportW) {
    tooltip.style.left = `${viewportW - rect.width - 10}px`;
  }
  if (rect.left < 0) {
    tooltip.style.left = '10px';
  }
  if (rect.top < 0) {
    // Flip below the pointer when there is no room above.
    tooltip.style.top = `${event.clientY + 40}px`;
  }
  if (rect.bottom > viewportH) {
    tooltip.style.top = `${viewportH - rect.height - 10}px`;
  }
}

// Mobile variant: render the tooltip as a centered, 90%-wide banner 75px
// above the touch point, flipping below the finger when it would go
// off-screen at the top.
function updateTooltipPositionMobile(tooltip, touchY) {
  const viewportH = window.innerHeight;

  Object.assign(tooltip.style, {
    width: '90%',
    left: '5%', // Center: (100% - 90%) / 2 = 5%
    right: 'auto',
    marginLeft: '0',
    marginRight: '0',
    whiteSpace: 'normal',
    textAlign: 'center',
  });

  // 75px above touch point (60px + 15px).
  tooltip.style.top = `${touchY - 75}px`;

  const rect = tooltip.getBoundingClientRect();
  if (rect.top < 10) {
    // No room above the finger — drop below it instead.
    tooltip.style.top = `${touchY + 20}px`;
  }
  if (rect.bottom > viewportH - 10) {
    tooltip.style.top = `${viewportH - rect.height - 10}px`;
  }
}
// Highlight the language item matching `language` and clear all others.
// Exposed on window so the auto-detection path (input handler) can keep the
// list in sync. No-op when the language list is absent.
window.updateActiveLanguage = function (language) {
  if (!languageList || !languageItems) return;
  languageItems.forEach((item) => {
    const isActive = item.dataset.language === language;
    if (isActive) {
      item.classList.add('active');
    } else {
      item.classList.remove('active');
    }
  });
};
+ }); + } + + // Title animation setup + const demoTitleLeft = document.querySelector('.demo-title-left'); + const demoTitleRight = document.querySelector('.demo-title-right'); + const demoOutputSection = document.querySelector('.demo-output-section'); + + // Initialize Text with letters wrapped in spans + if (demoTitleLeft) { + const text = demoTitleLeft.textContent.trim(); + demoTitleLeft.innerHTML = text.split('').map(char => + char === ' ' ? ' ' : `${char}` + ).join(''); + } + + // Text animation on demo-input-section click + if (demoInputSection && demoTitleLeft) { + demoInputSection.addEventListener('click', () => { + const letters = demoTitleLeft.querySelectorAll('.letter'); + // Reset all letters + letters.forEach(letter => { + letter.classList.remove('visible'); + }); + + // Show letters one by one (total 0.25s = 0.125s / 2) + letters.forEach((letter, index) => { + setTimeout(() => { + letter.classList.add('visible'); + }, index * 0.0625 * 1000); // 0.0625s delay between each letter + }); + }); + } + + // Speech animation on demo-output-section click + if (demoOutputSection && demoTitleRight) { + demoOutputSection.addEventListener('click', (event) => { + if (event.target.closest('#demoGenerateBtn')) { + return; + } + demoTitleRight.classList.remove('animate-speech'); + // Trigger reflow + void demoTitleRight.offsetWidth; + demoTitleRight.classList.add('animate-speech'); + }); + } + + // Initialize models + initializeModels(); +})();