|
|
import * as ort from 'onnxruntime-web'; |
|
|
// Optional preset demo texts injected by the host page as a global; falls back to empty.
const presetTexts = window.presetTexts || {};
|
|
|
|
|
// Inline SVG markup for the custom audio player transport buttons.
const PLAY_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 5v14l11-7-11-7z"></path></svg>`;

const PAUSE_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 6h3v12H8V6zm5 0h3v12h-3V6z"></path></svg>`;

const STOP_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M7 7h10v10H7V7z"></path></svg>`;
|
|
|
|
|
|
|
|
// Randomly toggles a "lightning flicker" class on <body> in short bursts,
// rescheduling itself forever. No-ops outside the browser.
(function initLightningParallax() {
    // Bail out in non-browser environments (e.g. tests, SSR).
    if (typeof document === 'undefined') return;

    // Perform a burst of 1-2 flashes, then invoke the completion callback.
    const runBlink = (className, onComplete) => {
        let flashesLeft = 1 + Math.round(Math.random());

        const step = () => {
            if (flashesLeft-- <= 0) {
                if (typeof onComplete === 'function') onComplete();
                return;
            }
            // Each flash stays on (and then rests) for 20-100ms.
            const delayMs = 20 + Math.random() * 80;
            document.body.classList.add(className);
            setTimeout(() => {
                document.body.classList.remove(className);
                setTimeout(step, delayMs);
            }, delayMs);
        };

        step();
    };

    // Re-arm a new burst at a random point within the next 10 seconds.
    const schedule = () => {
        setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000);
    };

    schedule();
})();
|
|
|
|
|
/**
 * Escape the five HTML-special characters so untrusted text can be safely
 * interpolated into innerHTML strings.
 *
 * Bug fix: each switch case previously returned the matched character
 * unchanged (the entity replacements had been lost), making the function a
 * no-op and leaving callers that build HTML open to injection.
 *
 * @param {string} value - Raw text to escape.
 * @returns {string} Text with &, <, >, " and ' replaced by HTML entities.
 */
function escapeHtml(value) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return value.replace(/[&<>"']/g, (match) => entities[match] ?? match);
}
|
|
|
|
|
/**
 * Render a stat value as HTML, attaching a suffix label to each numeric
 * segment. Slash-separated values ("a / b") are rendered per segment, and
 * the first segment can optionally carry a "First" prefix label.
 *
 * @param {*} value - Raw stat value (stringified for display).
 * @param {string} suffix - Unit label appended to each segment; falsy skips markup.
 * @param {{firstLabel?: boolean}} [options] - firstLabel adds a "First"
 *   prefix to the leading segment of slash-separated values.
 * @returns {string} HTML markup; empty string for null/undefined input.
 */
function formatStatValueWithSuffix(value, suffix, options = {}) {
    const { firstLabel = false } = options;

    if (value === undefined || value === null) return '';

    if (!suffix) return escapeHtml(`${value}`);

    const raw = `${value}`.trim();
    // Placeholder values ("--", "-", "error") are shown as-is, no suffix.
    const isPlaceholder = !raw || raw === '--' || raw === '-' || raw.toLowerCase() === 'error';
    if (isPlaceholder) return escapeHtml(raw);

    // Build one <span class="stat-value-segment"> for a single value part.
    const renderSegment = (segment, includePrefix = false) => {
        const trimmed = segment.trim();
        if (!trimmed) return '';

        const withPrefix = includePrefix && firstLabel;
        const pieces = [
            withPrefix ? `<span class="stat-label stat-suffix stat-prefix">First</span>` : '',
            `<span class="stat-value-number">${escapeHtml(trimmed)}</span>`,
            `<span class="stat-label stat-suffix">${escapeHtml(suffix)}</span>`,
        ];
        const segmentClass = withPrefix ? 'stat-value-segment has-prefix' : 'stat-value-segment';
        return `<span class="${segmentClass}">${pieces.join('')}</span>`;
    };

    if (!raw.includes('/')) return renderSegment(raw);

    return raw
        .split('/')
        .map((part, index) => renderSegment(part, index === 0))
        .join(' / ');
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Turns preprocessed text into padded integer id matrices for the text
 * encoder, using a unicode-value → vocabulary-index lookup table.
 */
export class UnicodeProcessor {
    /**
     * @param {Object<number, number>} indexer - Maps unicode values to model
     *   vocabulary indices; missing / -1 entries mark unsupported characters.
     */
    constructor(indexer) {
        this.indexer = indexer;
    }

    /**
     * Encode a batch of texts.
     *
     * @param {string[]} textList - Raw texts (run through preprocessText here).
     * @param {?string} lang - Optional language tag forwarded to preprocessText.
     * @returns {{textIds: number[][], textMask: number[][][], unsupportedChars: string[]}}
     *   textIds rows are zero-padded to the longest text in the batch;
     *   unsupportedChars lists characters with no index (encoded as 0).
     */
    call(textList, lang = null) {
        const processedTexts = textList.map(t => preprocessText(t, lang));
        // NOTE(review): lengths use UTF-16 .length, while the id loop below
        // iterates code points via textToUnicodeValues — these disagree for
        // astral characters. Assumes inputs are BMP-only; confirm.
        const textIdsLengths = processedTexts.map(t => t.length);
        const maxLen = Math.max(...textIdsLengths);

        const textIds = [];
        const unsupportedChars = new Set();

        for (let i = 0; i < processedTexts.length; i++) {
            // Zero-padded row up to the batch-wide max length.
            const row = new Array(maxLen).fill(0);
            const unicodeVals = textToUnicodeValues(processedTexts[i]);
            for (let j = 0; j < unicodeVals.length; j++) {
                const indexValue = this.indexer[unicodeVals[j]];

                if (indexValue === undefined || indexValue === null || indexValue === -1) {
                    // Unknown character: record it and fall back to id 0.
                    unsupportedChars.add(processedTexts[i][j]);
                    row[j] = 0;
                } else {
                    row[j] = indexValue;
                }
            }
            textIds.push(row);
        }

        const textMask = getTextMask(textIdsLengths);
        return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) };
    }
}
|
|
|
|
|
// Language codes accepted as explicit tags by preprocessText.
const AVAILABLE_LANGS = ["en", "ko", "es", "pt", "fr"];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Heuristically detect the language of `text` among en/es/fr/pt/ko.
 * Korean is detected by Hangul characters; the Latin languages are scored
 * with accent, marker-word, n-gram, contraction, and article heuristics.
 *
 * @param {string} text - Input text (only the last 100 chars are sampled).
 * @returns {?string} Two-letter language code, or null when the text is too
 *   short or no language scores at least 4 points.
 */
export function detectLanguage(text) {
    if (!text || text.trim().length < 3) {
        return null;
    }

    // Sample the tail of long inputs — it best reflects current typing.
    const sample = text.length > 100 ? text.substring(text.length - 100) : text;
    const normalized = sample.normalize('NFC').toLowerCase();

    // Hangul syllables/jamo are unambiguous: two hits suffice for Korean.
    const hangulHits = normalized.match(/[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uD7B0-\uD7FF]/g) || [];
    if (hangulHits.length >= 2) {
        return 'ko';
    }

    // Key order matters: ties in the final argmax resolve to the earlier key.
    const scores = { en: 0, es: 0, fr: 0, pt: 0 };

    // Diacritics/punctuation that are (nearly) exclusive to one language.
    const accentClues = [
        [/ñ/, 'es', 15],
        [/[¿¡]/, 'es', 12],
        [/ã/, 'pt', 15],
        [/õ/, 'pt', 15],
        [/œ/, 'fr', 15],
        [/[ùû]/, 'fr', 10],
        [/ç/, 'fr', 4],   // ç is shared by French and Portuguese
        [/ç/, 'pt', 4],
        [/[èêë]/, 'fr', 5],
        [/[àâ]/, 'fr', 3],
        [/[îï]/, 'fr', 4],
        [/ô/, 'fr', 3],
    ];
    for (const [pattern, lang, points] of accentClues) {
        if (pattern.test(normalized)) {
            scores[lang] += points;
        }
    }

    // High-frequency words that strongly indicate a specific language.
    const markerWords = {
        en: ['the', 'is', 'are', 'was', 'were', 'have', 'has', 'been', 'will', 'would', 'could', 'should', 'this', 'that', 'with', 'from', 'they', 'what', 'which', 'there', 'their', 'about', 'these', 'other', 'into', 'just', 'your', 'some', 'than', 'them', 'then', 'only', 'being', 'through', 'after', 'before'],
        es: ['el', 'los', 'las', 'es', 'está', 'están', 'porque', 'pero', 'muy', 'también', 'más', 'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'yo', 'tú', 'nosotros', 'ellos', 'ellas', 'hola', 'gracias', 'buenos', 'buenas', 'ahora', 'siempre', 'nunca', 'todo', 'nada', 'algo', 'alguien'],
        fr: ['le', 'les', 'est', 'sont', 'dans', 'ce', 'cette', 'ces', 'il', 'elle', 'ils', 'elles', 'je', 'tu', 'nous', 'vous', 'avec', 'sur', 'ne', 'pas', 'plus', 'tout', 'bien', 'fait', 'être', 'avoir', 'donc', 'car', 'ni', 'jamais', 'toujours', 'rien', 'quelque', 'encore', 'aussi', 'très', 'peu', 'ici'],
        pt: ['os', 'as', 'é', 'são', 'está', 'estão', 'não', 'na', 'no', 'da', 'do', 'das', 'dos', 'ao', 'aos', 'ele', 'ela', 'eles', 'elas', 'eu', 'nós', 'você', 'vocês', 'seu', 'sua', 'seus', 'suas', 'muito', 'também', 'já', 'foi', 'só', 'mesmo', 'ter', 'até', 'isso', 'olá', 'obrigado', 'obrigada', 'bom', 'boa', 'agora', 'sempre', 'nunca', 'tudo', 'nada', 'algo', 'alguém']
    };

    // Invert the word lists once; a word may vote for several languages
    // (e.g. 'está' appears in both Spanish and Portuguese).
    const wordToLangs = new Map();
    for (const [lang, wordList] of Object.entries(markerWords)) {
        for (const word of wordList) {
            if (!wordToLangs.has(word)) {
                wordToLangs.set(word, []);
            }
            wordToLangs.get(word).push(lang);
        }
    }

    const tokens = normalized.match(/[a-záàâãäåçéèêëíìîïñóòôõöúùûüýÿœæ]+/g) || [];
    for (const token of tokens) {
        for (const lang of wordToLangs.get(token) || []) {
            scores[lang] += 3;
        }
    }

    // Characteristic letter sequences, 2 points per occurrence.
    const ngramClues = {
        en: [/th/g, /ing/g, /tion/g, /ight/g, /ould/g],
        es: [/ción/g, /mente/g, /ado/g, /ido/g],
        fr: [/tion/g, /ment/g, /eau/g, /aux/g, /eux/g, /oir/g, /ais/g, /ait/g, /ont/g],
        pt: [/ção/g, /ões/g, /mente/g, /ado/g, /ido/g, /nh/g, /lh/g]
    };
    for (const [lang, patterns] of Object.entries(ngramClues)) {
        for (const pattern of patterns) {
            scores[lang] += (normalized.match(pattern) || []).length * 2;
        }
    }

    // French elisions like "l'eau", "j'ai" are a strong signal.
    const contractionHits = normalized.match(/[cdjlmnst]'[aeiouéèêàâîïôûù]/g) || [];
    scores.fr += contractionHits.length * 5;

    // Definite articles as standalone words.
    const articleClues = [
        [/\bthe\b/, 'en', 5],
        [/\b(el|los)\b/, 'es', 4],
        [/\b(le|les)\b/, 'fr', 4],
        [/\b(o|os)\b/, 'pt', 3],
    ];
    for (const [pattern, lang, points] of articleClues) {
        if (pattern.test(normalized)) {
            scores[lang] += points;
        }
    }

    // Argmax with strict '>' — earlier keys win ties (en, es, fr, pt).
    let detectedLang = null;
    let maxScore = 0;
    for (const [lang, score] of Object.entries(scores)) {
        if (score > maxScore) {
            maxScore = score;
            detectedLang = lang;
        }
    }

    // Require a minimum amount of evidence before committing.
    return maxScore >= 4 ? detectedLang : null;
}
|
|
|
|
|
|
|
|
// Human-readable names for the supported language codes (used by the
// language auto-detect toast).
const LANGUAGE_NAMES = {
    'en': 'English',
    'ko': 'Korean',
    'es': 'Spanish',
    'pt': 'Portuguese',
    'fr': 'French'
};
|
|
|
|
|
/**
 * Normalize raw user text into the form the TTS frontend expects:
 * NFKD-normalized, emoji stripped, typography unified, whitespace squeezed,
 * guaranteed sentence-final punctuation, and wrapped in a language tag.
 *
 * @param {string} text - Raw input text.
 * @param {?string} lang - Code from AVAILABLE_LANGS, or null for the <na>
 *   (auto) tag.
 * @returns {string} Normalized text wrapped as `<lang>...</lang>`.
 * @throws {Error} If `lang` is non-null and not in AVAILABLE_LANGS.
 */
export function preprocessText(text, lang = null) {
    let result = text.normalize('NFKD');

    // Strip emoji and pictographs — they have no pronunciation.
    result = result.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]+/gu, '');

    // Unify typographic variants; some symbols become plain spaces.
    const charSubstitutions = [
        ['–', '-'],
        ['‑', '-'],
        ['—', '-'],
        ['_', ' '],
        ['\u201C', '"'],
        ['\u201D', '"'],
        ['\u2018', "'"],
        ['\u2019', "'"],
        ['´', "'"],
        ['`', "'"],
        ['[', ' '],
        [']', ' '],
        ['|', ' '],
        ['/', ' '],
        ['#', ' '],
        ['→', ' '],
        ['←', ' '],
    ];
    for (const [from, to] of charSubstitutions) {
        result = result.replaceAll(from, to);
    }

    // Symbols that are simply dropped.
    result = result.replace(/[♥☆♡©\\]/g, "");

    // Spell out common symbols/abbreviations.
    const phraseSubstitutions = [
        ['@', ' at '],
        ['e.g.,', 'for example,'],
        ['i.e.,', 'that is,'],
    ];
    for (const [from, to] of phraseSubstitutions) {
        result = result.replaceAll(from, to);
    }

    // Remove stray spaces before punctuation.
    result = result
        .replace(/ ,/g, ",")
        .replace(/ \./g, ".")
        .replace(/ !/g, "!")
        .replace(/ \?/g, "?")
        .replace(/ ;/g, ";")
        .replace(/ :/g, ":")
        .replace(/ '/g, "'");

    // Collapse runs of doubled quote characters down to single ones.
    for (const [doubled, single] of [['""', '"'], ["''", "'"], ['``', '`']]) {
        while (result.includes(doubled)) {
            result = result.replaceAll(doubled, single);
        }
    }

    // Squeeze whitespace and guarantee a sentence-final character.
    result = result.replace(/\s+/g, " ").trim();
    if (!/[.!?;:,'"')\]}…。」』】〉》›»]$/.test(result)) {
        result += ".";
    }

    // Wrap with a language tag; <na> lets the model infer the language.
    if (lang === null) {
        return `<na>` + result + `</na>`;
    }
    if (!AVAILABLE_LANGS.includes(lang)) {
        throw new Error(`Invalid language: ${lang}`);
    }
    return `<${lang}>` + result + `</${lang}>`;
}
|
|
|
|
|
/**
 * Convert text into an array of Unicode code point values, one per code
 * point (Array.from splits the string on code points, not UTF-16 units).
 *
 * Bug fix: charCodeAt(0) only returned the leading UTF-16 surrogate for
 * astral characters, mangling them; codePointAt(0) returns the full code
 * point. BMP characters are unaffected.
 *
 * @param {string} text
 * @returns {number[]} Unicode code point values.
 */
export function textToUnicodeValues(text) {
    return Array.from(text).map(char => char.codePointAt(0));
}
|
|
|
|
|
/**
 * Build a [batch, 1, maxLen] binary mask from per-item lengths:
 * mask[b][0][t] is 1.0 while t < lengths[b], else 0.0.
 *
 * @param {number[]} lengths - Valid length of each batch item.
 * @param {?number} [maxLen] - Mask width; defaults to the longest length.
 * @returns {number[][][]} Nested mask array of shape [batch, 1, maxLen].
 */
export function lengthToMask(lengths, maxLen = null) {
    const width = maxLen || Math.max(...lengths);
    return lengths.map((validLen) => [
        Array.from({ length: width }, (_, t) => (t < validLen ? 1.0 : 0.0)),
    ]);
}
|
|
|
|
|
/**
 * Build the text mask for a batch from its per-item id lengths.
 * Thin convenience wrapper over lengthToMask.
 *
 * @param {number[]} textIdsLengths - Length of each text id row.
 * @returns {number[][][]} Mask of shape [batch, 1, maxLen].
 */
export function getTextMask(textIdsLengths) {
    const mask = lengthToMask(textIdsLengths);
    return mask;
}
|
|
|
|
|
/**
 * Compute latent-space masks from waveform sample lengths. Each latent frame
 * covers base_chunk_size * chunk_compress_factor samples; lengths are
 * converted with ceiling division, then turned into masks.
 *
 * @param {number[]} wavLengths - Waveform lengths in samples.
 * @param {Object} cfgs - Config with ae.base_chunk_size and
 *   ttl.chunk_compress_factor.
 * @returns {number[][][]} Mask of shape [batch, 1, maxLatentLen].
 */
export function getLatentMask(wavLengths, cfgs) {
    const samplesPerLatent = cfgs.ae.base_chunk_size * cfgs.ttl.chunk_compress_factor;
    // Ceiling division: a partial trailing chunk still occupies a frame.
    const latentLengths = wavLengths.map(
        (numSamples) => Math.floor((numSamples + samplesPerLatent - 1) / samplesPerLatent)
    );
    return lengthToMask(latentLengths);
}
|
|
|
|
|
/**
 * Sample a batch of Gaussian noise latents sized for the requested
 * durations, zeroing positions beyond each item's valid length.
 *
 * @param {number[][][]} duration - Per-item durations in seconds, indexed as
 *   duration[b][0][0].
 * @param {Object} cfgs - Config with ae.sample_rate, ae.base_chunk_size,
 *   ttl.chunk_compress_factor and ttl.latent_dim.
 * @returns {{noisyLatent: number[][][], latentMask: number[][][]}}
 */
export function sampleNoisyLatent(duration, cfgs) {
    const sampleRate = cfgs.ae.sample_rate;
    const baseChunkSize = cfgs.ae.base_chunk_size;
    const chunkCompressFactor = cfgs.ttl.chunk_compress_factor;
    const ldim = cfgs.ttl.latent_dim;

    const wavLenMax = Math.max(...duration.map(d => d[0][0])) * sampleRate;
    const wavLengths = duration.map(d => Math.floor(d[0][0] * sampleRate));
    const chunkSize = baseChunkSize * chunkCompressFactor;
    // Ceiling division: a partial trailing chunk still needs a latent frame.
    const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize);
    const latentDim = ldim * chunkCompressFactor;

    // Draw standard normal samples via the Box-Muller transform.
    const noisyLatent = [];
    for (let b = 0; b < duration.length; b++) {
        const batch = [];
        for (let d = 0; d < latentDim; d++) {
            const row = [];
            for (let t = 0; t < latentLen; t++) {
                // Bug fix: Math.random() lies in [0, 1), so the original
                // u1 = Math.random() could hit Math.log(0) = -Infinity and
                // poison the latent with Infinity/NaN. 1 - Math.random()
                // lies in (0, 1], which is always safe.
                const u1 = 1 - Math.random();
                const u2 = Math.random();
                const randNormal = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
                row.push(randNormal);
            }
            batch.push(row);
        }
        noisyLatent.push(batch);
    }

    // Zero out noise beyond each item's valid latent length.
    const latentMask = getLatentMask(wavLengths, cfgs);
    for (let b = 0; b < noisyLatent.length; b++) {
        for (let d = 0; d < noisyLatent[b].length; d++) {
            for (let t = 0; t < noisyLatent[b][d].length; t++) {
                noisyLatent[b][d][t] *= latentMask[b][0][t];
            }
        }
    }

    return { noisyLatent, latentMask };
}
|
|
|
|
|
/**
 * Create an ONNX Runtime inference session for the model at `onnxPath`.
 *
 * @param {string} onnxPath - URL or path to the .onnx file.
 * @param {Object} opts - ort.InferenceSession.create options.
 * @returns {Promise<Object>} The created inference session.
 */
export async function loadOnnx(onnxPath, opts) {
    return await ort.InferenceSession.create(onnxPath, opts);
}
|
|
|
|
|
/**
 * Load the four TTS ONNX models concurrently, reporting progress as each
 * finishes. Also fires a best-effort ping to the Hugging Face repo so a
 * download is recorded (failures are logged, never fatal).
 *
 * @param {string} basePath - Directory/URL prefix containing the .onnx files.
 * @param {Object} opts - Session creation options passed to loadOnnx.
 * @param {?function(string, number, number): void} onProgress - Called with
 *   (modelName, loadedCount, totalCount) after each model loads.
 * @returns {Promise<Object>} Sessions keyed by dpOrt / textEncOrt /
 *   vectorEstOrt / vocoderOrt.
 */
export async function loadOnnxAll(basePath, opts, onProgress) {
    const models = [
        { name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' },
        { name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' },
        { name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' },
        { name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' }
    ];

    let loadedCount = 0;

    // Kick off every session load concurrently; report progress as each lands.
    const entries = await Promise.all(
        models.map(async ({ name, path, key }) => {
            const session = await loadOnnx(path, opts);
            loadedCount++;
            if (onProgress) {
                onProgress(name, loadedCount, models.length);
            }
            return [key, session];
        })
    );

    const result = Object.fromEntries(entries);

    // Best-effort download-count ping; never blocks model loading.
    try {
        await fetch('https://huggingface.co/Supertone/supertonic-2/resolve/main/config.json');
    } catch (error) {
        console.warn('Failed to update download count:', error);
    }
    return result;
}
|
|
|
|
|
/**
 * Fetch and parse the TTS model configuration (tts.json).
 *
 * @param {string} basePath - Base URL/directory containing tts.json.
 * @returns {Promise<Object>} Parsed configuration object.
 * @throws {Error} On HTTP failure. fetch only rejects on network errors, so
 *   non-2xx statuses are surfaced explicitly instead of falling through to a
 *   confusing JSON parse error.
 */
export async function loadCfgs(basePath) {
    const response = await fetch(`${basePath}/tts.json`);
    if (!response.ok) {
        throw new Error(`Failed to load ${basePath}/tts.json: HTTP ${response.status}`);
    }
    return await response.json();
}
|
|
|
|
|
/**
 * Fetch the unicode indexer table and build the text processor from it.
 *
 * @param {string} basePath - Base URL/directory containing unicode_indexer.json.
 * @returns {Promise<{textProcessor: UnicodeProcessor}>}
 * @throws {Error} On HTTP failure (fetch only rejects on network errors, so
 *   non-2xx statuses are reported explicitly).
 */
export async function loadProcessors(basePath) {
    const response = await fetch(`${basePath}/unicode_indexer.json`);
    if (!response.ok) {
        throw new Error(`Failed to load ${basePath}/unicode_indexer.json: HTTP ${response.status}`);
    }
    const unicodeIndexerData = await response.json();
    const textProcessor = new UnicodeProcessor(unicodeIndexerData);

    return { textProcessor };
}
|
|
|
|
|
/**
 * Parse a WAV (RIFF/WAVE) ArrayBuffer into mono Float32 samples.
 * Multi-channel audio is downmixed by averaging channels. Supports 16-bit
 * and 24-bit integer PCM and 32-bit float data.
 *
 * Bug fix: the chunk scan now honors RIFF word alignment — odd-sized chunks
 * carry one pad byte, and skipping it previously desynced the scan. The scan
 * is also bounds-checked so truncated files raise a clear error instead of a
 * DataView RangeError.
 *
 * @param {ArrayBuffer} buffer - Complete WAV file contents.
 * @returns {{sampleRate: number, audioData: Float32Array}}
 * @throws {Error} If the signature, required chunks, or bit depth are
 *   invalid/unsupported.
 */
function parseWavFile(buffer) {
    const view = new DataView(buffer);

    // Validate the RIFF/WAVE container signature.
    const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
    if (riff !== 'RIFF') {
        throw new Error('Not a valid WAV file');
    }

    const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11));
    if (wave !== 'WAVE') {
        throw new Error('Not a valid WAV file');
    }

    // Scan chunks for 'fmt ' (format info) and 'data' (sample payload).
    let offset = 12;
    let fmtChunk = null;
    let dataChunk = null;

    while (offset + 8 <= buffer.byteLength) {
        const chunkId = String.fromCharCode(
            view.getUint8(offset),
            view.getUint8(offset + 1),
            view.getUint8(offset + 2),
            view.getUint8(offset + 3)
        );
        const chunkSize = view.getUint32(offset + 4, true);

        if (chunkId === 'fmt ') {
            fmtChunk = {
                audioFormat: view.getUint16(offset + 8, true),
                numChannels: view.getUint16(offset + 10, true),
                sampleRate: view.getUint32(offset + 12, true),
                bitsPerSample: view.getUint16(offset + 22, true)
            };
        } else if (chunkId === 'data') {
            dataChunk = {
                offset: offset + 8,
                size: chunkSize
            };
            break;
        }

        // RIFF chunks are word-aligned: odd-sized chunks have one pad byte.
        offset += 8 + chunkSize + (chunkSize % 2);
    }

    if (!fmtChunk || !dataChunk) {
        throw new Error('Invalid WAV file format');
    }

    const bytesPerSample = fmtChunk.bitsPerSample / 8;
    const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels));
    const audioData = new Float32Array(numSamples);

    if (fmtChunk.bitsPerSample === 16) {
        // 16-bit signed integer PCM, scaled to [-1, 1).
        for (let i = 0; i < numSamples; i++) {
            let sample = 0;
            for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
                const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2;
                sample += view.getInt16(sampleOffset, true);
            }
            audioData[i] = (sample / fmtChunk.numChannels) / 32768.0;
        }
    } else if (fmtChunk.bitsPerSample === 24) {
        // 24-bit little-endian signed PCM (no native DataView accessor).
        for (let i = 0; i < numSamples; i++) {
            let sample = 0;
            for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
                const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3;
                const byte1 = view.getUint8(sampleOffset);
                const byte2 = view.getUint8(sampleOffset + 1);
                const byte3 = view.getUint8(sampleOffset + 2);
                let value = (byte3 << 16) | (byte2 << 8) | byte1;
                // Sign-extend from 24 bits.
                if (value & 0x800000) {
                    value = value - 0x1000000;
                }
                sample += value;
            }
            audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0;
        }
    } else if (fmtChunk.bitsPerSample === 32) {
        // 32-bit samples are read as IEEE floats.
        for (let i = 0; i < numSamples; i++) {
            let sample = 0;
            for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
                const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4;
                sample += view.getFloat32(sampleOffset, true);
            }
            audioData[i] = sample / fmtChunk.numChannels;
        }
    } else {
        throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`);
    }

    return {
        sampleRate: fmtChunk.sampleRate,
        audioData: audioData
    };
}
|
|
|
|
|
/**
 * Flatten an arbitrarily nested numeric array into a float32 ONNX tensor.
 *
 * @param {Array} array - Nested numbers; total count must match `dims`.
 * @param {number[]} dims - Tensor shape.
 * @returns {Object} ort.Tensor of dtype float32.
 */
export function arrayToTensor(array, dims) {
    const flattened = array.flat(Infinity);
    const data = Float32Array.from(flattened);
    return new ort.Tensor('float32', data, dims);
}
|
|
|
|
|
/**
 * Flatten an arbitrarily nested integer array into an int64 ONNX tensor.
 *
 * @param {Array} array - Nested integers; total count must match `dims`.
 * @param {number[]} dims - Tensor shape.
 * @returns {Object} ort.Tensor of dtype int64.
 */
export function intArrayToTensor(array, dims) {
    const flattened = array.flat(Infinity);
    // int64 tensors require BigInt elements.
    const data = BigInt64Array.from(flattened, (x) => BigInt(x));
    return new ort.Tensor('int64', data, dims);
}
|
|
|
|
|
/**
 * Encode mono Float32 audio as a 16-bit PCM WAV file (44-byte header + data).
 * Samples are clamped to [-1, 1] and quantized with floor(sample * 32767).
 *
 * @param {Float32Array|number[]} audioData - Mono samples in [-1, 1].
 * @param {number} sampleRate - Samples per second.
 * @returns {ArrayBuffer} Complete RIFF/WAVE buffer.
 */
export function writeWavFile(audioData, sampleRate) {
    const numChannels = 1;
    const bitsPerSample = 16;
    const bytesPerSample = bitsPerSample / 8;
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const dataSize = audioData.length * bytesPerSample;

    const buffer = new ArrayBuffer(44 + dataSize);
    const view = new DataView(buffer);

    // Write an ASCII tag byte-by-byte at the given offset.
    const writeTag = (offset, tag) => {
        for (let i = 0; i < tag.length; i++) {
            view.setUint8(offset + i, tag.charCodeAt(i));
        }
    };

    // RIFF container header.
    writeTag(0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true);
    writeTag(8, 'WAVE');

    // fmt subchunk (PCM).
    writeTag(12, 'fmt ');
    view.setUint32(16, 16, true);            // fmt chunk size
    view.setUint16(20, 1, true);             // audio format: integer PCM
    view.setUint16(22, numChannels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, byteRate, true);
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, bitsPerSample, true);

    // data subchunk header.
    writeTag(36, 'data');
    view.setUint32(40, dataSize, true);

    // Clamp and quantize each sample to signed 16-bit little-endian PCM.
    for (let i = 0; i < audioData.length; i++) {
        const clamped = Math.max(-1, Math.min(1, audioData[i]));
        view.setInt16(44 + i * 2, Math.floor(clamped * 32767), true);
    }

    return buffer;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Page-level UI wiring once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    // Smooth-scroll for in-page anchor links, keeping the URL hash in sync.
    document.querySelectorAll('a[href^="#"]').forEach(anchor => {
        anchor.addEventListener('click', function (e) {
            e.preventDefault();
            const href = this.getAttribute('href');
            const target = document.querySelector(href);
            if (target) {
                // Update the address bar without triggering a jump.
                if (history.pushState) {
                    history.pushState(null, null, href);
                }
                target.scrollIntoView({
                    behavior: 'smooth',
                    block: 'start'
                });
            }
        });
    });

    // Reveal-on-scroll animation: fade/slide elements in once visible.
    const observerOptions = {
        threshold: 0.1,
        rootMargin: '0px 0px -100px 0px'
    };

    // NOTE(review): this observer is created but observer.observe() is never
    // called in this handler, so it currently has no effect — confirm whether
    // elements are registered elsewhere or this is dead code.
    const observer = new IntersectionObserver((entries) => {
        entries.forEach(entry => {
            if (entry.isIntersecting) {
                entry.target.style.opacity = '1';
                entry.target.style.transform = 'translateY(0)';
            }
        });
    }, observerOptions);

});
|
|
|
|
|
|
|
|
(async function() { |
|
|
|
|
|
const demoTextInput = document.getElementById('demoTextInput'); |
|
|
if (!demoTextInput) return; |
|
|
|
|
|
|
|
|
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; |
|
|
ort.env.wasm.numThreads = 1; |
|
|
|
|
|
|
|
|
|
|
|
const REF_EMBEDDING_PATHS = { |
|
|
'F1': 'assets/voice_styles/F1.json', |
|
|
'F2': 'assets/voice_styles/F2.json', |
|
|
'F3': 'assets/voice_styles/F3.json', |
|
|
'F4': 'assets/voice_styles/F4.json', |
|
|
'F5': 'assets/voice_styles/F5.json', |
|
|
'M1': 'assets/voice_styles/M1.json', |
|
|
'M2': 'assets/voice_styles/M2.json', |
|
|
'M3': 'assets/voice_styles/M3.json', |
|
|
'M4': 'assets/voice_styles/M4.json', |
|
|
'M5': 'assets/voice_styles/M5.json' |
|
|
}; |
|
|
|
|
|
|
|
|
const VOICE_DESCRIPTIONS = { |
|
|
'F1': 'Sarah - A calm female voice with a slightly low tone; steady and composed.', |
|
|
'F2': 'Lily - A bright, cheerful female voice; lively, playful, and youthful with spirited energy.', |
|
|
'F3': 'Jessica - A clear, professional announcer-style female voice; articulate and broadcast-ready.', |
|
|
'F4': 'Olivia - A crisp, confident female voice; distinct and expressive with strong delivery.', |
|
|
'F5': 'Emily - A kind, gentle female voice; soft-spoken, calm, and naturally soothing.', |
|
|
'M1': 'Alex - A lively, upbeat male voice with confident energy and a standard, clear tone.', |
|
|
'M2': 'James - A deep, robust male voice; calm, composed, and serious with a grounded presence.', |
|
|
'M3': 'Robert - A polished, authoritative male voice; confident and trustworthy with strong presentation quality.', |
|
|
'M4': 'Sam - A soft, neutral-toned male voice; gentle and approachable with a youthful, friendly quality.', |
|
|
'M5': 'Daniel - A warm, soft-spoken male voice; calm and soothing with a natural storytelling quality.' |
|
|
}; |
|
|
|
|
|
|
|
|
let models = null; |
|
|
let cfgs = null; |
|
|
let processors = null; |
|
|
let currentVoice = 'M3'; |
|
|
|
|
|
|
|
|
/**
 * Pick the initial TTS language from the browser locale, falling back to
 * English when the locale is unsupported.
 * @returns {string} One of 'en', 'es', 'pt', 'fr', 'ko'.
 */
function detectBrowserLanguage() {
    // navigator.userLanguage covers legacy IE; default to English.
    const locale = navigator.language || navigator.userLanguage || 'en';
    // Reduce e.g. 'pt-BR' to its base code 'pt'.
    const base = locale.split('-')[0].toLowerCase();
    const supported = ['en', 'es', 'pt', 'fr', 'ko'];
    return supported.includes(base) ? base : 'en';
}
|
|
|
|
|
let currentLanguage = detectBrowserLanguage(); |
|
|
let refEmbeddingCache = {}; |
|
|
let currentStyleTtlTensor = null; |
|
|
let currentStyleDpTensor = null; |
|
|
let modelsLoading = false; |
|
|
let modelsLoaded = false; |
|
|
let modelsLoadPromise = null; |
|
|
|
|
|
|
|
|
const demoStatusBox = document.getElementById('demoStatusBox'); |
|
|
const demoStatusText = document.getElementById('demoStatusText'); |
|
|
const wasmWarningBanner = document.getElementById('wasmWarningBanner'); |
|
|
const demoGenerateBtn = document.getElementById('demoGenerateBtn'); |
|
|
const demoTotalSteps = document.getElementById('demoTotalSteps'); |
|
|
const demoSpeed = document.getElementById('demoSpeed'); |
|
|
const demoTotalStepsValue = document.getElementById('demoTotalStepsValue'); |
|
|
const demoSpeedValue = document.getElementById('demoSpeedValue'); |
|
|
const demoResults = document.getElementById('demoResults'); |
|
|
const demoError = document.getElementById('demoError'); |
|
|
const demoCharCount = document.getElementById('demoCharCount'); |
|
|
const demoCharCounter = document.getElementById('demoCharCounter'); |
|
|
const demoCharWarning = document.getElementById('demoCharWarning'); |
|
|
|
|
|
|
|
|
// Input length limits: generation requires at least MIN_CHARS characters,
// and text is split into chunks no longer than the per-language cap.
const MIN_CHARS = 10;
const MAX_CHUNK_LENGTH_DEFAULT = 300;
const MAX_CHUNK_LENGTH_KO = 120;
// Pick the chunk cap for the currently selected language (Korean uses a
// smaller cap than the others).
function getMaxChunkLength() {
    return currentLanguage === 'ko' ? MAX_CHUNK_LENGTH_KO : MAX_CHUNK_LENGTH_DEFAULT;
}
|
|
|
|
|
|
|
|
let audioContext = null; |
|
|
let scheduledSources = []; |
|
|
let audioChunks = []; |
|
|
let totalDuration = 0; |
|
|
let startTime = 0; |
|
|
let pauseTime = 0; |
|
|
let isPaused = false; |
|
|
let isPlaying = false; |
|
|
let animationFrameId = null; |
|
|
let playPauseBtn = null; |
|
|
let progressBar = null; |
|
|
let currentTimeDisplay = null; |
|
|
let durationDisplay = null; |
|
|
let progressFill = null; |
|
|
let firstChunkGenerationTime = 0; |
|
|
let totalChunks = 0; |
|
|
let nextScheduledTime = 0; |
|
|
let currentGenerationTextLength = 0; |
|
|
let supertonicPlayerRecord = null; |
|
|
let isGenerating = false; |
|
|
|
|
|
|
|
|
let customAudioPlayers = []; |
|
|
|
|
|
const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches; |
|
|
|
|
|
const isTouchDevice = () => 'ontouchstart' in window || navigator.maxTouchPoints > 0; |
|
|
const trimDecimalsForMobile = (formatted) => { |
|
|
if (!formatted) return formatted; |
|
|
return isMobileViewport() ? formatted.replace(/\.\d{2}$/, '') : formatted; |
|
|
}; |
|
|
|
|
|
/**
 * Pause every registered custom audio player other than `currentPlayer`,
 * so only one player produces sound at a time.
 * @param {?Object} currentPlayer - The player allowed to keep playing.
 */
function pauseAllPlayersExcept(currentPlayer) {
    for (const player of customAudioPlayers) {
        if (player === currentPlayer) continue;
        if (player && typeof player.pausePlayback === 'function') {
            player.pausePlayback();
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Split input text into TTS-sized chunks: paragraphs first, then sentences,
 * greedily packing sentences up to maxLen characters per chunk. Sentence
 * splitting avoids breaking after common abbreviations (Mr., Dr., e.g., ...)
 * and single capital initials ("J.").
 *
 * @param {string} text - Full input text.
 * @param {number} [maxLen] - Max characters per chunk; defaults to the
 *   current language's limit.
 * @returns {string[]} Ordered, trimmed chunks.
 */
function chunkText(text, maxLen = getMaxChunkLength()) {
    // Split after .!? followed by whitespace, unless preceded by a known
    // abbreviation or a single capital-letter initial.
    const sentenceBoundary = /(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+/;
    const chunks = [];

    const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim());
    for (const rawParagraph of paragraphs) {
        const paragraph = rawParagraph.trim();
        if (!paragraph) continue;

        // Greedily pack consecutive sentences into the current chunk.
        let pending = "";
        for (const sentence of paragraph.split(sentenceBoundary)) {
            const fits = pending.length + sentence.length + 1 <= maxLen;
            if (fits) {
                pending = pending ? `${pending} ${sentence}` : sentence;
            } else {
                if (pending) {
                    chunks.push(pending.trim());
                }
                pending = sentence;
            }
        }
        if (pending) {
            chunks.push(pending.trim());
        }
    }

    return chunks;
}
|
|
|
|
|
/**
 * Show the demo status box with `message` (rendered as HTML), styling it by
 * `type` and optionally displaying a 0-100% progress fill.
 *
 * @param {string} message - HTML string for the status text.
 * @param {'info'|'success'|'error'} [type]
 * @param {?number} [progress] - Percent complete, or null for no bar.
 */
function showDemoStatus(message, type = 'info', progress = null) {
    demoStatusText.innerHTML = message;
    // Reset classes and progress from any previous state before re-styling.
    demoStatusBox.className = 'demo-status-box';
    demoStatusBox.style.removeProperty('--status-progress');
    demoStatusBox.style.display = '';

    if (type === 'success' || type === 'error') {
        demoStatusBox.classList.add(type);
    }

    if (progress !== null && progress >= 0 && progress <= 100) {
        const clamped = Math.max(0, Math.min(progress, 100));
        demoStatusBox.style.setProperty('--status-progress', `${clamped}%`);
        demoStatusBox.classList.toggle('complete', clamped >= 100);
    } else {
        // No (valid) progress value: clear any leftover fill state.
        // (The original had identical success/error and default branches
        // here; they are merged without changing behavior.)
        demoStatusBox.style.removeProperty('--status-progress');
        demoStatusBox.classList.remove('complete');
    }
}
|
|
|
|
|
// Hide the status box entirely.
function hideDemoStatus() {
    demoStatusBox.style.display = 'none';
}

// Show `message` in the demo error area (set via textContent, so it is not
// interpreted as HTML).
function showDemoError(message) {
    demoError.textContent = message;
    demoError.classList.add('active');
}

// Clear the demo error area.
function hideDemoError() {
    demoError.classList.remove('active');
}
|
|
|
|
|
|
|
|
const languageToast = document.getElementById('languageToast'); |
|
|
const languageToastMessage = document.getElementById('languageToastMessage'); |
|
|
let languageToastTimeout = null; |
|
|
|
|
|
/**
 * Show a transient toast announcing the auto-detected language, replacing
 * any toast currently displayed. Auto-hides after 3 seconds.
 *
 * Cleanup: removed the unused `fromName` local — the message only displays
 * the target language.
 *
 * @param {string} fromLang - Previous language code (kept for call-site
 *   compatibility; not currently displayed).
 * @param {string} toLang - Newly detected language code.
 */
function showLanguageToast(fromLang, toLang) {
    if (!languageToast || !languageToastMessage) return;

    const toName = LANGUAGE_NAMES[toLang] || toLang;

    languageToastMessage.innerHTML = `Language auto-detected: <strong>${toName}</strong>`;

    // Restart the hide timer if a toast is already showing.
    if (languageToastTimeout) {
        clearTimeout(languageToastTimeout);
    }

    languageToast.classList.add('show');

    languageToastTimeout = setTimeout(() => {
        languageToast.classList.remove('show');
    }, 3000);
}
|
|
|
|
|
// Reveal the banner warning about WASM execution (no-op if absent from DOM).
function showWasmWarning() {
    if (wasmWarningBanner) {
        wasmWarningBanner.style.display = 'flex';
    }
}
|
|
|
|
|
|
|
|
/**
 * Check `text` for characters the loaded text processor cannot encode.
 * Works on unique characters only, mapping each through preprocessText so
 * that typographic variants are validated in their normalized form, then
 * maps unsupported processed characters back to the original characters the
 * user actually typed.
 *
 * Best-effort by design: if the processors are not ready or anything throws,
 * the text is treated as valid so validation never blocks generation.
 *
 * @param {string} text
 * @returns {{valid: boolean, unsupportedChars: string[]}}
 */
function validateCharacters(text) {
    if (!processors || !processors.textProcessor) {
        return { valid: true, unsupportedChars: [] };
    }

    try {
        const uniqueChars = [...new Set(text)];

        // processed char → set of original chars that produced it;
        // original char → its full preprocessed expansion.
        const processedToOriginal = new Map();
        const charToProcessed = new Map();

        for (const char of uniqueChars) {
            const processedChar = preprocessText(char);
            charToProcessed.set(char, processedChar);

            for (const pc of processedChar) {
                if (!processedToOriginal.has(pc)) {
                    processedToOriginal.set(pc, new Set());
                }
                processedToOriginal.get(pc).add(char);
            }
        }

        // Reassemble the full text from the per-character expansions so the
        // processor sees exactly the characters mapped above.
        const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join('');

        const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]);

        // Translate unsupported processed chars back to user-visible ones.
        const unsupportedOriginalChars = new Set();
        if (unsupportedChars && unsupportedChars.length > 0) {
            for (const unsupportedChar of unsupportedChars) {
                const originalChars = processedToOriginal.get(unsupportedChar);
                if (originalChars) {
                    originalChars.forEach(c => unsupportedOriginalChars.add(c));
                }
            }
        }

        const unsupportedCharsArray = Array.from(unsupportedOriginalChars);
        return {
            valid: unsupportedCharsArray.length === 0,
            unsupportedChars: unsupportedCharsArray
        };
    } catch (error) {
        // Swallowed intentionally: validation is advisory only.
        return { valid: true, unsupportedChars: [] };
    }
}
|
|
|
|
|
|
|
|
/**
 * Refresh the character-counter UI after the input changes.
 *
 * Updates the displayed count, rescales the input font size from the text
 * length and input width, runs character validation (once models/processors
 * are ready), and toggles the counter state classes plus the generate-button
 * enablement. Reads module-level DOM refs (demoTextInput, demoCharCount,
 * demoCharCounter, demoCharWarning, demoGenerateBtn) and state (models,
 * processors, isGenerating, MIN_CHARS).
 */
function updateCharCounter() {
    // contenteditable text; strip a single trailing newline some browsers add.
    const rawText = demoTextInput.textContent || demoTextInput.innerText || '';
    const text = rawText.replace(/\n$/g, '');
    const length = text.length;

    demoCharCount.textContent = length;

    const textareaWidth = demoTextInput.offsetWidth;

    // NOTE(review): maxWidthRef is unused below — presumably a leftover from
    // an earlier viewport-relative sizing scheme; confirm before removing.
    const maxWidthRef = 640;

    const isMobile = window.innerWidth <= 572;
    const mobileMultiplier = isMobile ? 2 : 1;

    // Font shrinks as the text grows so more of it stays visible.
    // NOTE(review): the ratios are not monotonically decreasing — 101-200
    // chars get a smaller font (0.04) than 201-239 chars (0.053125).
    // Verify whether that band swap is intentional.
    let fontSizeRatio;
    if (length <= 100) {
        fontSizeRatio = 0.055 * mobileMultiplier;
    } else if (length <= 200) {
        fontSizeRatio = 0.04 * mobileMultiplier;
    } else if (length < 240) {
        fontSizeRatio = 0.053125 * mobileMultiplier;
    } else if (length < 400) {
        fontSizeRatio = 0.0425 * mobileMultiplier;
    } else if (length < 700) {
        fontSizeRatio = 0.031875 * mobileMultiplier;
    } else {
        fontSizeRatio = 0.025 * mobileMultiplier;
    }

    // Scale relative to the input's current rendered width.
    const fontSize = textareaWidth * fontSizeRatio;
    demoTextInput.style.fontSize = `${fontSize}px`;

    // Reset state classes before deciding which one applies this pass.
    demoCharCounter.classList.remove('error', 'warning', 'valid');

    // Character validation only runs once models/processors are loaded.
    let hasUnsupportedChars = false;
    if (models && processors && length > 0) {
        const validation = validateCharacters(text);
        if (!validation.valid && validation.unsupportedChars.length > 0) {
            hasUnsupportedChars = true;
            // Show at most 5 offending characters, summarize the rest.
            const charList = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', ');
            const moreChars = validation.unsupportedChars.length > 5 ? ` and ${validation.unsupportedChars.length - 5} more` : '';
            showDemoError(`Unsupported characters detected: ${charList}${moreChars}. Please remove them before generating speech.`);
        } else {
            hideDemoError();
        }
    }

    // State priority: too short > unsupported characters > valid.
    if (length < MIN_CHARS) {
        demoCharCounter.classList.add('error');
        demoCharWarning.textContent = '(At least 10 characters)';
        demoGenerateBtn.disabled = true;
    } else if (hasUnsupportedChars) {
        demoCharCounter.classList.add('error');
        demoCharWarning.textContent = '(Unsupported characters)';
        demoGenerateBtn.disabled = true;
    } else {
        demoCharCounter.classList.add('valid');
        demoCharWarning.textContent = '';
        // Still disabled if models are missing or a generation is in flight.
        demoGenerateBtn.disabled = !models || isGenerating;
    }
}
|
|
|
|
|
|
|
|
/**
 * Validate the raw input text for emptiness and minimum length.
 *
 * @param {string} text - Text to validate.
 * @returns {{valid: boolean, message?: string}} `message` is set only when
 *   validation fails.
 */
function validateTextInput(text) {
    const isBlank = !text || text.trim().length === 0;
    if (isBlank) {
        return { valid: false, message: 'Please enter some text.' };
    }
    if (text.length >= MIN_CHARS) {
        return { valid: true };
    }
    return {
        valid: false,
        message: `Text must be at least ${MIN_CHARS} characters long. (Currently ${text.length})`
    };
}
|
|
|
|
|
|
|
|
/**
 * Load (and cache) the style embedding tensors for a voice.
 *
 * Fetches a JSON file containing `style_ttl` and `style_dp` tensor specs
 * ({ data, dims, type? }), converts each into an ort.Tensor, caches the pair
 * in `refEmbeddingCache`, and returns it. Subsequent calls for the same
 * voice return the cached pair without refetching.
 *
 * @param {string} voice - Voice id; must have an entry in REF_EMBEDDING_PATHS.
 * @returns {Promise<{styleTtl: ort.Tensor, styleDp: ort.Tensor}>}
 * @throws {Error} If no path is configured for the voice or the fetch fails.
 */
async function loadStyleEmbeddings(voice) {
    // Cache hit: return the previously built tensors.
    if (refEmbeddingCache[voice]) {
        return refEmbeddingCache[voice];
    }

    const embeddingPath = REF_EMBEDDING_PATHS[voice];
    if (!embeddingPath) {
        throw new Error(`No embedding path configured for voice: ${voice}`);
    }

    const response = await fetch(embeddingPath);
    if (!response.ok) {
        throw new Error(`Failed to fetch embedding: ${response.statusText}`);
    }

    const embeddingData = await response.json();

    // Nested JSON arrays are flattened into a Float32Array; the dtype
    // defaults to float32 when the spec omits it.
    const toTensor = (spec) => new ort.Tensor(
        spec.type || 'float32',
        Float32Array.from(spec.data.flat(Infinity)),
        spec.dims
    );

    const embeddings = {
        styleTtl: toTensor(embeddingData.style_ttl),
        styleDp: toTensor(embeddingData.style_dp)
    };

    refEmbeddingCache[voice] = embeddings;
    return embeddings;
}
|
|
|
|
|
|
|
|
/**
 * Switch the active voice: load its style embeddings, update the current
 * tensor/voice globals, sync the speaker UI (if a hook is installed), and
 * refresh the character counter. Any load failure is shown to the user and
 * rethrown to the caller.
 *
 * @param {string} voice - Voice id to activate.
 */
async function switchVoice(voice) {
    try {
        const { styleTtl, styleDp } = await loadStyleEmbeddings(voice);

        currentStyleTtlTensor = styleTtl;
        currentStyleDpTensor = styleDp;
        currentVoice = voice;

        // Optional UI hook registered elsewhere on window.
        if (typeof window.updateActiveSpeaker === 'function') {
            window.updateActiveSpeaker(voice);
        }

        // Re-run validation/state for the new voice.
        updateCharCounter();
    } catch (error) {
        showDemoError(`Failed to load voice ${voice}: ${error.message}`);
        throw error;
    }
}
|
|
|
|
|
|
|
|
/**
 * Detect whether this browser can run inference on WebGPU.
 *
 * iOS and Safari are rejected up front via user-agent sniffing (they lack
 * the WebGPU features required here); otherwise an adapter and device are
 * requested to confirm support.
 *
 * @returns {Promise<{supported: boolean, reason?: string,
 *   adapter?: GPUAdapter, device?: GPUDevice}>} `reason` is set only when
 *   unsupported; `adapter`/`device` only when supported.
 */
async function checkWebGPUSupport() {
    try {
        // iPadOS 13+ reports 'MacIntel' but has touch support, hence the
        // maxTouchPoints check. (navigator.platform is deprecated but is
        // still the common sniff for this case.)
        const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) ||
            (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
        const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent);

        if (isIOS) {
            return { supported: false, reason: 'iOS does not support the required WebGPU features' };
        }
        if (isSafari) {
            return { supported: false, reason: 'Safari does not support the required WebGPU features' };
        }

        if (!navigator.gpu) {
            return { supported: false, reason: 'WebGPU not available in this browser' };
        }

        const adapter = await navigator.gpu.requestAdapter();
        if (!adapter) {
            return { supported: false, reason: 'No WebGPU adapter found' };
        }

        // Probe adapter info; failures are non-fatal and the result is not
        // needed. (requestAdapterInfo is deprecated in newer specs in favor
        // of the GPUAdapter.info attribute.)
        try {
            await adapter.requestAdapterInfo();
        } catch (infoError) {
            // Ignore: adapter info is optional and not implemented everywhere.
        }

        const device = await adapter.requestDevice();
        if (!device) {
            return { supported: false, reason: 'Failed to create WebGPU device' };
        }

        return { supported: true, adapter, device };
    } catch (error) {
        // Subgroup-feature errors are the signature of iOS/Safari WebGPU gaps.
        const errorMsg = error.message || '';
        if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) {
            return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' };
        }
        return { supported: false, reason: error.message };
    }
}
|
|
|
|
|
|
|
|
/**
 * Warm up the inference pipeline by running one full synthesis pass
 * (duration predictor -> text encoder -> vector-estimator denoising loop ->
 * vocoder) on fixed dummy text. This lets the execution provider
 * compile/cache its kernels so the first user-facing generation is fast.
 * All output is discarded; any failure is logged and swallowed because
 * warmup is non-critical.
 */
async function warmupModels() {
    try {
        const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. Try it now, and enjoy its speed.';
        const totalStep = 5;
        const durationFactor = 1.0;

        const textList = [dummyText];
        const bsz = 1;  // batch size of 1 throughout

        // Style embeddings for the currently selected voice.
        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;

        // Tokenize the text into ids + attention mask for the current language.
        const { textIds, textMask } = processors.textProcessor.call(textList, currentLanguage);

        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);

        // 1) Duration predictor.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });

        // Apply speed scaling, then reshape durations to [bsz][1][1]
        // as expected by sampleNoisyLatent.
        const durOnnx = Array.from(dpResult.duration.data);
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }

        // 2) Text encoder.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });

        const textEmbTensor = textEncResult.text_emb;

        // 3) Sample the initial noisy latent and iteratively denoise it.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);

        const totalStepArray = new Array(bsz).fill(totalStep);
        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(totalStepArray, scalarShape);

        for (let step = 0; step < totalStep; step++) {
            const currentStepArray = new Array(bsz).fill(step);

            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: arrayToTensor(noisyLatent, latentShape),
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: arrayToTensor(currentStepArray, scalarShape)
            });

            const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data);

            // Copy the flat model output back into the nested latent so the
            // next iteration denoises the updated values.
            let idx = 0;
            for (let b = 0; b < noisyLatent.length; b++) {
                for (let d = 0; d < noisyLatent[b].length; d++) {
                    for (let t = 0; t < noisyLatent[b][d].length; t++) {
                        noisyLatent[b][d][t] = denoisedLatent[idx++];
                    }
                }
            }
        }

        // 4) Vocoder. The result is intentionally unused — this is warmup only.
        const vocoderResult = await models.vocoderOrt.run({
            latent: arrayToTensor(noisyLatent, latentShape)
        });

    } catch (error) {
        console.warn('Warmup failed (non-critical):', error.message);
        // Intentionally swallowed: warmup must never block initialization.
    }
}
|
|
|
|
|
|
|
|
/**
 * Load configuration, select an execution provider (WebGPU with WASM
 * fallback), load all ONNX sessions and text processors, fetch the current
 * voice's style embeddings, warm up the pipeline, and enable the demo UI.
 *
 * Idempotent: returns the in-flight promise while a load is running, and
 * returns immediately once models are loaded. Speaker/language pickers are
 * disabled for the duration of loading and re-enabled on success or failure.
 *
 * @returns {Promise<void>}
 * @throws Re-throws any initialization error after surfacing it in the UI.
 */
async function initializeModels() {
    // Already loading: share the in-flight promise.
    if (modelsLoading && modelsLoadPromise) {
        return modelsLoadPromise;
    }

    // Already loaded: nothing to do.
    if (modelsLoaded && models) {
        return;
    }

    modelsLoading = true;

    // Lock voice/language selection while loading.
    const speakerItemsForLoading = document.querySelectorAll('.speaker-item[data-voice]');
    speakerItemsForLoading.forEach(item => item.classList.add('disabled'));

    const languageItemsForLoading = document.querySelectorAll('.speaker-item[data-language]');
    languageItemsForLoading.forEach(item => item.classList.add('disabled'));

    modelsLoadPromise = (async () => {
        try {
            showDemoStatus('<strong>Loading configuration...</strong>', 'info', 5);

            const basePath = 'assets/onnx';

            cfgs = await loadCfgs(basePath);

            showDemoStatus('<strong>Checking WebGPU support...</strong>', 'info', 8);
            const webgpuCheck = await checkWebGPUSupport();

            const useWebGPU = webgpuCheck.supported;
            const executionProvider = useWebGPU ? 'webgpu' : 'wasm';

            if (!useWebGPU) {
                showWasmWarning();
            }

            const backendName = useWebGPU ? 'WebGPU' : 'WASM';
            showDemoStatus(`<strong>${backendName} detected! Loading models...</strong>`, 'info', 10);

            // Named onnxSessionsPromise (was modelsLoadPromise) to avoid
            // shadowing the module-level modelsLoadPromise assigned above.
            const onnxSessionsPromise = loadOnnxAll(basePath, {
                executionProviders: [executionProvider],
                graphOptimizationLevel: 'all'
            }, (modelName, current, total) => {
                // Model loading maps onto the 10%-80% band of the progress bar.
                const progress = 10 + (current / total) * 70;
                showDemoStatus(`<strong>Loading models with ${backendName} (${current}/${total}):</strong> ${modelName}...`, 'info', progress);
            });

            // Load ONNX sessions and text processors in parallel.
            const [loadedModels, loadedProcessors] = await Promise.all([
                onnxSessionsPromise,
                loadProcessors(basePath)
            ]);

            models = loadedModels;
            processors = loadedProcessors;
            showDemoStatus('<strong>Loading reference embeddings...</strong>', 'info', 85);

            // Prime the style tensors for the currently selected voice.
            const embeddings = await loadStyleEmbeddings(currentVoice);
            currentStyleTtlTensor = embeddings.styleTtl;
            currentStyleDpTensor = embeddings.styleDp;

            showDemoStatus('<strong>Warming up models...</strong>', 'info', 90);

            await warmupModels();

            hideDemoStatus();

            // Enable the generation controls.
            demoGenerateBtn.disabled = false;
            demoTotalSteps.disabled = false;
            demoSpeed.disabled = false;

            const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
            voiceToggleTexts.forEach(text => text.classList.remove('disabled'));

            // Re-run counter/validation now that everything is ready.
            updateCharCounter();

            modelsLoaded = true;
            modelsLoading = false;

            // Unlock voice/language selection.
            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));
            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));
        } catch (error) {
            modelsLoading = false;

            // Re-enable the pickers so the user can retry after a failure.
            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));
            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));
            showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error');
            showDemoError(`Failed to initialize: ${error.message}. Check console for details.`);
            throw error;
        }
    })();

    return modelsLoadPromise;
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Synthesize speech for a single text chunk.
 *
 * Pipeline: tokenize -> duration predictor -> text encoder -> iterative
 * vector-estimator denoising (totalStep passes over a flat Float32 latent
 * buffer) -> vocoder. Errors are caught and returned as
 * { success: false, error } rather than thrown.
 *
 * @param {string} text - Text to synthesize (a single chunk).
 * @param {number} totalStep - Number of denoising steps.
 * @param {number} durationFactor - Multiplier applied to the predicted
 *   duration (speed control; larger values yield longer audio).
 * @returns {Promise<object>} On success: { success: true, processingTime,
 *   audioDuration, audioData, sampleRate, text }; on failure:
 *   { success: false, error, text }.
 */
async function generateSupertonicSpeech(text, totalStep, durationFactor) {
    const supertonicStartTime = Date.now();

    try {
        const textList = [text];
        const bsz = 1;  // single-item batch throughout
        const sampleRate = cfgs.ae.sample_rate;

        // Style embeddings for the currently selected voice.
        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;

        // Tokenize; the processor also reports characters it cannot encode.
        const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList, currentLanguage);

        if (unsupportedChars && unsupportedChars.length > 0) {
            const charList = unsupportedChars.map(c => `"${c}"`).join(', ');
            throw new Error(`Unsupported characters: ${charList}`);
        }

        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);

        // 1) Duration predictor.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });

        const durOnnx = Array.from(dpResult.duration.data);

        // Apply speed scaling to the predicted durations.
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        // Reshape durations to [bsz][1][1] as expected by sampleNoisyLatent.
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }

        // 2) Text encoder.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });

        const textEmbTensor = textEncResult.text_emb;

        // 3) Sample the initial noisy latent and its mask.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentDim = noisyLatent[0].length;
        const latentLen = noisyLatent[0][0].length;
        const latentShape = [bsz, latentDim, latentLen];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);

        // Flat reusable buffer avoids re-nesting the latent on every step.
        const latentBufferSize = bsz * latentDim * latentLen;
        const latentBuffer = new Float32Array(latentBufferSize);

        // Copy the nested latent into the flat buffer once, up front.
        let initIdx = 0;
        for (let b = 0; b < bsz; b++) {
            for (let d = 0; d < latentDim; d++) {
                for (let t = 0; t < latentLen; t++) {
                    latentBuffer[initIdx++] = noisyLatent[b][d][t];
                }
            }
        }

        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(new Array(bsz).fill(totalStep), scalarShape);

        // Pre-build per-step scalar tensors outside the loop.
        const stepTensors = [];
        for (let step = 0; step < totalStep; step++) {
            stepTensors.push(arrayToTensor(new Array(bsz).fill(step), scalarShape));
        }

        // 4) Iterative denoising loop over the flat latent buffer.
        for (let step = 0; step < totalStep; step++) {
            const noisyLatentTensor = new ort.Tensor('float32', latentBuffer, latentShape);

            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: noisyLatentTensor,
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: stepTensors[step]
            });

            // Feed the denoised latent straight back into the flat buffer
            // for the next iteration.
            const denoisedData = vectorEstResult.denoised_latent.data;
            latentBuffer.set(denoisedData);
        }

        // 5) Vocoder: latent -> waveform.
        const vocoderResult = await models.vocoderOrt.run({
            latent: new ort.Tensor('float32', latentBuffer, latentShape)
        });

        const wavBatch = vocoderResult.wav_tts.data;
        // Keep only the predicted duration's worth of samples (trim padding).
        const wavLen = Math.floor(sampleRate * durOnnx[0]);

        const audioData = wavBatch.slice(0, wavLen);

        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;
        const audioDurationSec = durOnnx[0];

        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: audioDurationSec,
            audioData: audioData,
            sampleRate: sampleRate,
            text: text
        };
    } catch (error) {
        // Failures are reported as a result object, not thrown, so batch
        // callers can decide how to handle per-chunk errors.
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
|
|
|
|
|
|
|
|
/**
 * Format a duration in seconds as a zero-padded clock string with
 * hundredths of a second: "SS.cc", "MM:SS.cc", or "HH:MM:SS.cc" depending
 * on magnitude.
 *
 * @param {number} seconds - Non-negative duration in seconds.
 * @returns {string} Formatted time string.
 */
function formatTimeDetailed(seconds) {
    const hours = Math.floor(seconds / 3600);
    const mins = Math.floor((seconds % 3600) / 60);
    const secs = seconds % 60;
    const ms = Math.floor((secs % 1) * 100);  // hundredths of a second
    const wholeSecs = Math.floor(secs);

    const pad2 = (n) => n.toString().padStart(2, '0');
    const tail = `${pad2(wholeSecs)}.${pad2(ms)}`;

    if (seconds < 60) {
        return tail;
    }
    if (seconds < 3600) {
        return `${pad2(mins)}:${tail}`;
    }
    return `${pad2(hours)}:${pad2(mins)}:${tail}`;
}
|
|
|
|
|
|
|
|
/**
 * Generate speech for `text` by splitting it into chunks, synthesizing each
 * with generateSupertonicSpeech, and concatenating the results with 0.3 s of
 * silence between chunks. The final concatenated audio is packaged as a WAV
 * blob and exposed via an object URL.
 *
 * Streaming callbacks (both optional):
 *  - onFirstChunkReady(audioData, sampleRate, durationSoFar, text, numChunks,
 *    firstChunkTime, processedChars): fired after the first chunk finishes so
 *    the caller can start playback early.
 *  - onChunkAdded(audioData, sampleRate, durationSoFar, chunkNo, numChunks,
 *    elapsedSecs, processedChars): fired for every subsequent chunk.
 *
 * @returns {Promise<object>} On success: { success: true, processingTime,
 *   audioDuration, url, text, firstChunkTime }; on failure:
 *   { success: false, error, text }.
 */
async function generateSupertonicSpeechChunked(text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded) {
    const supertonicStartTime = Date.now();
    const sampleRate = cfgs.ae.sample_rate;
    const silenceDuration = 0.3;  // seconds of silence inserted between chunks

    try {
        const chunks = chunkText(text);

        const audioDataArrays = [];
        const durations = [];
        const silenceSamples = Math.floor(silenceDuration * sampleRate);
        let firstChunkEndTime = 0;
        let firstChunkTime = 0;

        for (let i = 0; i < chunks.length; i++) {
            // Named chunkStr (was chunkText) to avoid shadowing the
            // chunkText() helper function called above.
            const chunkStr = chunks[i];

            const result = await generateSupertonicSpeech(chunkStr, totalStep, durationFactor);

            if (!result.success) {
                throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`);
            }

            const audioData = result.audioData;

            audioDataArrays.push(audioData);
            durations.push(result.audioDuration);

            if (i === 0 && onFirstChunkReady) {
                // Let the caller start playback as soon as any audio exists.
                firstChunkEndTime = Date.now();
                firstChunkTime = (firstChunkEndTime - supertonicStartTime) / 1000;

                const totalDurationSoFar = result.audioDuration;
                const processedChars = chunks[0].length;

                onFirstChunkReady(audioData, sampleRate, totalDurationSoFar, text, chunks.length, firstChunkTime, processedChars);
            } else if (i > 0 && onChunkAdded) {
                // Duration so far includes the i inter-chunk silence gaps.
                const totalDurationSoFar = durations.slice(0, i + 1).reduce((sum, dur) => sum + dur, 0) + silenceDuration * i;
                const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000;
                const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0);

                onChunkAdded(audioData, sampleRate, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, processedChars);
            }
        }

        const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1);

        // Size the output buffer: all chunks plus one silence gap per join.
        let totalSamples = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            totalSamples += audioDataArrays[i].length;
            if (i < audioDataArrays.length - 1) {
                totalSamples += silenceSamples;
            }
        }

        const wavCat = new Float32Array(totalSamples);

        // Copy each chunk into place; skipped ranges stay zero-filled,
        // which is exactly the inter-chunk silence.
        let currentIdx = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            const audioData = audioDataArrays[i];
            wavCat.set(audioData, currentIdx);
            currentIdx += audioData.length;

            if (i < audioDataArrays.length - 1) {
                currentIdx += silenceSamples;
            }
        }

        // Package the concatenated audio as a playable/downloadable WAV blob.
        const wavBuffer = writeWavFile(wavCat, sampleRate);
        const blob = new Blob([wavBuffer], { type: 'audio/wav' });
        const url = URL.createObjectURL(blob);

        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;

        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: totalDuration,
            url: url,
            text: text,
            firstChunkTime: firstChunkTime
        };
    } catch (error) {
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
|
|
|
|
|
|
|
|
async function generateSpeech() { |
|
|
let text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); |
|
|
|
|
|
|
|
|
const validation = validateTextInput(text); |
|
|
if (!validation.valid) { |
|
|
showDemoError(validation.message); |
|
|
return; |
|
|
} |
|
|
|
|
|
if (!models || !cfgs || !processors) { |
|
|
showDemoError('Models are still loading. Please wait.'); |
|
|
return; |
|
|
} |
|
|
|
|
|
if (!currentStyleTtlTensor || !currentStyleDpTensor) { |
|
|
showDemoError('Reference embeddings are not ready. Please wait.'); |
|
|
return; |
|
|
} |
|
|
|
|
|
|
|
|
const charValidation = validateCharacters(text); |
|
|
if (!charValidation.valid && charValidation.unsupportedChars.length > 0) { |
|
|
const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', '); |
|
|
showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`); |
|
|
return; |
|
|
} |
|
|
|
|
|
currentGenerationTextLength = text.length; |
|
|
|
|
|
try { |
|
|
isGenerating = true; |
|
|
demoGenerateBtn.disabled = true; |
|
|
|
|
|
|
|
|
const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
|
|
speakerItemsForGeneration.forEach(item => item.classList.add('disabled')); |
|
|
|
|
|
|
|
|
const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
|
|
languageItemsForGeneration.forEach(item => item.classList.add('disabled')); |
|
|
|
|
|
hideDemoError(); |
|
|
hideDemoStatus(); |
|
|
|
|
|
|
|
|
if (audioContext) { |
|
|
|
|
|
scheduledSources.forEach(source => { |
|
|
try { |
|
|
source.stop(); |
|
|
} catch (e) { |
|
|
|
|
|
} |
|
|
}); |
|
|
scheduledSources = []; |
|
|
|
|
|
|
|
|
if (audioContext.state !== 'closed') { |
|
|
audioContext.close(); |
|
|
} |
|
|
audioContext = null; |
|
|
} |
|
|
|
|
|
|
|
|
if (animationFrameId) { |
|
|
cancelAnimationFrame(animationFrameId); |
|
|
animationFrameId = null; |
|
|
} |
|
|
|
|
|
|
|
|
customAudioPlayers.forEach(player => { |
|
|
if (player.cleanup) { |
|
|
player.cleanup(); |
|
|
} |
|
|
}); |
|
|
customAudioPlayers = []; |
|
|
|
|
|
|
|
|
audioChunks = []; |
|
|
totalDuration = 0; |
|
|
startTime = 0; |
|
|
pauseTime = 0; |
|
|
isPaused = false; |
|
|
isPlaying = false; |
|
|
firstChunkGenerationTime = 0; |
|
|
totalChunks = 0; |
|
|
nextScheduledTime = 0; |
|
|
|
|
|
|
|
|
const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => { |
|
|
const titleStatus = includeStatus |
|
|
? `<span class="title-status status-running" id="${system}-status">⏳ Running...</span>` |
|
|
: ''; |
|
|
return ` |
|
|
<div class="demo-result-item ${system}-result-item generating" id="${system}-result" style="--result-progress: 0%;"> |
|
|
<div class="demo-result-title"> |
|
|
<span class="title-main" style="color: ${titleColor};">${titleMain}</span> |
|
|
<span class="title-sub">${titleSub}</span> |
|
|
${titleStatus} |
|
|
</div> |
|
|
<div class="demo-result-info"> |
|
|
<!-- |
|
|
<div class="stat"> |
|
|
<div class="stat-value" id="${system}-chars">--</div> |
|
|
<div class="stat-label">Processed Chars</div> |
|
|
</div> |
|
|
--> |
|
|
<div class="stat"> |
|
|
<div class="stat-value" id="${system}-time">--</div> |
|
|
<div class="stat-label">Processing Time<span class="stat-arrow stat-arrow--down">↓</span></div> |
|
|
</div> |
|
|
<div class="stat"> |
|
|
<div class="stat-value" id="${system}-cps">--</div> |
|
|
<div class="stat-label">Chars/sec<span class="stat-arrow stat-arrow--up">↑</span></div> |
|
|
</div> |
|
|
<div class="stat"> |
|
|
<div class="stat-value" id="${system}-rtf">--</div> |
|
|
<div class="stat-label">RTF<span class="stat-arrow stat-arrow--down">↓</span></div> |
|
|
</div> |
|
|
</div> |
|
|
<div class="custom-audio-player"> |
|
|
<div class="demo-placeholder-audio">Generating speech...</div> |
|
|
</div> |
|
|
</div> |
|
|
`; |
|
|
}; |
|
|
const supertonicInitial = createInitialResultItem( |
|
|
'supertonic', |
|
|
'Supertonic', |
|
|
'On-Device', |
|
|
'var(--accent-yellow)', |
|
|
false |
|
|
); |
|
|
demoResults.style.display = 'flex'; |
|
|
demoResults.innerHTML = supertonicInitial; |
|
|
|
|
|
const totalStep = parseInt(demoTotalSteps.value); |
|
|
const speed = parseFloat(demoSpeed.value); |
|
|
const durationFactor = speedToDurationFactor(speed); |
|
|
|
|
|
|
|
|
let latestSupertonicProcessedChars = 0; |
|
|
|
|
|
|
|
|
const formatTime = (seconds, { trimMobile = false } = {}) => { |
|
|
const mins = Math.floor(seconds / 60); |
|
|
const secs = seconds % 60; |
|
|
const secString = secs.toFixed(2).padStart(5, '0'); |
|
|
let formatted = `${mins}:${secString}`; |
|
|
if (trimMobile) { |
|
|
formatted = trimDecimalsForMobile(formatted); |
|
|
} |
|
|
return formatted; |
|
|
}; |
|
|
|
|
|
const updateProgress = () => { |
|
|
if (!isPlaying || !audioContext) return; |
|
|
|
|
|
const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime); |
|
|
const progress = totalDuration > 0 ? (currentTime / totalDuration) * 100 : 0; |
|
|
|
|
|
if (progressFill) { |
|
|
progressFill.style.width = `${Math.min(progress, 100)}%`; |
|
|
} |
|
|
if (currentTimeDisplay) { |
|
|
currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true }); |
|
|
} |
|
|
|
|
|
if (currentTime < totalDuration) { |
|
|
animationFrameId = requestAnimationFrame(updateProgress); |
|
|
} else { |
|
|
|
|
|
isPlaying = false; |
|
|
isPaused = false; |
|
|
if (playPauseBtn) { |
|
|
playPauseBtn.innerHTML = PLAY_ICON_SVG; |
|
|
} |
|
|
} |
|
|
}; |
|
|
|
|
|
const togglePlayPause = () => { |
|
|
if (!audioContext || audioChunks.length === 0) return; |
|
|
|
|
|
if (isPaused) { |
|
|
|
|
|
pauseAllPlayersExcept(supertonicPlayerRecord); |
|
|
|
|
|
const seekTime = pauseTime; |
|
|
|
|
|
|
|
|
let accumulatedTime = 0; |
|
|
let startChunkIndex = 0; |
|
|
let offsetInChunk = seekTime; |
|
|
|
|
|
for (let i = 0; i < audioChunks.length; i++) { |
|
|
const chunkDuration = audioChunks[i].buffer.duration; |
|
|
if (accumulatedTime + chunkDuration > seekTime) { |
|
|
startChunkIndex = i; |
|
|
offsetInChunk = seekTime - accumulatedTime; |
|
|
break; |
|
|
} |
|
|
accumulatedTime += chunkDuration + 0.3; |
|
|
} |
|
|
|
|
|
|
|
|
scheduledSources.forEach(source => { |
|
|
try { |
|
|
source.stop(); |
|
|
} catch (e) { |
|
|
|
|
|
} |
|
|
}); |
|
|
scheduledSources = []; |
|
|
|
|
|
|
|
|
if (audioContext.state === 'suspended') { |
|
|
audioContext.resume(); |
|
|
} |
|
|
|
|
|
|
|
|
startTime = audioContext.currentTime - seekTime; |
|
|
let nextStartTime = audioContext.currentTime; |
|
|
|
|
|
for (let i = startChunkIndex; i < audioChunks.length; i++) { |
|
|
const source = audioContext.createBufferSource(); |
|
|
source.buffer = audioChunks[i].buffer; |
|
|
source.connect(audioContext.destination); |
|
|
|
|
|
if (i === startChunkIndex) { |
|
|
source.start(nextStartTime, offsetInChunk); |
|
|
nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); |
|
|
} else { |
|
|
source.start(nextStartTime); |
|
|
nextStartTime += audioChunks[i].buffer.duration; |
|
|
} |
|
|
|
|
|
if (i < audioChunks.length - 1) { |
|
|
nextStartTime += 0.3; |
|
|
} |
|
|
|
|
|
scheduledSources.push(source); |
|
|
} |
|
|
|
|
|
nextScheduledTime = nextStartTime; |
|
|
|
|
|
isPaused = false; |
|
|
isPlaying = true; |
|
|
playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
|
|
updateProgress(); |
|
|
} else if (isPlaying) { |
|
|
|
|
|
pauseTime = audioContext.currentTime - startTime; |
|
|
audioContext.suspend(); |
|
|
isPaused = true; |
|
|
playPauseBtn.innerHTML = PLAY_ICON_SVG; |
|
|
if (animationFrameId) { |
|
|
cancelAnimationFrame(animationFrameId); |
|
|
} |
|
|
} else { |
|
|
|
|
|
pauseAllPlayersExcept(supertonicPlayerRecord); |
|
|
|
|
|
pauseTime = 0; |
|
|
|
|
|
|
|
|
if (audioContext.state === 'suspended') { |
|
|
audioContext.resume(); |
|
|
} |
|
|
|
|
|
|
|
|
scheduledSources.forEach(source => { |
|
|
try { |
|
|
source.stop(); |
|
|
} catch (e) { |
|
|
|
|
|
} |
|
|
}); |
|
|
scheduledSources = []; |
|
|
|
|
|
|
|
|
startTime = audioContext.currentTime; |
|
|
let nextStartTime = audioContext.currentTime; |
|
|
|
|
|
for (let i = 0; i < audioChunks.length; i++) { |
|
|
const source = audioContext.createBufferSource(); |
|
|
source.buffer = audioChunks[i].buffer; |
|
|
source.connect(audioContext.destination); |
|
|
source.start(nextStartTime); |
|
|
nextStartTime += audioChunks[i].buffer.duration; |
|
|
|
|
|
if (i < audioChunks.length - 1) { |
|
|
nextStartTime += 0.3; |
|
|
} |
|
|
|
|
|
scheduledSources.push(source); |
|
|
} |
|
|
|
|
|
nextScheduledTime = nextStartTime; |
|
|
|
|
|
isPlaying = true; |
|
|
isPaused = false; |
|
|
playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
|
|
updateProgress(); |
|
|
} |
|
|
}; |
|
|
|
|
|
// Jump playback to a position given as a percentage of totalDuration.
// Stops everything currently scheduled, locates the chunk containing the
// target time (each chunk is separated by a fixed 0.3 s gap), then either
// records the position (when paused/stopped) or reschedules every
// remaining chunk from that offset.
const seekTo = (percentage) => {
  if (!audioContext || audioChunks.length === 0) return;

  const targetTime = (percentage / 100) * totalDuration;
  const wasPlaying = isPlaying;
  const wasPaused = isPaused;

  // Tear down all scheduled sources; stop() throws on already-finished
  // sources, which is harmless here.
  for (const src of scheduledSources) {
    try {
      src.stop();
    } catch (e) {
      // source had already ended — ignore
    }
  }
  scheduledSources = [];

  if (animationFrameId) {
    cancelAnimationFrame(animationFrameId);
  }

  // Find which chunk contains targetTime and the offset inside it.
  let firstChunk = 0;
  let chunkOffset = targetTime;
  let elapsed = 0;
  for (let idx = 0; idx < audioChunks.length; idx += 1) {
    const len = audioChunks[idx].buffer.duration;
    if (elapsed + len > targetTime) {
      firstChunk = idx;
      chunkOffset = targetTime - elapsed;
      break;
    }
    elapsed += len + 0.3; // account for the inter-chunk silence gap
  }

  if (wasPaused || !wasPlaying) {
    // Not actively playing: remember the position and refresh the UI only.
    pauseTime = targetTime;
    if (progressFill) {
      const pct = (targetTime / totalDuration) * 100;
      progressFill.style.width = `${Math.min(pct, 100)}%`;
    }
    if (currentTimeDisplay) {
      currentTimeDisplay.textContent = formatTime(targetTime, { trimMobile: true });
    }
    // NOTE(review): both flags deliberately set true here, mirroring the
    // original state machine — verify against togglePlayPause's resume path.
    isPaused = true;
    isPlaying = true;
    if (playPauseBtn) {
      playPauseBtn.innerHTML = PLAY_ICON_SVG;
    }
    return;
  }

  if (audioContext.state === 'suspended') {
    audioContext.resume();
  }

  // Reschedule every remaining chunk starting at the computed offset.
  startTime = audioContext.currentTime - targetTime;
  let when = audioContext.currentTime;
  for (let idx = firstChunk; idx < audioChunks.length; idx += 1) {
    const src = audioContext.createBufferSource();
    src.buffer = audioChunks[idx].buffer;
    src.connect(audioContext.destination);
    if (idx === firstChunk) {
      // First chunk starts partway through.
      src.start(when, chunkOffset);
      when += audioChunks[idx].buffer.duration - chunkOffset;
    } else {
      src.start(when);
      when += audioChunks[idx].buffer.duration;
    }
    if (idx < audioChunks.length - 1) {
      when += 0.3; // inter-chunk gap
    }
    scheduledSources.push(src);
  }
  nextScheduledTime = when;

  isPlaying = true;
  isPaused = false;
  if (playPauseBtn) {
    playPauseBtn.innerHTML = PAUSE_ICON_SVG;
  }
  updateProgress();
};
|
|
|
|
|
|
|
|
|
|
|
// Wrap a mono Float32 PCM array in an AudioBuffer at the given sample rate.
const createAudioBufferFromFloat32 = (samples, rate) => {
  const buffer = audioContext.createBuffer(1, samples.length, rate);
  buffer.getChannelData(0).set(samples);
  return buffer;
};
|
|
|
|
|
// Called when the first synthesized audio chunk is ready. Publishes the
// initial latency/throughput stats, builds the custom audio player UI,
// creates a fresh AudioContext and starts playback of the first chunk
// immediately. Later chunks are appended by onChunkAdded.
//
// @param {Float32Array} audioData   mono PCM samples of the first chunk
// @param {number} sampleRate        sample rate of audioData
// @param {number} duration          audio duration (s) known so far
// @param {string} text              text being synthesized
// @param {number} numChunks         total number of chunks expected
// @param {number} firstChunkTime    seconds spent generating this chunk
// @param {number} processedChars    characters consumed so far
const onFirstChunkReady = async (audioData, sampleRate, duration, text, numChunks, firstChunkTime, processedChars) => {
  totalChunks = numChunks;
  firstChunkGenerationTime = firstChunkTime;

  const container = document.getElementById('demoResults');

  // Prefer the length captured at generation start; fall back to the text arg.
  const textLength = currentGenerationTextLength > 0
    ? currentGenerationTextLength
    : (text ? text.length : 0);
  const isBatch = textLength >= getMaxChunkLength();
  // Batch runs show "first / total"; both equal the first-chunk time here.
  const processingTimeStr = isBatch && firstChunkTime
    ? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}`
    : formatTimeDetailed(firstChunkTime);
  const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0;
  const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars;
  const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0
    ? (displayedInitialChars / firstChunkTime).toFixed(1)
    : '0.0';
  const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-';
  const progressValue = textLength > 0 ? Math.min(100, (displayedInitialChars / textLength) * 100) : 0;

  const resultItemEl = document.getElementById('supertonic-result');
  if (!resultItemEl) {
    console.warn('Supertonic result container not found.');
    return;
  }

  resultItemEl.classList.remove('generating');
  resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);

  const titleMainEl = resultItemEl.querySelector('.title-main');
  if (titleMainEl) {
    titleMainEl.textContent = 'Supertonic';
    titleMainEl.style.color = 'var(--accent-yellow)';
  }
  const titleSubEl = resultItemEl.querySelector('.title-sub');
  if (titleSubEl) {
    titleSubEl.textContent = 'On-Device';
  }

  const infoContainer = resultItemEl.querySelector('.demo-result-info');
  if (infoContainer) {
    infoContainer.classList.remove('error');
  }
  const timeElInitial = document.getElementById('supertonic-time');
  if (timeElInitial) {
    timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true });
  }
  const cpsElInitial = document.getElementById('supertonic-cps');
  if (cpsElInitial) {
    cpsElInitial.textContent = charsPerSec;
  }
  const rtfElInitial = document.getElementById('supertonic-rtf');
  if (rtfElInitial) {
    rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x');
  }

  // Build the custom player markup.
  const playerContainer = resultItemEl.querySelector('.custom-audio-player');
  if (playerContainer) {
    playerContainer.style.display = '';
    playerContainer.innerHTML = `
      <button id="play-pause-btn" class="player-btn">${PAUSE_ICON_SVG}</button>
      <div class="time-display" id="current-time">0:00.00</div>
      <div class="progress-container" id="progress-container">
        <div class="progress-bar">
          <div class="progress-fill" id="progress-fill"></div>
        </div>
      </div>
      <div class="time-display" id="total-duration">${formatTime(duration, { trimMobile: true })}</div>
      <div class="demo-result-actions" style="display: none;">
        <button class="demo-download-btn" id="supertonic-download" aria-label="Download WAV" title="Download WAV">
          <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24">
            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
            <polyline points="7 10 12 15 17 10"/>
            <line x1="12" y1="15" x2="12" y2="3"/>
          </svg>
        </button>
      </div>
    `;
  }

  // FIX: guard against a missing results container instead of throwing
  // (every other DOM handle in this function is already guarded).
  if (container) {
    container.style.display = 'flex';
  }
  latestSupertonicProcessedChars = displayedInitialChars;

  // Cache the freshly created player controls (closure-scoped).
  playPauseBtn = document.getElementById('play-pause-btn');
  progressBar = document.getElementById('progress-container');
  currentTimeDisplay = document.getElementById('current-time');
  durationDisplay = document.getElementById('total-duration');
  progressFill = document.getElementById('progress-fill');

  audioContext = new (window.AudioContext || window.webkitAudioContext)();
  startTime = audioContext.currentTime;
  totalDuration = duration;
  isPlaying = true;
  isPaused = false;

  // Pause hook shared with the page-wide player registry.
  const pausePlayback = () => {
    if (!audioContext || audioContext.state === 'closed') return;
    if (isPlaying) {
      pauseTime = audioContext.currentTime - startTime;
      scheduledSources.forEach(source => {
        try {
          source.stop();
        } catch (e) {
          // already stopped — ignore
        }
      });
      scheduledSources = [];
      audioContext.suspend();
      isPaused = true;
      isPlaying = false;
      if (playPauseBtn) {
        playPauseBtn.innerHTML = PLAY_ICON_SVG;
      }
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
      }
    }
  };

  supertonicPlayerRecord = {
    audioContext: audioContext,
    pausePlayback: pausePlayback
  };
  // Drop any stale registration for this player, register the new one, and
  // pause every other player on the page.
  customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext);
  customAudioPlayers.push(supertonicPlayerRecord);
  pauseAllPlayersExcept(supertonicPlayerRecord);

  // Schedule the first chunk for immediate playback.
  const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate);
  audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration });

  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioContext.destination);
  source.start(audioContext.currentTime);
  scheduledSources.push(source);

  // Next chunk starts after this one plus the fixed 0.3 s gap.
  nextScheduledTime = audioContext.currentTime + audioBuffer.duration + 0.3;

  // FIX: only wire up controls that exist — playPauseBtn/progressBar are
  // null when the player container was absent above.
  if (playPauseBtn) {
    playPauseBtn.addEventListener('click', togglePlayPause);
  }
  if (progressBar) {
    progressBar.addEventListener('click', (e) => {
      const rect = progressBar.getBoundingClientRect();
      const percentage = ((e.clientX - rect.left) / rect.width) * 100;
      seekTo(percentage);
    });
  }

  updateProgress();
};
|
|
|
|
|
|
|
|
// Appends one newly generated audio chunk to the playback queue and
// refreshes the duration / latency / throughput readouts. Stale progress
// callbacks (ones that would move the character counter backwards) are
// dropped.
const onChunkAdded = async (audioData, sampleRate, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => {
  if (!audioContext) return;

  // Queue the chunk right after the previously scheduled audio.
  const buffer = createAudioBufferFromFloat32(audioData, sampleRate);
  audioChunks.push({ buffer, duration: buffer.duration });

  const src = audioContext.createBufferSource();
  src.buffer = buffer;
  src.connect(audioContext.destination);
  src.start(nextScheduledTime);
  scheduledSources.push(src);
  nextScheduledTime = nextScheduledTime + buffer.duration + 0.3; // keep the fixed inter-chunk gap

  totalDuration = duration;

  // Flash the total-duration readout white to signal that it grew.
  if (durationDisplay) {
    durationDisplay.textContent = formatTime(duration, { trimMobile: true });
    durationDisplay.style.transition = 'color 0.3s';
    durationDisplay.style.color = '#ffffff';
    setTimeout(() => {
      durationDisplay.style.color = '';
    }, 300);
  }

  const textLength = currentGenerationTextLength > 0
    ? currentGenerationTextLength
    : (demoTextInput.textContent || demoTextInput.innerText || '').trim().length;
  const isBatch = textLength >= getMaxChunkLength();

  const timeEl = document.getElementById('supertonic-time');
  const durationEl = document.getElementById('supertonic-duration');
  const cpsEl = document.getElementById('supertonic-cps');
  const rtfEl = document.getElementById('supertonic-rtf');

  const effectiveProcessedChars = typeof processedChars === 'number' ? processedChars : latestSupertonicProcessedChars;
  // Out-of-order callback: ignore anything older than what is already shown.
  if (effectiveProcessedChars < latestSupertonicProcessedChars) {
    return;
  }

  const clampedProcessedChars = textLength > 0 ? Math.min(effectiveProcessedChars, textLength) : effectiveProcessedChars;
  const progressValue = textLength > 0 ? Math.min(100, (clampedProcessedChars / textLength) * 100) : 0;

  if (durationEl) {
    durationEl.textContent = formatTimeDetailed(duration);
  }
  if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) {
    const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`;
    timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true });
  }
  if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) {
    cpsEl.textContent = (clampedProcessedChars / currentProcessingTime).toFixed(1);
  }
  if (rtfEl && duration > 0 && currentProcessingTime > 0) {
    rtfEl.innerHTML = formatStatValueWithSuffix((currentProcessingTime / duration).toFixed(3), 'x');
  }

  const resultItemEl = document.getElementById('supertonic-result');
  if (resultItemEl) {
    resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
  }
  latestSupertonicProcessedChars = clampedProcessedChars;
};
|
|
|
|
|
|
|
|
const result = await generateSupertonicSpeechChunked( |
|
|
text, |
|
|
totalStep, |
|
|
durationFactor, |
|
|
onFirstChunkReady, |
|
|
onChunkAdded |
|
|
); |
|
|
|
|
|
if (result.success) { |
|
|
const textLength = result.text ? result.text.length : 0; |
|
|
const isBatch = textLength >= getMaxChunkLength(); |
|
|
const processingTimeStr = isBatch && firstChunkGenerationTime > 0 |
|
|
? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}` |
|
|
: formatTimeDetailed(result.processingTime); |
|
|
const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0'; |
|
|
const progressValue = textLength > 0 ? 100 : 0; |
|
|
|
|
|
const timeEl = document.getElementById('supertonic-time'); |
|
|
const durationEl = document.getElementById('supertonic-duration'); |
|
|
const cpsEl = document.getElementById('supertonic-cps'); |
|
|
const rtfEl = document.getElementById('supertonic-rtf'); |
|
|
|
|
|
if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); |
|
|
if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration); |
|
|
latestSupertonicProcessedChars = textLength; |
|
|
if (cpsEl) cpsEl.textContent = charsPerSec; |
|
|
if (rtfEl) { |
|
|
const rtf = result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-'; |
|
|
rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); |
|
|
} |
|
|
const resultItemEl = document.getElementById('supertonic-result'); |
|
|
if (resultItemEl) { |
|
|
resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); |
|
|
} |
|
|
|
|
|
|
|
|
if (audioContext && audioChunks.length > 0) { |
|
|
totalDuration = result.audioDuration; |
|
|
if (durationDisplay) { |
|
|
durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true }); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
const downloadBtn = document.getElementById('supertonic-download'); |
|
|
if (downloadBtn) { |
|
|
downloadBtn.parentElement.style.display = 'block'; |
|
|
downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav'); |
|
|
} |
|
|
} |
|
|
|
|
|
} catch (error) { |
|
|
showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error'); |
|
|
showDemoError(`Error during synthesis: ${error.message}`); |
|
|
console.error('Synthesis error:', error); |
|
|
|
|
|
|
|
|
demoResults.style.display = 'none'; |
|
|
demoResults.innerHTML = ` |
|
|
<div class="demo-placeholder"> |
|
|
<div class="demo-placeholder-icon">🎙️</div> |
|
|
<p>Your generated speech will appear here</p> |
|
|
</div> |
|
|
`; |
|
|
} finally { |
|
|
isGenerating = false; |
|
|
demoGenerateBtn.disabled = false; |
|
|
|
|
|
|
|
|
const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
|
|
speakerItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
|
|
|
|
|
|
|
|
const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
|
|
languageItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Trigger a browser download of `url` under `filename` by clicking a
// transient anchor element.
window.downloadDemoAudio = function(url, filename) {
  const link = document.createElement('a');
  link.href = url;
  link.download = filename;
  link.click();
};
|
|
|
|
|
|
|
|
// Convert a user-facing playback-speed multiplier into the model's duration
// factor (the reciprocal of the offset-adjusted speed).
function speedToDurationFactor(speed, offset = 0.05) {
  const adjusted = speed + offset;
  return 1 / adjusted;
}
|
|
|
|
|
|
|
|
// Reflect the current slider positions in their value labels.
function updateSliderValues() {
  demoTotalStepsValue.textContent = `${demoTotalSteps.value} Steps`;
  const speed = parseFloat(demoSpeed.value);
  demoSpeedValue.textContent = `${speed.toFixed(2)}x`;
}
|
|
|
|
|
|
|
|
// Keep the slider value labels live while dragging.
demoTotalSteps.addEventListener('input', updateSliderValues);
demoSpeed.addEventListener('input', updateSliderValues);

// Initialize labels to the sliders' starting positions.
updateSliderValues();

// Main "generate" entry point.
demoGenerateBtn.addEventListener('click', generateSpeech);

// Preset text selection UI and state.
const presetItems = document.querySelectorAll('.preset-item[data-preset]');
const freeformBtn = document.getElementById('freeformBtn');
let currentPreset = 'quote'; // id of the active preset ('freeform' for user text)

// Suppresses the input handler's "switch to freeform" logic while preset
// text is being inserted programmatically.
let isPresetChanging = false;
|
|
|
|
|
|
|
|
// Highlight the preset button matching `presetType` (or none when falsy),
// record it as the current preset, and sync the quote-mode styling.
function updateActiveButton(presetType) {
  for (const item of presetItems) {
    item.classList.remove('active');
  }
  if (presetType) {
    const target = document.querySelector(`.preset-item[data-preset="${presetType}"]`);
    if (target) {
      target.classList.add('active');
    }
  }
  currentPreset = presetType;
  updateQuoteModeState(presetType === 'quote');
}
|
|
|
|
|
// Toggle the 'quote-mode' styling class on the results panel.
function updateQuoteModeState(isQuote) {
  if (!demoResults) {
    return;
  }
  demoResults.classList.toggle('quote-mode', Boolean(isQuote));
}
|
|
|
|
|
|
|
|
// Start on the "quote" preset and seed the text box with its text for the
// current language, when a per-language entry exists.
updateActiveButton('quote');
if (presetTexts.quote && typeof presetTexts.quote === 'object' && presetTexts.quote[currentLanguage]) {
  demoTextInput.textContent = presetTexts.quote[currentLanguage];
  updateCharCounter();
}
|
|
|
|
|
// Clicking a preset button loads its text into the editor; "freeform"
// clears it. Presets may be a per-language map or a legacy plain string.
// isPresetChanging keeps the input handler from flipping back to freeform
// while the text is set programmatically.
presetItems.forEach(item => {
  item.addEventListener('click', () => {
    const presetType = item.getAttribute('data-preset');
    if (presetType === 'freeform') {
      isPresetChanging = true;
      demoTextInput.textContent = '';
      updateCharCounter();
      updateActiveButton('freeform');
      isPresetChanging = false;
    } else {
      const preset = presetTexts[presetType];
      if (preset && typeof preset === 'object' && preset[currentLanguage]) {
        // Language-keyed preset.
        const text = preset[currentLanguage];
        isPresetChanging = true;
        demoTextInput.textContent = text;
        updateCharCounter();
        updateActiveButton(presetType);
        isPresetChanging = false;
      } else if (preset && typeof preset === 'string') {
        // Legacy single-string preset.
        isPresetChanging = true;
        demoTextInput.textContent = preset;
        updateCharCounter();
        updateActiveButton(presetType);
        isPresetChanging = false;
      }
    }
  });
});
|
|
|
|
|
|
|
|
// Force plain-text pasting into the contenteditable editor: insert the
// clipboard text at the caret, move the caret after it, then fire a
// synthetic 'input' event so counters and preset state update.
demoTextInput.addEventListener('paste', (e) => {
  e.preventDefault();
  const text = (e.clipboardData || window.clipboardData).getData('text/plain');
  const selection = window.getSelection();
  if (!selection.rangeCount) return;

  const range = selection.getRangeAt(0);
  range.deleteContents();
  const textNode = document.createTextNode(text);
  range.insertNode(textNode);
  // Collapse the selection to just after the inserted text.
  range.setStartAfter(textNode);
  range.collapse(true);
  selection.removeAllRanges();
  selection.addRange(range);

  demoTextInput.dispatchEvent(new Event('input', { bubbles: true }));
});
|
|
|
|
|
|
|
|
// Last seen editor text — used to tell real user edits from programmatic
// preset changes.
let previousTextValue = demoTextInput.textContent || demoTextInput.innerText || '';

// Mirror the input section's height into a CSS variable so the decorative
// left border can track it; keep it in sync via ResizeObserver.
const demoInputSection = document.querySelector('.demo-input-section');
function updateLeftBorderHeight() {
  if (demoInputSection) {
    const height = demoInputSection.offsetHeight;
    demoInputSection.style.setProperty('--demo-text-input-height', `${height}px`);
  }
}

updateLeftBorderHeight();
const resizeObserver = new ResizeObserver(() => {
  updateLeftBorderHeight();
});
if (demoInputSection) {
  resizeObserver.observe(demoInputSection);
}
|
|
|
|
|
|
|
|
// Size the text editor to fill the viewport on desktop. Sums the heights of
// every other demo-layout element (paddings, header, controls, label,
// preset row, output section, flex gaps) plus a fixed reserve, and gives
// the editor whatever vertical space remains (minimum 200px). On mobile
// (<= 768px wide) the CSS-controlled height is restored.
function calculateTextInputHeight() {
  if (window.innerWidth <= 768) {
    demoTextInput.style.height = '';
    return;
  }

  const viewportHeight = window.innerHeight;
  const interactiveDemoEl = document.querySelector('.interactive-demo');
  const containerEl = document.querySelector('.container');
  const headerWrapperEl = document.querySelector('.demo-header-wrapper');
  const controlsEl = document.querySelector('.demo-controls');
  const inputLabelEl = document.querySelector('.demo-input-label');
  const presetRowEl = document.querySelector('#presetControlsRow');
  const outputSectionEl = document.querySelector('.demo-output-section');
  const contentEl = document.querySelector('.demo-content');

  const interactiveDemoStyle = window.getComputedStyle(interactiveDemoEl || document.body);
  const containerStyle = window.getComputedStyle(containerEl || document.body);
  const contentStyle = window.getComputedStyle(contentEl || document.body);

  const px = (value) => parseFloat(value) || 0;
  const elHeight = (el) => (el ? el.offsetHeight : 0);
  const contentGap = px(contentStyle.gap);

  // Everything that competes with the editor for vertical space.
  let reserved = 0;
  reserved += px(interactiveDemoStyle.paddingTop) + px(interactiveDemoStyle.paddingBottom);
  reserved += px(containerStyle.paddingTop) + px(containerStyle.paddingBottom);
  reserved += elHeight(headerWrapperEl);
  reserved += elHeight(controlsEl);
  reserved += contentGap; // gap between controls and content
  reserved += elHeight(inputLabelEl);
  reserved += elHeight(presetRowEl);
  reserved += contentGap; // gap before the output section
  reserved += elHeight(outputSectionEl);

  // 275px fixed reserve for chrome not measured above — TODO confirm source.
  const availableHeight = viewportHeight - reserved - 275;
  const minHeight = 200;
  const maxHeight = availableHeight - 20;

  if (availableHeight > minHeight) {
    demoTextInput.style.height = `${Math.max(minHeight, maxHeight)}px`;
  } else {
    demoTextInput.style.height = `${minHeight}px`;
  }
}
|
|
|
|
|
|
|
|
// Initial sizing, then re-run whenever the window or any measured layout
// region changes size.
calculateTextInputHeight();
window.addEventListener('resize', calculateTextInputHeight);

const heightObserver = new ResizeObserver(() => {
  calculateTextInputHeight();
});

const headerWrapperEl = document.querySelector('.demo-header-wrapper');
const controlsEl = document.querySelector('.demo-controls');
const presetRowEl = document.querySelector('#presetControlsRow');
const outputSectionEl = document.querySelector('.demo-output-section');

if (headerWrapperEl) heightObserver.observe(headerWrapperEl);
if (controlsEl) heightObserver.observe(controlsEl);
if (presetRowEl) heightObserver.observe(presetRowEl);
if (outputSectionEl) heightObserver.observe(outputSectionEl);

// Show the editor scrollbar only while scrolling; hide it again 1.5 s
// after the last scroll event.
let scrollbarTimeout;
demoTextInput.addEventListener('scroll', () => {
  demoTextInput.classList.add('scrolling');
  if (scrollbarTimeout) {
    clearTimeout(scrollbarTimeout);
  }
  scrollbarTimeout = setTimeout(() => {
    demoTextInput.classList.remove('scrolling');
  }, 1500);
});

// On every edit: refresh the character counter; flip to "freeform" when the
// user (not a preset load) changed the text; and, in freeform mode,
// auto-detect the input language and switch the active language (with a
// toast) when it changes.
demoTextInput.addEventListener('input', () => {
  updateCharCounter();

  const currentText = demoTextInput.textContent || demoTextInput.innerText || '';
  if (!isPresetChanging && currentText !== previousTextValue) {
    updateActiveButton('freeform');
  }

  if (currentPreset === 'freeform') {
    const detectedLang = detectLanguage(currentText);
    if (detectedLang && detectedLang !== currentLanguage) {
      const previousLang = currentLanguage;
      currentLanguage = detectedLang;
      window.updateActiveLanguage(currentLanguage);
      showLanguageToast(previousLang, detectedLang);
    }
  }

  previousTextValue = currentText;
});

// Debounced counter refresh on window resize (layout may change truncation).
let resizeTimeout;
window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    updateCharCounter();
  }, 100);
});

updateCharCounter();

// Voice selection UI elements and state.
const speakerList = document.getElementById('speakerList');
const speakerItems = speakerList ? speakerList.querySelectorAll('.speaker-item[data-voice]') : [];
const createVoiceBtn = document.getElementById('createVoiceBtn');
const comingSoonModal = document.getElementById('comingSoonModal');
const comingSoonCloseBtn = document.getElementById('comingSoonCloseBtn');
// Blocks voice clicks while a voice switch is in flight.
let voiceSelectDisabled = false;
|
|
|
|
|
|
|
|
// Mark the speaker tile whose data-voice matches `voice` as active and
// clear the highlight from every other tile.
window.updateActiveSpeaker = function(voice) {
  if (!speakerList || !speakerItems) return;
  speakerItems.forEach(item => {
    item.classList.toggle('active', item.dataset.voice === voice);
  });
};
|
|
|
|
|
|
|
|
// Reflect the initially selected voice.
if (speakerList && speakerItems.length > 0) {
  window.updateActiveSpeaker(currentVoice);
}

// Shared tooltip element for voice descriptions.
const speakerTooltip = document.getElementById('speakerTooltip');
|
|
|
|
|
// Wire up each speaker tile: click selects (and switches) the voice;
// mouse hover shows a description tooltip on desktop; on mobile a custom
// touch state machine shows the tooltip while touching and synthesizes a
// click only for quick, mostly-stationary taps.
if (speakerList) {
  speakerItems.forEach(item => {
    // Set when touchend synthesizes the click, so the click handler can
    // distinguish touch-driven clicks from real mouse clicks on mobile.
    let clickFromTouch = false;

    item.addEventListener('click', async (e) => {
      // On mobile touch devices, only accept clicks we synthesized ourselves.
      if (isTouchDevice() && isMobileViewport() && !clickFromTouch) {
        return;
      }
      clickFromTouch = false;

      if (voiceSelectDisabled || modelsLoading || isGenerating) return;

      const selectedVoice = item.dataset.voice;

      // Re-clicking the active voice just regenerates with the current text.
      if (selectedVoice === currentVoice) {
        const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
        if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
          generateSpeech();
        }
        return;
      }

      // Lock the UI while the voice model is swapped.
      const wasDisabled = demoGenerateBtn.disabled;
      demoGenerateBtn.disabled = true;
      voiceSelectDisabled = true;

      // Optimistically highlight the new voice.
      window.updateActiveSpeaker(selectedVoice);

      try {
        await switchVoice(selectedVoice);
        if (models && cfgs && processors) {
          demoGenerateBtn.disabled = false;
          voiceSelectDisabled = false;
          // Auto-regenerate when there is enough text.
          const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
          if (text.length >= 10 && !isGenerating) {
            generateSpeech();
          }
        }
      } catch (error) {
        console.error('Failed to switch voice:', error);
        // Roll the highlight back to the voice that is still loaded.
        window.updateActiveSpeaker(currentVoice);
        voiceSelectDisabled = false;
        if (!wasDisabled) demoGenerateBtn.disabled = false;
      }
    });

    if (speakerTooltip) {
      // Desktop: tooltip follows the cursor.
      item.addEventListener('mouseenter', (e) => {
        if (isTouchDevice() && isMobileViewport()) return;
        const voice = item.dataset.voice;
        if (voice && VOICE_DESCRIPTIONS[voice]) {
          speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
          speakerTooltip.style.display = 'block';
          updateTooltipPosition(e, speakerTooltip);
        }
      });

      item.addEventListener('mousemove', (e) => {
        if (isTouchDevice() && isMobileViewport()) return;
        if (speakerTooltip.style.display === 'block') {
          updateTooltipPosition(e, speakerTooltip);
        }
      });

      item.addEventListener('mouseleave', () => {
        if (isTouchDevice() && isMobileViewport()) return;
        speakerTooltip.style.display = 'none';
      });

      // Mobile touch state: a short touch with little vertical travel is a
      // tap; anything else is treated as scrolling.
      let touchStartTime = 0;
      let touchHandled = false;
      let touchStartY = 0;
      const TOUCH_MOVE_THRESHOLD = 10; // px of vertical travel before it's a scroll

      item.addEventListener('touchstart', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;
        touchHandled = false;
        const touch = e.touches[0];
        touchStartTime = Date.now();
        touchStartY = touch.clientY;
        const voice = item.dataset.voice;
        if (voice && VOICE_DESCRIPTIONS[voice]) {
          // Suppress native behavior so the tooltip can be shown.
          e.preventDefault();
          speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
          speakerTooltip.style.display = 'block';
          updateTooltipPositionMobile(speakerTooltip, touch.clientY);
        }
      }, { passive: false });

      item.addEventListener('touchmove', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;
        const touch = e.touches[0];
        const deltaY = Math.abs(touch.clientY - touchStartY);
        if (deltaY > TOUCH_MOVE_THRESHOLD) {
          touchHandled = true; // it's a scroll, not a tap
          speakerTooltip.style.display = 'none';
        }
        e.preventDefault();
      }, { passive: false });

      item.addEventListener('touchend', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;
        const touchEndTime = Date.now();
        const touchDuration = touchEndTime - touchStartTime;
        speakerTooltip.style.display = 'none';
        e.preventDefault();
        if (!touchHandled && touchDuration < 500) {
          // Quick tap: replay it as a click once the tooltip is hidden.
          clickFromTouch = true;
          setTimeout(() => {
            const clickEvent = new MouseEvent('click', {
              bubbles: true,
              cancelable: true,
              view: window
            });
            item.dispatchEvent(clickEvent);
          }, 50);
        } else {
          touchHandled = true;
          e.stopPropagation();
        }
      }, { passive: false });

      item.addEventListener('touchcancel', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;
        speakerTooltip.style.display = 'none';
        touchHandled = true;
        e.preventDefault();
      }, { passive: false });

      // Long-press would otherwise open the context menu over the tooltip.
      item.addEventListener('contextmenu', (e) => {
        if (isTouchDevice() && isMobileViewport()) {
          e.preventDefault();
          return false;
        }
      });
    }
  });
}
|
|
|
|
|
|
|
|
// Place the hover tooltip near the cursor (40px above it), then nudge it
// back inside the viewport edge by edge if it overflows.
function updateTooltipPosition(event, tooltip) {
  const anchorX = event.clientX;
  const anchorY = event.clientY - 40;
  tooltip.style.left = `${anchorX}px`;
  tooltip.style.top = `${anchorY}px`;

  // Measure after the initial placement, then clamp.
  const rect = tooltip.getBoundingClientRect();
  const viewportW = window.innerWidth;
  const viewportH = window.innerHeight;

  if (rect.right > viewportW) {
    tooltip.style.left = `${viewportW - rect.width - 10}px`;
  }
  if (rect.left < 0) {
    tooltip.style.left = '10px';
  }
  if (rect.top < 0) {
    // Would run off the top: flip below the cursor instead.
    tooltip.style.top = `${event.clientY + 40}px`;
  }
  if (rect.bottom > viewportH) {
    tooltip.style.top = `${viewportH - rect.height - 10}px`;
  }
}
|
|
|
|
|
|
|
|
// Position the voice tooltip for touch: stretch it to 90% width with
// centered wrapped text, and place it 75px above the finger — falling back
// to below the finger, or clamped to the viewport bottom, when it would
// overflow.
function updateTooltipPositionMobile(tooltip, touchY) {
  const viewportH = window.innerHeight;

  tooltip.style.width = '90%';
  tooltip.style.left = '5%';
  tooltip.style.right = 'auto';
  tooltip.style.marginLeft = '0';
  tooltip.style.marginRight = '0';
  tooltip.style.whiteSpace = 'normal';
  tooltip.style.textAlign = 'center';

  tooltip.style.top = `${touchY - 75}px`;

  // Measure after the initial placement, then clamp vertically.
  const rect = tooltip.getBoundingClientRect();
  if (rect.top < 10) {
    tooltip.style.top = `${touchY + 20}px`;
  }
  if (rect.bottom > viewportH - 10) {
    tooltip.style.top = `${viewportH - rect.height - 10}px`;
  }
}
|
|
|
|
|
|
|
|
// "Create voice" opens the coming-soon modal; the close button or a click
// on the overlay dismisses it.
if (createVoiceBtn && comingSoonModal) {
  createVoiceBtn.addEventListener('click', () => {
    comingSoonModal.classList.add('show');
  });
}

if (comingSoonCloseBtn && comingSoonModal) {
  comingSoonCloseBtn.addEventListener('click', () => {
    comingSoonModal.classList.remove('show');
  });
}

if (comingSoonModal) {
  const overlay = comingSoonModal.querySelector('.coming-soon-modal-overlay');
  if (overlay) {
    overlay.addEventListener('click', () => {
      comingSoonModal.classList.remove('show');
    });
  }
}
|
|
|
|
|
|
|
|
// Language selection UI.
const languageList = document.getElementById('languageList');
const languageItems = languageList ? languageList.querySelectorAll('.speaker-item[data-language]') : [];

// Mark the tile whose data-language matches `language` as active.
window.updateActiveLanguage = function(language) {
  if (!languageList || !languageItems) return;
  languageItems.forEach(item => {
    if (item.dataset.language === language) {
      item.classList.add('active');
    } else {
      item.classList.remove('active');
    }
  });
};

// Reflect the initially selected language.
if (languageList && languageItems.length > 0) {
  window.updateActiveLanguage(currentLanguage);
}
|
|
|
|
|
|
|
|
// Clicking a language tile switches the UI language, reloads the current
// preset's text in that language, and regenerates speech when enough text
// is present. Clicking the already-active language just regenerates.
if (languageList) {
  languageItems.forEach(item => {
    item.addEventListener('click', async (e) => {
      if (modelsLoading || isGenerating) return;

      const selectedLanguage = item.dataset.language;

      // Same language: regenerate with the current text and bail.
      if (selectedLanguage === currentLanguage) {
        const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
        if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
          generateSpeech();
        }
        return;
      }

      currentLanguage = selectedLanguage;
      window.updateActiveLanguage(currentLanguage);

      // Swap the preset text to the newly selected language.
      if (currentPreset && currentPreset !== 'freeform' && presetTexts[currentPreset]) {
        const preset = presetTexts[currentPreset];
        if (preset && typeof preset === 'object' && preset[currentLanguage]) {
          isPresetChanging = true;
          demoTextInput.textContent = preset[currentLanguage];
          updateCharCounter();
          isPresetChanging = false;
        }
      }

      // Give the DOM a beat to settle before kicking off generation.
      await new Promise(resolve => setTimeout(resolve, 100));
      const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
      if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
        generateSpeech();
      }
    });
  });
}
|
|
|
|
|
|
|
|
// Decorative title animations and model bootstrap.
const demoTitleLeft = document.querySelector('.demo-title-left');
const demoTitleRight = document.querySelector('.demo-title-right');
const demoOutputSection = document.querySelector('.demo-output-section');

// Wrap each letter of the left title in a span so letters can animate
// individually.
// NOTE(review): spaces map to a plain space here — this may have been an
// '&nbsp;' entity lost in transit; verify against the deployed page.
if (demoTitleLeft) {
  const text = demoTitleLeft.textContent.trim();
  demoTitleLeft.innerHTML = text.split('').map(char =>
    char === ' ' ? ' ' : `<span class="letter visible">${char}</span>`
  ).join('');
}

// Clicking the input section replays the letter-by-letter reveal.
if (demoInputSection && demoTitleLeft) {
  demoInputSection.addEventListener('click', () => {
    const letters = demoTitleLeft.querySelectorAll('.letter');
    letters.forEach(letter => {
      letter.classList.remove('visible');
    });
    letters.forEach((letter, index) => {
      setTimeout(() => {
        letter.classList.add('visible');
      }, index * 0.0625 * 1000); // 62.5 ms stagger per letter
    });
  });
}

// Clicking the output section (except the generate button) replays the
// speech animation on the right title; the offsetWidth read forces a
// reflow so the CSS animation restarts.
if (demoOutputSection && demoTitleRight) {
  demoOutputSection.addEventListener('click', (event) => {
    if (event.target.closest('#demoGenerateBtn')) {
      return;
    }
    demoTitleRight.classList.remove('animate-speech');
    void demoTitleRight.offsetWidth;
    demoTitleRight.classList.add('animate-speech');
  });
}

// Kick off model loading.
initializeModels();
|
|
})(); |
|
|
|