// supertonic / script.js — Supertonic TTS demo page script (Hugging Face Space).
import * as ort from 'onnxruntime-web';
// Optional preset demo texts injected by the hosting page before this script runs.
const presetTexts = window.presetTexts || {};
// Inline SVG markup for the custom audio player controls (play / pause / stop).
const PLAY_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 5v14l11-7-11-7z"></path></svg>`;
const PAUSE_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 6h3v12H8V6zm5 0h3v12h-3V6z"></path></svg>`;
const STOP_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M7 7h10v10H7V7z"></path></svg>`;
// Lightning background parallax
// Randomly toggles the `lightning-flicker` class on <body> in short 1-2 blink
// bursts, rescheduled forever at random intervals of up to 10 seconds.
(function initLightningParallax() {
if (typeof document === 'undefined') {
return;
}
// Removed scroll-based CSS variable updates for direct scroll response
// const updateLightningOffset = () => {
// document.body.style.setProperty('--lightning-scroll', `${window.scrollY}px`);
// };
// let ticking = false;
// const onScroll = () => {
// if (!ticking) {
// window.requestAnimationFrame(() => {
// updateLightningOffset();
// ticking = false;
// });
// ticking = true;
// }
// };
// updateLightningOffset();
// window.addEventListener('scroll', onScroll, { passive: true });
// Toggle `className` on <body> 1-2 times, each on/off phase lasting 20-100ms.
// `onComplete` (if a function) runs once the burst finishes.
const runBlink = (className, onComplete) => {
let remaining = 1 + Math.round(Math.random());
const blink = () => {
if (remaining-- <= 0) {
if (typeof onComplete === 'function') {
onComplete();
}
return;
}
const wait = 20 + Math.random() * 80;
document.body.classList.add(className);
setTimeout(() => {
document.body.classList.remove(className);
setTimeout(blink, wait);
}, wait);
};
blink();
};
// Self-rescheduling loop: fire a blink burst at a random delay within 10s,
// then schedule the next one when the burst completes.
const schedule = () => {
setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000);
};
schedule();
/*
const heroSection = document.querySelector('.hero');
if (heroSection) {
heroSection.addEventListener('click', (event) => {
runBlink('lightning-flash');
});
}
*/
})();
/**
 * Escape the five HTML-sensitive characters in a string so it can be safely
 * interpolated into innerHTML.
 * @param {string} value - raw text
 * @returns {string} HTML-safe text
 */
function escapeHtml(value) {
  // Entity table covering exactly the characters matched by the pattern below.
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return value.replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Render a stat value as HTML, optionally attaching a unit suffix to each
 * numeric segment. Values containing '/' are split and rendered as multiple
 * suffixed segments joined by ' / '; the first segment may get a "First" label.
 * @param {string|number|null|undefined} value - stat value to render
 * @param {string} suffix - unit label appended to each segment ('' = plain)
 * @param {{firstLabel?: boolean}} [options] - firstLabel adds a "First" prefix
 * @returns {string} HTML string ('' for null/undefined values)
 */
function formatStatValueWithSuffix(value, suffix, options = {}) {
  const { firstLabel = false } = options;
  if (value === undefined || value === null) {
    return '';
  }
  // Without a suffix the value is emitted verbatim (escaped, no spans).
  if (!suffix) {
    return escapeHtml(`${value}`);
  }
  const raw = `${value}`.trim();
  // Placeholder values ('--', '-', 'error', empty) are never decorated.
  const isPlaceholder = !raw || raw === '--' || raw === '-' || raw.toLowerCase() === 'error';
  if (isPlaceholder) {
    return escapeHtml(raw);
  }
  // Build one <span> segment: optional "First" prefix + number + suffix label.
  const renderSegment = (segment, includePrefix = false) => {
    const trimmed = segment.trim();
    if (!trimmed) {
      return '';
    }
    const withPrefix = includePrefix && firstLabel;
    const pieces = [];
    if (withPrefix) {
      pieces.push('<span class="stat-label stat-suffix stat-prefix">First</span>');
    }
    pieces.push(`<span class="stat-value-number">${escapeHtml(trimmed)}</span>`);
    pieces.push(`<span class="stat-label stat-suffix">${escapeHtml(suffix)}</span>`);
    const segmentClass = withPrefix ? 'stat-value-segment has-prefix' : 'stat-value-segment';
    return `<span class="${segmentClass}">${pieces.join('')}</span>`;
  };
  if (!raw.includes('/')) {
    return renderSegment(raw);
  }
  // Multi-part values like "1.2 / 3.4": each part gets its own segment.
  return raw
    .split('/')
    .map((part, index) => renderSegment(part, index === 0))
    .join(' / ');
}
/**
 * Maps preprocessed text to integer ID sequences via a unicode → index table.
 *
 * `indexer` is the parsed unicode_indexer.json: keys are unicode values,
 * values are model vocabulary indices (-1 / missing = unsupported character).
 */
export class UnicodeProcessor {
constructor(indexer) {
this.indexer = indexer;
}
// Convert a batch of strings into zero-padded ID rows plus a float mask.
// Returns { textIds: number[][], textMask, unsupportedChars: string[] }.
call(textList) {
const processedTexts = textList.map(t => preprocessText(t));
const textIdsLengths = processedTexts.map(t => t.length);
const maxLen = Math.max(...textIdsLengths);
const textIds = [];
const unsupportedChars = new Set();
for (let i = 0; i < processedTexts.length; i++) {
// Rows are zero-padded up to the longest text in the batch.
const row = new Array(maxLen).fill(0);
const unicodeVals = textToUnicodeValues(processedTexts[i]);
// NOTE(review): `unicodeVals` is indexed by code point (Array.from) while
// `processedTexts[i][j]` indexes UTF-16 units — these diverge for
// astral-plane characters; presumably preprocessText's emoji stripping
// makes that rare. TODO confirm.
for (let j = 0; j < unicodeVals.length; j++) {
const indexValue = this.indexer[unicodeVals[j]];
// Check if character is supported (not -1, undefined, or null)
if (indexValue === undefined || indexValue === null || indexValue === -1) {
unsupportedChars.add(processedTexts[i][j]);
row[j] = 0; // Use 0 as fallback
} else {
row[j] = indexValue;
}
}
textIds.push(row);
}
const textMask = getTextMask(textIdsLengths);
return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) };
}
}
/**
 * Normalize raw input text for the TTS front-end: unicode decomposition,
 * emoji/symbol stripping, punctuation and quote normalization, whitespace
 * cleanup, and a trailing period when no terminal punctuation is present.
 *
 * Fix: the original collapsed duplicate spaces with
 * `while (text.includes(" ")) text = text.replace(/ /g, " ")`, which replaces
 * a space with a space and therefore never terminates for any text containing
 * a space. Replaced with a single quantified-pattern pass.
 *
 * @param {string} text - raw user text
 * @returns {string} cleaned text ('' stays '')
 */
export function preprocessText(text) {
  // Normalize unicode characters
  text = text.normalize('NFKD');
  // Remove emojis
  text = text.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]/gu, '');
  // Replace various dashes and symbols
  text = text.replace(/–/g, "-");
  text = text.replace(/‑/g, "-");
  text = text.replace(/—/g, "-");
  text = text.replace(/¯/g, " ");
  text = text.replace(/_/g, " ");
  text = text.replace(/[“”]/g, '"');
  text = text.replace(/[‘’´`]/g, "'");
  text = text.replace(/\[/g, " ");
  text = text.replace(/\]/g, " ");
  text = text.replace(/\|/g, " ");
  // Strip combining diacritical marks left behind by NFKD.
  text = text.replace(/[\u0302\u0303\u0304\u0305\u0306\u0307\u0308\u030A\u030B\u030C\u0327\u0328\u0329\u032A\u032B\u032C\u032D\u032E\u032F]/g, "");
  text = text.replace(/\//g, " "); // FIXME: slash should be kept (e.g., fraction)
  text = text.replace(/#/g, " "); // FIXME: hash should be kept (e.g., hashtag)
  text = text.replace(/→/g, " ");
  text = text.replace(/←/g, " ");
  // Remove special symbols
  text = text.replace(/[♥☆♡©\\]/g, "");
  // Replace known expressions
  text = text.replace(/@/g, " at ");
  text = text.replace(/&/g, " and ");
  text = text.replace(/e\.g\.,/g, "for example, ");
  text = text.replace(/i\.e\.,/g, "that is, ");
  // Fix spacing around punctuation
  text = text.replace(/ ,/g, ",");
  text = text.replace(/ \./g, ".");
  text = text.replace(/ !/g, "!");
  text = text.replace(/ \?/g, "?");
  text = text.replace(/ ;/g, ";");
  text = text.replace(/ :/g, ":");
  text = text.replace(/ '/g, "'");
  // Remove duplicate quotes
  while (text.includes('""')) {
    text = text.replace(/""/g, '"');
  }
  while (text.includes("''")) {
    text = text.replace(/''/g, "'");
  }
  while (text.includes("``")) {
    text = text.replace(/``/g, "`");
  }
  // Collapse runs of spaces in one pass (the original looped forever here).
  text = text.replace(/ {2,}/g, " ");
  // Remove first and last spaces
  text = text.trim();
  text = text.replace(/\s+/g, " "); // Collapse any remaining whitespace runs
  // Nothing left after cleanup: return '' instead of appending a lone period.
  if (!text) {
    return text;
  }
  // if text doesn't end with punctuation, quotes, or closing brackets, add a period
  const lastChar = text[text.length - 1];
  if (!/[.!?;:,'"'"')\]}…。」』】〉》›»]/.test(lastChar)) {
    text = text + '.';
  }
  return text;
}
/**
 * Convert a string into an array of Unicode code points.
 *
 * Fix: the original paired Array.from (code-point iteration) with
 * charCodeAt(0) (UTF-16 unit), so astral-plane characters produced a lone
 * high surrogate. codePointAt(0) returns the true code point and is identical
 * to charCodeAt(0) for all BMP characters.
 *
 * @param {string} text - input string
 * @returns {number[]} one code point per user-perceived character
 */
export function textToUnicodeValues(text) {
  return Array.from(text).map(char => char.codePointAt(0));
}
/**
 * Build a [batch, 1, maxLen] float mask from per-item lengths: 1.0 marks a
 * valid position, 0.0 marks padding.
 * @param {number[]} lengths - valid length of each item
 * @param {?number} [maxLen] - mask width; defaults to the longest item
 * @returns {number[][][]} one [1, maxLen] row per item
 */
export function lengthToMask(lengths, maxLen = null) {
  // Default to the longest sequence when no explicit width is given.
  const width = maxLen || Math.max(...lengths);
  return lengths.map((len) => [
    Array.from({ length: width }, (_, pos) => (pos < len ? 1.0 : 0.0)),
  ]);
}
// Build a [batch, 1, maxLen] float mask from per-text token-ID lengths.
export function getTextMask(textIdsLengths) {
return lengthToMask(textIdsLengths);
}
/**
 * Compute the latent-frame mask for a batch of waveforms.
 * @param {number[]} wavLengths - waveform lengths in samples
 * @param {object} cfgs - config with ae.base_chunk_size and ttl.chunk_compress_factor
 * @returns {number[][][]} [batch, 1, maxFrames] float mask
 */
export function getLatentMask(wavLengths, cfgs) {
  // Samples covered by one latent frame = AE chunk size × TTL compression factor.
  const latentSize = cfgs.ae.base_chunk_size * cfgs.ttl.chunk_compress_factor;
  // Ceiling division: a partial trailing chunk still occupies a frame.
  const latentLengths = wavLengths.map((len) => Math.floor((len + latentSize - 1) / latentSize));
  return lengthToMask(latentLengths);
}
/**
 * Draw the initial Gaussian noise latent for the flow-matching decoder and
 * zero out frames beyond each item's true duration.
 *
 * Fix: the Box-Muller transform used `u1 = Math.random()`, which can return 0
 * and yield `Math.log(0) = -Infinity` → Inf/NaN samples. `1 - Math.random()`
 * keeps u1 in (0, 1] so the logarithm is always finite.
 *
 * @param {number[][][]} duration - per-item duration in seconds at [b][0][0]
 * @param {object} cfgs - config with ae.{sample_rate, base_chunk_size} and
 *                        ttl.{chunk_compress_factor, latent_dim}
 * @returns {{noisyLatent: number[][][], latentMask: number[][][]}}
 */
export function sampleNoisyLatent(duration, cfgs) {
  const sampleRate = cfgs.ae.sample_rate;
  const chunkSize = cfgs.ae.base_chunk_size * cfgs.ttl.chunk_compress_factor;
  const latentDim = cfgs.ttl.latent_dim * cfgs.ttl.chunk_compress_factor;
  // Per-item waveform lengths in samples; max length sets the frame count.
  const wavLengths = duration.map((d) => Math.floor(d[0][0] * sampleRate));
  const wavLenMax = Math.max(...duration.map((d) => d[0][0])) * sampleRate;
  // Ceiling division so a partial trailing chunk still gets a frame.
  const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize);
  // Standard normal sample via Box-Muller; u1 ∈ (0, 1] keeps log(u1) finite.
  const randNormal = () => {
    const u1 = 1 - Math.random();
    const u2 = Math.random();
    return Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
  };
  // [batch, latentDim, latentLen] tensor of independent N(0, 1) samples.
  const noisyLatent = duration.map(() =>
    Array.from({ length: latentDim }, () =>
      Array.from({ length: latentLen }, randNormal)
    )
  );
  // Mask out (zero) frames past each item's actual length.
  const latentMask = getLatentMask(wavLengths, cfgs);
  for (let b = 0; b < noisyLatent.length; b++) {
    for (let d = 0; d < latentDim; d++) {
      for (let t = 0; t < latentLen; t++) {
        noisyLatent[b][d][t] *= latentMask[b][0][t];
      }
    }
  }
  return { noisyLatent, latentMask };
}
// Create an ONNX Runtime inference session for a single model file.
// `opts` is forwarded verbatim (execution providers, thread count, ...).
export async function loadOnnx(onnxPath, opts) {
return await ort.InferenceSession.create(onnxPath, opts);
}
/**
 * Load the four TTS ONNX models in parallel from `basePath`.
 * @param {string} basePath - directory containing the .onnx files
 * @param {object} opts - ort.InferenceSession options, forwarded as-is
 * @param {(name: string, loaded: number, total: number) => void} [onProgress]
 *        called after each model finishes loading
 * @returns {Promise<{dpOrt, textEncOrt, vectorEstOrt, vocoderOrt}>} sessions
 *          keyed as listed in `models` below
 */
export async function loadOnnxAll(basePath, opts, onProgress) {
const models = [
{ name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' },
{ name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' },
{ name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' },
{ name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' }
];
const result = {};
let loadedCount = 0;
// Load all models in parallel
const loadPromises = models.map(async (model) => {
const session = await loadOnnx(model.path, opts);
loadedCount++;
if (onProgress) {
onProgress(model.name, loadedCount, models.length);
}
return { key: model.key, session };
});
// Wait for all models to load
const loadedModels = await Promise.all(loadPromises);
// Organize results
loadedModels.forEach(({ key, session }) => {
result[key] = session;
});
try {
// Download counting: best-effort ping of the HF repo; failures are ignored.
await fetch('https://huggingface.co/Supertone/supertonic/resolve/main/config.json');
} catch (error) {
console.warn('Failed to update download count:', error);
}
return result;
}
/**
 * Fetch and parse the TTS configuration JSON (`tts.json`).
 * @param {string} basePath - directory containing tts.json
 * @returns {Promise<object>} parsed configuration
 * @throws {Error} on a non-2xx HTTP response
 */
export async function loadCfgs(basePath) {
  const response = await fetch(`${basePath}/tts.json`);
  // fetch() only rejects on network failure; surface HTTP errors explicitly.
  if (!response.ok) {
    throw new Error(`Failed to load TTS config (HTTP ${response.status}): ${basePath}/tts.json`);
  }
  return await response.json();
}
/**
 * Fetch the unicode indexer table and build the text processor.
 * @param {string} basePath - directory containing unicode_indexer.json
 * @returns {Promise<{textProcessor: UnicodeProcessor}>}
 * @throws {Error} on a non-2xx HTTP response
 */
export async function loadProcessors(basePath) {
  const response = await fetch(`${basePath}/unicode_indexer.json`);
  // fetch() only rejects on network failure; surface HTTP errors explicitly.
  if (!response.ok) {
    throw new Error(`Failed to load unicode indexer (HTTP ${response.status}): ${basePath}/unicode_indexer.json`);
  }
  const unicodeIndexerData = await response.json();
  const textProcessor = new UnicodeProcessor(unicodeIndexerData);
  return { textProcessor };
}
/**
 * Parse a WAV (RIFF) file into mono float samples in [-1, 1].
 * Multi-channel input is downmixed by averaging channels.
 * Supported sample formats: 16-bit PCM, 24-bit PCM, 32-bit float.
 *
 * Fix: RIFF chunks are word-aligned — an odd-sized chunk is followed by a pad
 * byte. The original advanced by `8 + chunkSize`, desyncing the chunk walk
 * after any odd-sized chunk (e.g. LIST/INFO metadata); we now skip the pad.
 *
 * @param {ArrayBuffer} buffer - raw WAV file bytes
 * @returns {{sampleRate: number, audioData: Float32Array}}
 * @throws {Error} on malformed headers or unsupported bit depths
 */
function parseWavFile(buffer) {
  const view = new DataView(buffer);
  // Check RIFF header
  const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
  if (riff !== 'RIFF') {
    throw new Error('Not a valid WAV file');
  }
  const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11));
  if (wave !== 'WAVE') {
    throw new Error('Not a valid WAV file');
  }
  // Walk the chunk list looking for 'fmt ' and 'data'.
  let offset = 12;
  let fmtChunk = null;
  let dataChunk = null;
  // Require a full 8-byte chunk header before reading it.
  while (offset + 8 <= buffer.byteLength) {
    const chunkId = String.fromCharCode(
      view.getUint8(offset),
      view.getUint8(offset + 1),
      view.getUint8(offset + 2),
      view.getUint8(offset + 3)
    );
    const chunkSize = view.getUint32(offset + 4, true);
    if (chunkId === 'fmt ') {
      fmtChunk = {
        audioFormat: view.getUint16(offset + 8, true),
        numChannels: view.getUint16(offset + 10, true),
        sampleRate: view.getUint32(offset + 12, true),
        bitsPerSample: view.getUint16(offset + 22, true)
      };
    } else if (chunkId === 'data') {
      dataChunk = {
        offset: offset + 8,
        size: chunkSize
      };
      break;
    }
    // RIFF chunks are word-aligned: odd-sized chunks carry one pad byte.
    offset += 8 + chunkSize + (chunkSize % 2);
  }
  if (!fmtChunk || !dataChunk) {
    throw new Error('Invalid WAV file format');
  }
  const bytesPerSample = fmtChunk.bitsPerSample / 8;
  const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels));
  const audioData = new Float32Array(numSamples);
  if (fmtChunk.bitsPerSample === 16) {
    // 16-bit signed PCM, averaged across channels, scaled by 2^15.
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2;
        sample += view.getInt16(sampleOffset, true);
      }
      audioData[i] = (sample / fmtChunk.numChannels) / 32768.0;
    }
  } else if (fmtChunk.bitsPerSample === 24) {
    // 24-bit signed PCM (3 bytes little-endian per sample).
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3;
        // Read 3 bytes and convert to signed 24-bit integer
        const byte1 = view.getUint8(sampleOffset);
        const byte2 = view.getUint8(sampleOffset + 1);
        const byte3 = view.getUint8(sampleOffset + 2);
        let value = (byte3 << 16) | (byte2 << 8) | byte1;
        // Convert to signed (two's complement)
        if (value & 0x800000) {
          value = value - 0x1000000;
        }
        sample += value;
      }
      audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0; // 2^23
    }
  } else if (fmtChunk.bitsPerSample === 32) {
    // 32-bit IEEE float samples, already in [-1, 1].
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4;
        sample += view.getFloat32(sampleOffset, true);
      }
      audioData[i] = sample / fmtChunk.numChannels;
    }
  } else {
    throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`);
  }
  return {
    sampleRate: fmtChunk.sampleRate,
    audioData: audioData
  };
}
// Flatten a (possibly nested) numeric array into a float32 ort.Tensor of shape `dims`.
export function arrayToTensor(array, dims) {
const flat = array.flat(Infinity);
return new ort.Tensor('float32', Float32Array.from(flat), dims);
}
// Flatten a (possibly nested) integer array into an int64 ort.Tensor of shape `dims`.
// Values are widened to BigInt as required by the int64 tensor type.
export function intArrayToTensor(array, dims) {
const flat = array.flat(Infinity);
return new ort.Tensor('int64', BigInt64Array.from(flat.map(x => BigInt(x))), dims);
}
/**
 * Encode mono float samples into a 16-bit PCM WAV file.
 * @param {Float32Array|number[]} audioData - mono samples; clipped to [-1, 1]
 * @param {number} sampleRate - output sample rate in Hz
 * @returns {ArrayBuffer} complete RIFF/WAVE byte buffer (44-byte header + data)
 */
export function writeWavFile(audioData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const dataSize = audioData.length * bytesPerSample;
  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);
  // Write an ASCII tag byte-by-byte at the given offset.
  const putTag = (offset, text) => {
    for (let i = 0; i < text.length; i++) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };
  // RIFF container header.
  putTag(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // bytes remaining after this field
  putTag(8, 'WAVE');
  // fmt chunk: uncompressed PCM, mono, 16-bit.
  putTag(12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk payload size
  view.setUint16(20, 1, true); // format tag 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * numChannels * bytesPerSample, true); // byte rate
  view.setUint16(32, numChannels * bytesPerSample, true); // block align
  view.setUint16(34, bitsPerSample, true);
  // data chunk: clip each sample and convert to signed 16-bit little-endian.
  putTag(36, 'data');
  view.setUint32(40, dataSize, true);
  for (let i = 0; i < audioData.length; i++) {
    const clipped = Math.max(-1, Math.min(1, audioData[i]));
    view.setInt16(44 + i * 2, Math.floor(clipped * 32767), true);
  }
  return buffer;
}
// Smooth scroll functionality
// Page wiring on DOM ready: smooth anchor scrolling, section reveal
// animations, hero parallax, paper-card links, the multi-language quick-start
// snippet widget, and active side-nav dot tracking.
document.addEventListener('DOMContentLoaded', () => {
// Smooth scroll for anchor links
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
anchor.addEventListener('click', function (e) {
e.preventDefault();
const href = this.getAttribute('href');
const target = document.querySelector(href);
if (target) {
// Update URL with anchor
if (history.pushState) {
history.pushState(null, null, href);
}
target.scrollIntoView({
behavior: 'smooth',
block: 'start'
});
}
});
});
// Add scroll animation for sections
const observerOptions = {
threshold: 0.1,
rootMargin: '0px 0px -100px 0px'
};
// Fade/slide elements in when they first intersect the viewport.
const observer = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
entry.target.style.opacity = '1';
entry.target.style.transform = 'translateY(0)';
}
});
}, observerOptions);
// Observe language icons and paper cards
document.querySelectorAll('.language-icon, .paper-card').forEach(card => {
card.style.opacity = '0';
card.style.transform = 'translateY(20px)';
card.style.transition = 'opacity 0.6s ease-out, transform 0.6s ease-out';
observer.observe(card);
});
// Add parallax effect to hero background
window.addEventListener('scroll', () => {
const scrolled = window.pageYOffset;
const heroBg = document.querySelector('.hero-bg');
if (heroBg) {
heroBg.style.transform = `translateY(${scrolled * 0.5}px)`;
}
});
// Paper cards carrying data-link open that URL in a new tab on click or
// keyboard activation (Enter / Space).
const paperCards = document.querySelectorAll('.paper-card[data-link]');
paperCards.forEach((card) => {
const href = card.dataset.link ? card.dataset.link.trim() : '';
if (!href) {
return;
}
const openLink = () => {
window.open(href, '_blank', 'noopener,noreferrer');
};
card.addEventListener('click', (event) => {
if (event.defaultPrevented) {
return;
}
openLink();
});
card.addEventListener('keydown', (event) => {
if (event.key === 'Enter' || event.key === ' ') {
event.preventDefault();
openLink();
}
});
});
// Active side navigation dot on scroll
const sections = document.querySelectorAll('section[id]');
const navDots = document.querySelectorAll('.nav-dot');
// Language quick-start widget: tab buttons and icon buttons both drive one
// shared snippet element whose HTML is rebuilt per selected language.
const languageSnippetElement = document.querySelector('.languages-placeholder [data-language-snippet]');
const languageTabButtons = Array.from(document.querySelectorAll('.languages-placeholder .language-option'));
const languageIconButtons = Array.from(document.querySelectorAll('.languages-icons-container .language-icon'));
const languageCopyBtn = document.querySelector('.languages-placeholder .code-copy-btn');
const languageCopyToast = document.querySelector('.languages-placeholder .code-copy-toast');
let copyToastTimeout = null;
if (languageSnippetElement && (languageTabButtons.length || languageIconButtons.length)) {
// Setup steps shared by every language (clone repo + fetch models).
const sharedSetupSteps = [
{ text: '# Clone the Supertonic repository', type: 'heading' },
{ text: 'git clone https://github.com/supertone-inc/supertonic.git', type: 'command' },
{ text: 'cd supertonic', type: 'command' },
{ text: ' ', type: 'plain' },
{ text: '# Download ONNX models (NOTE: Make sure git-lfs is installed)', type: 'heading' },
{ text: 'git clone https://huggingface.co/Supertone/supertonic assets', type: 'command' },
{ text: ' ', type: 'plain' },
];
// Per-language "run the example" commands appended after the shared steps.
const perLanguageCommands = {
python: [
'cd py',
'uv sync',
'uv run example_onnx.py',
],
javascript: [
'cd nodejs',
'npm install',
'npm start',
],
java: [
'cd java',
'mvn clean install',
'mvn exec:java',
],
cpp: [
'cd cpp',
'mkdir build && cd build',
'cmake .. && cmake --build . --config Release',
'./example_onnx',
],
csharp: [
'cd csharp',
'dotnet restore',
'dotnet run',
],
go: [
'cd go',
'go mod download',
'go run example_onnx.go helper.go',
],
swift: [
'cd swift',
'swift build -c release',
'.build/release/example_onnx',
],
rust: [
'cd rust',
'cargo build --release',
'./target/release/example_onnx',
],
};
const buildCodeSample = (commands = []) => [
...sharedSetupSteps,
{ text: '# Run example', type: 'heading' },
...commands.map(text => ({ text, type: 'command' })),
];
const codeSamples = Object.fromEntries(
Object.entries(perLanguageCommands).map(([language, commands]) => [
language,
buildCodeSample(commands),
]),
);
// Local arrow-function escaper; intentionally shadows the module-level
// escapeHtml declared earlier in this file.
const escapeHtml = (value) => value
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Render snippet lines to HTML: headings, commands (first token highlighted
// separately from its arguments), and plain text.
const renderSnippet = (lines) => lines.map((line) => {
if (!line || typeof line.text !== 'string') {
return '';
}
const kind = line.type || (line.highlight ? 'command' : 'plain');
if (kind === 'heading') {
return `<span class="token-heading">${escapeHtml(line.text)}</span>`;
}
if (kind === 'command') {
const [command, ...rest] = line.text.trim().split(/\s+/);
if (!command) {
return '';
}
const commandHtml = `<span class="token-command">${escapeHtml(command)}</span>`;
const restHtml = rest.length
? ` <span class="token-argument">${escapeHtml(rest.join(' '))}</span>`
: '';
return `${commandHtml}${restHtml}`;
}
return `<span class="token-argument">${escapeHtml(line.text)}</span>`;
}).join('\n');
// Activate a language: sync tab/icon button state and swap the snippet HTML.
const setLanguage = (language) => {
const snippet = codeSamples[language];
if (!snippet) {
console.warn(`No code sample registered for language "${language}".`);
return;
}
languageTabButtons.forEach((button) => {
const isActive = button.dataset.language === language;
button.classList.toggle('active', isActive);
button.setAttribute('aria-selected', String(isActive));
button.setAttribute('tabindex', isActive ? '0' : '-1');
});
languageIconButtons.forEach((button) => {
const isActive = button.dataset.language === language;
button.classList.toggle('active', isActive);
button.setAttribute('aria-pressed', String(isActive));
});
languageSnippetElement.innerHTML = renderSnippet(snippet);
};
const interactiveButtons = [...new Set([...languageTabButtons, ...languageIconButtons])];
interactiveButtons.forEach((button) => {
const { language } = button.dataset;
if (!language || !codeSamples[language]) {
return;
}
button.addEventListener('click', () => setLanguage(language));
button.addEventListener('keydown', (event) => {
if (event.key === 'Enter' || event.key === ' ') {
event.preventDefault();
setLanguage(language);
}
});
});
// Default to the first tab's (or icon's) language, falling back to python.
const defaultLanguage =
(languageTabButtons[0] && languageTabButtons[0].dataset.language) ||
(languageIconButtons[0] && languageIconButtons[0].dataset.language) ||
'python';
setLanguage(defaultLanguage);
if (languageCopyBtn) {
languageCopyBtn.addEventListener('click', async () => {
const codeText = languageSnippetElement ? languageSnippetElement.textContent.trim() : '';
if (!codeText) {
return;
}
// Show the "copied" toast for 2s; restart the timer on repeated clicks.
const showToast = () => {
if (!languageCopyToast) return;
languageCopyToast.textContent = 'Code copied to clipboard';
languageCopyToast.classList.add('is-visible');
if (copyToastTimeout) {
clearTimeout(copyToastTimeout);
}
copyToastTimeout = setTimeout(() => {
languageCopyToast.classList.remove('is-visible');
}, 2000);
};
try {
if (navigator.clipboard && navigator.clipboard.writeText) {
await navigator.clipboard.writeText(codeText);
} else {
// Fallback for browsers without the async clipboard API: select the
// text in an off-screen textarea and use execCommand('copy').
const textArea = document.createElement('textarea');
textArea.value = codeText;
textArea.style.position = 'fixed';
textArea.style.top = '-1000px';
textArea.style.left = '-1000px';
document.body.appendChild(textArea);
textArea.focus();
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
}
showToast();
} catch (error) {
console.error('Failed to copy code snippet:', error);
}
});
}
}
// Highlight the side-nav dot for whichever section is currently in view.
window.addEventListener('scroll', () => {
let current = '';
const scrollPosition = window.pageYOffset || window.scrollY;
const windowHeight = window.innerHeight;
const documentHeight = document.documentElement.scrollHeight;
// Check if we're near the bottom of the page (within 100px)
const isNearBottom = scrollPosition + windowHeight >= documentHeight - 100;
if (isNearBottom && sections.length > 0) {
// If near bottom, activate the last section
const lastSection = sections[sections.length - 1];
current = lastSection.getAttribute('id');
} else {
// Otherwise, find the current section based on scroll position
// (last section whose top is within 300px above the scroll position wins).
sections.forEach(section => {
const sectionTop = section.offsetTop;
const sectionHeight = section.clientHeight;
if (scrollPosition >= sectionTop - 300) {
current = section.getAttribute('id');
}
});
}
navDots.forEach(dot => {
dot.classList.remove('active');
if (dot.getAttribute('href') === `#${current}`) {
dot.classList.add('active');
}
});
});
});
// Import helper functions and ONNX Runtime at the top
// import {
// sampleNoisyLatent,
// loadOnnxAll,
// loadCfgs,
// loadProcessors,
// loadWavRef,
// arrayToTensor,
// intArrayToTensor,
// writeWavFile
// } from './helper.js';
// import * as ort from 'onnxruntime-web';
// TTS Demo functionality
(async function() {
// Check if we're on a page with the TTS demo
const demoTextInput = document.getElementById('demoTextInput');
if (!demoTextInput) return;
// Configure ONNX Runtime for WebGPU support
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
ort.env.wasm.numThreads = 1;
// Configuration
const REF_EMBEDDING_PATHS = {
'F': 'assets/voice_styles/F.json',
'M': 'assets/voice_styles/M.json'
};
// Global state
let models = null;
let cfgs = null;
let processors = null;
let currentVoice = 'F'; // Default to Female voice
let refEmbeddingCache = {}; // Cache for embeddings
let currentStyleTtlTensor = null;
let currentStyleDpTensor = null;
// UI Elements
const demoStatusBox = document.getElementById('demoStatusBox');
const demoStatusText = document.getElementById('demoStatusText');
const demoBackendBadge = document.getElementById('demoBackendBadge');
const demoGenerateBtn = document.getElementById('demoGenerateBtn');
const demoTotalSteps = document.getElementById('demoTotalSteps');
const demoDurationFactor = document.getElementById('demoDurationFactor');
const demoTotalStepsValue = document.getElementById('demoTotalStepsValue');
const demoDurationFactorValue = document.getElementById('demoDurationFactorValue');
const demoResults = document.getElementById('demoResults');
const demoError = document.getElementById('demoError');
const demoCharCount = document.getElementById('demoCharCount');
const demoCharCounter = document.getElementById('demoCharCounter');
const demoCharStatus = document.getElementById('demoCharStatus');
const demoElevenLabsApiKey = document.getElementById('demoElevenLabsApiKey');
const demoSecondaryApiKey = document.getElementById('demoSecondaryApiKey');
const demoTertiaryApiKey = document.getElementById('demoTertiaryApiKey');
const demoComparisonSection = document.getElementById('demoComparisonSection');
// Billing Modal Elements
const billingModal = document.getElementById('billingModal');
const billingModalMessage = document.getElementById('billingModalMessage');
const billingCharCount = document.getElementById('billingCharCount');
const billingProviders = document.getElementById('billingProviders');
const billingModalCancel = document.getElementById('billingModalCancel');
const billingModalConfirm = document.getElementById('billingModalConfirm');
// Text validation constants
const MIN_CHARS = 10;
const MAX_CHUNK_LENGTH = 300; // Maximum length for each chunk
// Custom audio player state (shared across generations)
let audioContext = null;
let scheduledSources = [];
let audioChunks = [];
let totalDuration = 0;
let startTime = 0;
let pauseTime = 0;
let isPaused = false;
let isPlaying = false;
let animationFrameId = null;
let playPauseBtn = null;
let progressBar = null;
let currentTimeDisplay = null;
let durationDisplay = null;
let progressFill = null;
let firstChunkGenerationTime = 0; // Processing time for first chunk
let totalChunks = 0;
let nextScheduledTime = 0; // Next time to schedule audio chunk
let currentGenerationTextLength = 0;
let supertonicPlayerRecord = null; // Supertonic player record for cross-player pause management
let isGenerating = false; // Track if speech generation is in progress
// Track all custom audio players (for ElevenLabs, etc.)
let customAudioPlayers = [];
const textHandlingAudioPlayers = [];
const TEXT_HANDLING_CARD_AUDIO_MAP = [1, 2, 3, 4];
let isComparisonMode = false;
const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches;
const trimDecimalsForMobile = (formatted) => {
if (!formatted) return formatted;
return isMobileViewport() ? formatted.replace(/\.\d{2}$/, '') : formatted;
};
function pauseAllPlayersExcept(currentPlayer) {
customAudioPlayers.forEach(player => {
if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') {
player.pausePlayback();
}
});
}
function pauseTextHandlingPlayersExcept(currentPlayer) {
textHandlingAudioPlayers.forEach(player => {
if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') {
player.pausePlayback();
}
});
}
/**
* Chunk text into smaller pieces based on sentence boundaries
* @param {string} text - The text to chunk
* @param {number} maxLen - Maximum length for each chunk
* @returns {Array<string>} - Array of text chunks
*/
function chunkText(text, maxLen = MAX_CHUNK_LENGTH) {
// Split by paragraph (two or more newlines)
const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim());
const chunks = [];
for (let paragraph of paragraphs) {
paragraph = paragraph.trim();
if (!paragraph) continue;
// Split by sentence boundaries (period, question mark, exclamation mark followed by space)
// But exclude common abbreviations like Mr., Mrs., Dr., etc. and single capital letters like F.
const sentences = paragraph.split(/(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+/);
let currentChunk = "";
for (let sentence of sentences) {
if (currentChunk.length + sentence.length + 1 <= maxLen) {
currentChunk += (currentChunk ? " " : "") + sentence;
} else {
if (currentChunk) {
chunks.push(currentChunk.trim());
}
currentChunk = sentence;
}
}
if (currentChunk) {
chunks.push(currentChunk.trim());
}
}
return chunks;
}
function showDemoStatus(message, type = 'info', progress = null) {
demoStatusText.innerHTML = message;
demoStatusBox.className = 'demo-status-box';
demoStatusBox.style.removeProperty('--status-progress');
demoStatusBox.style.display = ''; // Show the status box
if (type === 'success') {
demoStatusBox.classList.add('success');
} else if (type === 'error') {
demoStatusBox.classList.add('error');
}
// Update progress bar
if (progress !== null && progress >= 0 && progress <= 100) {
const clampedProgress = Math.max(0, Math.min(progress, 100));
demoStatusBox.style.setProperty('--status-progress', `${clampedProgress}%`);
demoStatusBox.classList.toggle('complete', clampedProgress >= 100);
} else if (type === 'success' || type === 'error') {
demoStatusBox.style.removeProperty('--status-progress');
demoStatusBox.classList.remove('complete');
} else {
demoStatusBox.style.removeProperty('--status-progress');
demoStatusBox.classList.remove('complete');
}
}
function hideDemoStatus() {
demoStatusBox.style.display = 'none';
}
function showDemoError(message) {
demoError.textContent = message;
demoError.classList.add('active');
}
function hideDemoError() {
demoError.classList.remove('active');
}
// Custom billing confirmation modal
function showBillingConfirmation(charCount, providers) {
return new Promise((resolve) => {
// Set modal content
billingCharCount.textContent = charCount;
billingProviders.textContent = providers.join(', ');
billingModalMessage.textContent = 'You are about to generate speech using API services.';
// Show modal
billingModal.classList.add('show');
// Handle confirm
const handleConfirm = () => {
cleanup();
resolve(true);
};
// Handle cancel
const handleCancel = () => {
cleanup();
resolve(false);
};
// Handle overlay click
const handleOverlayClick = (e) => {
if (e.target === billingModal || e.target.classList.contains('billing-modal-overlay')) {
cleanup();
resolve(false);
}
};
// Handle escape key
const handleEscape = (e) => {
if (e.key === 'Escape') {
cleanup();
resolve(false);
}
};
// Cleanup function
const cleanup = () => {
billingModal.classList.remove('show');
billingModalConfirm.removeEventListener('click', handleConfirm);
billingModalCancel.removeEventListener('click', handleCancel);
billingModal.removeEventListener('click', handleOverlayClick);
document.removeEventListener('keydown', handleEscape);
};
// Add event listeners
billingModalConfirm.addEventListener('click', handleConfirm);
billingModalCancel.addEventListener('click', handleCancel);
billingModal.addEventListener('click', handleOverlayClick);
document.addEventListener('keydown', handleEscape);
});
}
function showBackendBadge(backend) {
demoBackendBadge.textContent = backend;
demoBackendBadge.classList.add('visible');
if (backend === 'WebGPU') {
demoBackendBadge.classList.add('webgpu');
} else {
demoBackendBadge.classList.add('wasm');
}
}
// Validate characters in text against the loaded text processor.
// Returns { valid, unsupportedChars }, where unsupportedChars contains the
// ORIGINAL characters (before preprocessing) that normalize to symbols the
// processor cannot handle.
// Best-effort: if processors are not loaded or validation throws, the text
// is treated as valid so generation is not blocked spuriously — but the
// failure is now logged instead of being silently swallowed.
function validateCharacters(text) {
  if (!processors || !processors.textProcessor) {
    return { valid: true, unsupportedChars: [] };
  }
  try {
    // Extract unique characters to minimize preprocessText calls
    const uniqueChars = [...new Set(text)];
    // Build mapping for unique chars only (much faster for long texts)
    // For example, Korean '간' -> 'ㄱㅏㄴ', so we map 'ㄱ','ㅏ','ㄴ' -> '간'
    const processedToOriginal = new Map();
    const charToProcessed = new Map();
    for (const char of uniqueChars) {
      const processedChar = preprocessText(char);
      charToProcessed.set(char, processedChar);
      // Map each processed character back to its original
      for (const pc of processedChar) {
        if (!processedToOriginal.has(pc)) {
          processedToOriginal.set(pc, new Set());
        }
        processedToOriginal.get(pc).add(char);
      }
    }
    // Build full processed text using cached mappings
    const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join('');
    // Check the entire processed text once (efficient)
    const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]);
    // Map unsupported processed chars back to original chars
    const unsupportedOriginalChars = new Set();
    if (unsupportedChars && unsupportedChars.length > 0) {
      for (const unsupportedChar of unsupportedChars) {
        const originalChars = processedToOriginal.get(unsupportedChar);
        if (originalChars) {
          originalChars.forEach(c => unsupportedOriginalChars.add(c));
        }
      }
    }
    const unsupportedCharsArray = Array.from(unsupportedOriginalChars);
    return {
      valid: unsupportedCharsArray.length === 0,
      unsupportedChars: unsupportedCharsArray
    };
  } catch (error) {
    // Deliberate best-effort fallback; surface the failure for debugging
    console.warn('Character validation failed; treating text as valid:', error);
    return { valid: true, unsupportedChars: [] };
  }
}
// Update character counter, responsive font size, and generate-button state.
// Also surfaces unsupported-character errors once the models are loaded.
function updateCharCounter() {
  const text = demoTextInput.value;
  const length = text.length;
  demoCharCount.textContent = length;

  // Responsive font sizing: scale relative to the textarea's rendered width.
  // Ratios correspond to the original rem sizes at a 640px reference width
  // (1280px container / 2 grid columns, minus padding/gap ≈ 640px), e.g.
  // 3rem/640px ≈ 6.375%, down to a 1rem/640px = 2.5% floor.
  const FONT_TIERS = [
    [160, 0.06375],   // scaled from 3rem
    [240, 0.053125],  // scaled from 2.5rem
    [400, 0.0425],    // scaled from 2rem
    [700, 0.031875],  // scaled from 1.5rem
  ];
  const textareaWidth = demoTextInput.offsetWidth;
  let fontSizeRatio = 0.025; // minimum (2.5% of width)
  for (const [limit, ratio] of FONT_TIERS) {
    if (length < limit) {
      fontSizeRatio = ratio;
      break;
    }
  }
  demoTextInput.style.fontSize = `${textareaWidth * fontSizeRatio}px`;

  // Reset counter status classes before re-evaluating
  demoCharCounter.classList.remove('error', 'warning', 'valid');

  // Unsupported-character check (only meaningful once models are loaded)
  let hasUnsupportedChars = false;
  if (models && processors && length > 0) {
    const validation = validateCharacters(text);
    if (!validation.valid && validation.unsupportedChars.length > 0) {
      hasUnsupportedChars = true;
      const shown = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', ');
      const extra = validation.unsupportedChars.length > 5
        ? ` and ${validation.unsupportedChars.length - 5} more`
        : '';
      showDemoError(`Unsupported characters detected: ${shown}${extra}. Please remove them before generating speech.`);
    } else {
      hideDemoError();
    }
  }

  // Reflect length + character validity in the counter and generate button
  if (length < MIN_CHARS || hasUnsupportedChars) {
    demoCharCounter.classList.add('error');
    demoCharStatus.textContent = '✗';
    demoGenerateBtn.disabled = true;
  } else {
    demoCharCounter.classList.add('valid');
    demoCharStatus.textContent = '✓';
    // Enable only if models are loaded AND not currently generating
    demoGenerateBtn.disabled = !models || isGenerating;
  }
}
// Validate text input before generation.
// Returns { valid: true } on success, otherwise { valid: false, message }
// with a user-facing explanation.
function validateTextInput(text) {
  const isBlank = !text || text.trim().length === 0;
  if (isBlank) {
    return { valid: false, message: 'Please enter some text.' };
  }
  if (text.length >= MIN_CHARS) {
    return { valid: true };
  }
  return { valid: false, message: `Text must be at least ${MIN_CHARS} characters long. (Currently ${text.length})` };
}
// Load pre-extracted style embeddings from JSON.
// Results are cached per voice in refEmbeddingCache, so each voice's
// embedding file is fetched at most once.
// Fix: removed the useless `try { ... } catch (e) { throw e; }` wrapper
// that only rethrew; errors now propagate directly.
// @param {string} voice - key into REF_EMBEDDING_PATHS (e.g. 'F' / 'M')
// @returns {Promise<{styleTtl: ort.Tensor, styleDp: ort.Tensor}>}
// @throws when no path is configured or the fetch fails
async function loadStyleEmbeddings(voice) {
  // Serve from cache when this voice was already loaded
  if (refEmbeddingCache[voice]) {
    return refEmbeddingCache[voice];
  }
  const embeddingPath = REF_EMBEDDING_PATHS[voice];
  if (!embeddingPath) {
    throw new Error(`No embedding path configured for voice: ${voice}`);
  }
  const response = await fetch(embeddingPath);
  if (!response.ok) {
    throw new Error(`Failed to fetch embedding: ${response.statusText}`);
  }
  const embeddingData = await response.json();
  // Convert a JSON tensor spec ({ data, dims, type }) into an ONNX tensor.
  // Nested arrays are flattened before building the Float32Array.
  const toTensor = (spec) => new ort.Tensor(
    spec.type || 'float32',
    Float32Array.from(spec.data.flat(Infinity)),
    spec.dims
  );
  const embeddings = {
    styleTtl: toTensor(embeddingData.style_ttl),
    styleDp: toTensor(embeddingData.style_dp)
  };
  // Cache the embeddings for subsequent voice switches
  refEmbeddingCache[voice] = embeddings;
  return embeddings;
}
// Switch to a different voice.
// Loads (or reuses cached) style embeddings, makes them current, then
// re-validates the text. Shows a user-facing error and rethrows on failure.
async function switchVoice(voice) {
  try {
    const { styleTtl, styleDp } = await loadStyleEmbeddings(voice);
    currentStyleTtlTensor = styleTtl;
    currentStyleDpTensor = styleDp;
    currentVoice = voice;
    // Re-validate text after switching voice
    updateCharCounter();
  } catch (error) {
    const voiceLabel = voice === 'F' ? 'Female' : 'Male';
    showDemoError(`Failed to load ${voiceLabel} voice: ${error.message}`);
    throw error;
  }
}
// Check WebGPU support more thoroughly.
// Returns { supported: true, adapter, device } on success, or
// { supported: false, reason } describing why WebGPU can't be used.
// Fix: removed the unused `adapterInfo` local — the probe call is kept
// (best-effort) but its result was never read.
async function checkWebGPUSupport() {
  try {
    // Detect iOS/Safari (iPadOS reports 'MacIntel' with touch support)
    const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) ||
      (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
    const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent);
    // iOS and Safari have incomplete WebGPU support
    if (isIOS) {
      return { supported: false, reason: 'iOS does not support the required WebGPU features' };
    }
    if (isSafari) {
      // Desktop Safari might work, but check carefully
      return { supported: false, reason: 'Safari does not support the required WebGPU features' };
    }
    // Check if WebGPU is available in the browser
    if (!navigator.gpu) {
      return { supported: false, reason: 'WebGPU not available in this browser' };
    }
    // Request adapter
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      return { supported: false, reason: 'No WebGPU adapter found' };
    }
    // Probe adapter info (best-effort; some browsers throw here)
    try {
      await adapter.requestAdapterInfo();
    } catch (infoError) {
      // Ignore adapter info errors — the info is not required
    }
    // Request a device to verify the adapter actually works
    const device = await adapter.requestDevice();
    if (!device) {
      return { supported: false, reason: 'Failed to create WebGPU device' };
    }
    return { supported: true, adapter, device };
  } catch (error) {
    // Handle specific iOS/Safari errors
    const errorMsg = error.message || '';
    if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) {
      return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' };
    }
    return { supported: false, reason: error.message };
  }
}
// Warmup models with dummy inference (no audio playback, no UI updates).
// Runs the full synthesis pipeline once — duration prediction, text
// encoding, iterative denoising, and vocoding — so first-run
// initialization costs are paid before the first real user request.
// Errors are logged and ignored (warmup is non-critical).
async function warmupModels() {
  try {
    const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. Try it now, and enjoy its speed.';
    const totalStep = 5; // Use minimal steps for faster warmup
    const durationFactor = 1.0;
    const textList = [dummyText];
    const bsz = 1; // single-item batch for warmup
    // Use pre-computed style embeddings (set when the voice was loaded)
    const styleTtlTensor = currentStyleTtlTensor;
    const styleDpTensor = currentStyleDpTensor;
    // Step 1: Estimate duration
    const { textIds, textMask } = processors.textProcessor.call(textList);
    const textIdsShape = [bsz, textIds[0].length];
    const textMaskShape = [bsz, 1, textMask[0][0].length];
    const textMaskTensor = arrayToTensor(textMask, textMaskShape);
    const dpResult = await models.dpOrt.run({
      text_ids: intArrayToTensor(textIds, textIdsShape),
      style_dp: styleDpTensor,
      text_mask: textMaskTensor
    });
    const durOnnx = Array.from(dpResult.duration.data);
    // Scale predicted durations by the speed factor (1.0 during warmup)
    for (let i = 0; i < durOnnx.length; i++) {
      durOnnx[i] *= durationFactor;
    }
    // Reshape durations to [bsz][1][1] as expected by sampleNoisyLatent
    const durReshaped = [];
    for (let b = 0; b < bsz; b++) {
      durReshaped.push([[durOnnx[b]]]);
    }
    // Step 2: Encode text
    const textEncResult = await models.textEncOrt.run({
      text_ids: intArrayToTensor(textIds, textIdsShape),
      style_ttl: styleTtlTensor,
      text_mask: textMaskTensor
    });
    const textEmbTensor = textEncResult.text_emb;
    // Step 3: Denoising — iteratively refine the noisy latent
    let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
    const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length];
    const latentMaskShape = [bsz, 1, latentMask[0][0].length];
    const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);
    const totalStepArray = new Array(bsz).fill(totalStep);
    const scalarShape = [bsz];
    const totalStepTensor = arrayToTensor(totalStepArray, scalarShape);
    for (let step = 0; step < totalStep; step++) {
      const currentStepArray = new Array(bsz).fill(step);
      const vectorEstResult = await models.vectorEstOrt.run({
        noisy_latent: arrayToTensor(noisyLatent, latentShape),
        text_emb: textEmbTensor,
        style_ttl: styleTtlTensor,
        text_mask: textMaskTensor,
        latent_mask: latentMaskTensor,
        total_step: totalStepTensor,
        current_step: arrayToTensor(currentStepArray, scalarShape)
      });
      const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data);
      // Copy the flat model output back into the nested latent in place
      let idx = 0;
      for (let b = 0; b < noisyLatent.length; b++) {
        for (let d = 0; d < noisyLatent[b].length; d++) {
          for (let t = 0; t < noisyLatent[b][d].length; t++) {
            noisyLatent[b][d][t] = denoisedLatent[idx++];
          }
        }
      }
    }
    // Step 4: Generate waveform
    const vocoderResult = await models.vocoderOrt.run({
      latent: arrayToTensor(noisyLatent, latentShape)
    });
    // Warmup complete - no need to process the audio further
  } catch (error) {
    console.warn('Warmup failed (non-critical):', error.message);
    // Don't throw - warmup failure shouldn't prevent normal usage
  }
}
// Load models on page load.
// Pipeline: load config -> check WebGPU -> load ONNX models and text
// processors in parallel -> load default voice embeddings -> warmup.
// If WebGPU is unsupported, the demo inputs are disabled and
// initialization stops; on any other failure an error status is shown.
async function initializeModels() {
  try {
    showDemoStatus('<strong>Loading configuration...</strong>', 'info', 5);
    const basePath = 'assets/onnx';
    // Load config
    cfgs = await loadCfgs(basePath);
    // Check WebGPU support first
    showDemoStatus('<strong>Checking WebGPU support...</strong>', 'info', 8);
    const webgpuCheck = await checkWebGPUSupport();
    // If WebGPU is not supported, show message and disable demo
    if (!webgpuCheck.supported) {
      // Show specific message for iOS users
      const errorMessage = webgpuCheck.reason.includes('iOS') || webgpuCheck.reason.includes('Safari')
        ? `<strong>iOS/Safari is not currently supported.</strong><br>Please use a desktop browser that supports WebGPU (Chrome 113+, Edge 113+).`
        : `Please use a browser that supports WebGPU (Chrome 113+, Edge 113+, or other WebGPU-enabled browsers).`;
      showDemoStatus(errorMessage, 'error', 100);
      showBackendBadge('Not Supported');
      // Disable all input elements
      demoTextInput.disabled = true;
      demoGenerateBtn.disabled = true;
      demoTotalSteps.disabled = true;
      demoDurationFactor.disabled = true;
      demoElevenLabsApiKey.disabled = true;
      if (demoSecondaryApiKey) demoSecondaryApiKey.disabled = true;
      if (demoTertiaryApiKey) demoTertiaryApiKey.disabled = true;
      // Disable voice toggle
      const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
      voiceToggleTexts.forEach(text => {
        text.classList.add('disabled');
        text.style.pointerEvents = 'none';
        text.style.opacity = '0.5';
      });
      return; // Stop initialization
    }
    // Load models with WebGPU
    showDemoStatus('<strong>WebGPU detected! Loading models...</strong>', 'info', 10);
    const modelsLoadPromise = loadOnnxAll(basePath, {
      executionProviders: ['webgpu'],
      graphOptimizationLevel: 'all'
    }, (modelName, current, total) => {
      const progress = 10 + (current / total) * 70; // 10-80% for model loading
      showDemoStatus(`<strong>Loading models with WebGPU (${current}/${total}):</strong> ${modelName}...`, 'info', progress);
    });
    // Load processors in parallel with models
    const [loadedModels, loadedProcessors] = await Promise.all([
      modelsLoadPromise,
      loadProcessors(basePath)
    ]);
    models = loadedModels;
    processors = loadedProcessors;
    showDemoStatus('<strong>Loading reference embeddings...</strong>', 'info', 85);
    // Load pre-extracted embeddings for default voice
    const embeddings = await loadStyleEmbeddings(currentVoice);
    currentStyleTtlTensor = embeddings.styleTtl;
    currentStyleDpTensor = embeddings.styleDp;
    showDemoStatus('<strong>Warming up models...</strong>', 'info', 90);
    // Warmup step: run inference once in background with dummy text
    await warmupModels();
    hideDemoStatus();
    demoGenerateBtn.disabled = false;
    // Enable voice toggle buttons after models are loaded
    const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
    voiceToggleTexts.forEach(text => text.classList.remove('disabled'));
    // Validate initial text now that models are loaded
    updateCharCounter();
  } catch (error) {
    showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error');
    showDemoError(`Failed to initialize: ${error.message}. Check console for details.`);
  }
}
// ElevenLabs API synthesis function.
// Returns a result object: on success { success: true, audioBlob,
// audioDuration, processingTime, url, text }; on failure
// { success: false, error, processingTime, text }.
// Fix: the AudioContext used only to measure duration is now closed
// (browsers cap the number of live AudioContexts; the Gemini path
// already closed its context).
async function generateSpeechElevenLabs(text, apiKey) {
  const startTime = Date.now();
  try {
    const response = await fetch('https://api.elevenlabs.io/v1/text-to-speech/JBFqnCBsd6RMkjVDRZzb', {
      method: 'POST',
      headers: {
        'Accept': 'audio/mpeg',
        'Content-Type': 'application/json',
        'xi-api-key': apiKey
      },
      body: JSON.stringify({
        text: text,
        model_id: 'eleven_flash_v2_5',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.5
        }
      })
    });
    if (!response.ok) {
      throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText}`);
    }
    const audioBlob = await response.blob();
    const audioBuffer = await audioBlob.arrayBuffer();
    // Decode once to measure duration, then release the AudioContext
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const decodedAudio = await audioContext.decodeAudioData(audioBuffer);
    const audioDuration = decodedAudio.duration;
    await audioContext.close();
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: true,
      audioBlob,
      audioDuration,
      processingTime,
      url: URL.createObjectURL(audioBlob),
      text: text // include the input text in the result
    };
  } catch (error) {
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: false,
      error: error.message,
      processingTime,
      text: text // include the text on error as well
    };
  }
}
// OpenAI TTS-1 API synthesis function.
// Returns the same result shape as the other generateSpeech* functions.
// Fix: the AudioContext used only to measure duration is now closed
// (browsers cap the number of live AudioContexts).
async function generateSpeechOpenAI(text, apiKey) {
  const startTime = Date.now();
  try {
    const response = await fetch('https://api.openai.com/v1/audio/speech', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'tts-1',
        input: text,
        voice: 'alloy',
        response_format: 'mp3'
      })
    });
    if (!response.ok) {
      throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
    }
    const audioBlob = await response.blob();
    const audioBuffer = await audioBlob.arrayBuffer();
    // Decode once to measure duration, then release the AudioContext
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const decodedAudio = await audioContext.decodeAudioData(audioBuffer);
    const audioDuration = decodedAudio.duration;
    await audioContext.close();
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: true,
      audioBlob,
      audioDuration,
      processingTime,
      url: URL.createObjectURL(audioBlob),
      text: text
    };
  } catch (error) {
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: false,
      error: error.message,
      processingTime,
      text: text
    };
  }
}
// Gemini 2.5 Flash TTS API synthesis function.
// Gemini returns raw s16le PCM (base64-encoded inline data); we wrap it
// in a 44-byte WAV header so the browser can decode and play it.
// Returns the same result shape as the other generateSpeech* functions.
// Fixes: the fetched error body was read but never used in the thrown
// error (now included); parseInt now passes an explicit radix; the
// AudioContext is closed on the decode-failure path too.
async function generateSpeechGemini(text, apiKey) {
  const startTime = Date.now();
  try {
    const response = await fetch('https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent', {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        contents: [{
          parts: [{
            text: text
          }]
        }],
        generationConfig: {
          responseModalities: ["AUDIO"],
          speechConfig: {
            voiceConfig: {
              prebuiltVoiceConfig: {
                voiceName: "Kore"
              }
            }
          }
        }
      })
    });
    if (!response.ok) {
      // Include the response body so API errors are actionable
      const errorText = await response.text();
      throw new Error(`Gemini API error: ${response.status} ${response.statusText}${errorText ? ` - ${errorText}` : ''}`);
    }
    const data = await response.json();
    // Extract the first inline audio part from the Gemini response
    let audioContent = null;
    let mimeType = null;
    if (data.candidates && data.candidates[0]?.content?.parts) {
      for (const part of data.candidates[0].content.parts) {
        if (part.inlineData && part.inlineData.data) {
          audioContent = part.inlineData.data;
          mimeType = part.inlineData.mimeType;
          break;
        }
      }
    }
    if (!audioContent) {
      throw new Error('No audio content found in Gemini response');
    }
    // Decode base64 audio content to raw PCM bytes
    const binaryString = atob(audioContent);
    const pcmData = new Uint8Array(binaryString.length);
    for (let i = 0; i < binaryString.length; i++) {
      pcmData[i] = binaryString.charCodeAt(i);
    }
    // Parse sample rate from mimeType if available (e.g., "audio/pcm;rate=24000")
    let sampleRate = 24000; // default
    if (mimeType && mimeType.includes('rate=')) {
      const match = mimeType.match(/rate=(\d+)/);
      if (match) {
        sampleRate = parseInt(match[1], 10);
      }
    }
    // Gemini returns s16le (signed 16-bit little-endian PCM), mono
    const numChannels = 1;
    const bitsPerSample = 16;
    const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
    const blockAlign = numChannels * (bitsPerSample / 8);
    const dataSize = pcmData.length;
    // Create WAV header (44 bytes, canonical RIFF/WAVE PCM layout)
    const wavHeader = new ArrayBuffer(44);
    const view = new DataView(wavHeader);
    // RIFF chunk descriptor
    view.setUint32(0, 0x52494646, false); // "RIFF"
    view.setUint32(4, 36 + dataSize, true); // File size - 8
    view.setUint32(8, 0x57415645, false); // "WAVE"
    // fmt sub-chunk
    view.setUint32(12, 0x666d7420, false); // "fmt "
    view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
    view.setUint16(20, 1, true); // AudioFormat (1 for PCM)
    view.setUint16(22, numChannels, true); // NumChannels
    view.setUint32(24, sampleRate, true); // SampleRate
    view.setUint32(28, byteRate, true); // ByteRate
    view.setUint16(32, blockAlign, true); // BlockAlign
    view.setUint16(34, bitsPerSample, true); // BitsPerSample
    // data sub-chunk
    view.setUint32(36, 0x64617461, false); // "data"
    view.setUint32(40, dataSize, true); // Subchunk2Size
    // Combine header and PCM data
    const wavData = new Uint8Array(44 + dataSize);
    wavData.set(new Uint8Array(wavHeader), 0);
    wavData.set(pcmData, 44);
    const finalAudioBuffer = wavData.buffer;
    // Decode once to measure duration, then release the AudioContext
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    let decodedAudio;
    try {
      decodedAudio = await audioContext.decodeAudioData(finalAudioBuffer.slice(0)); // Use slice to create a copy
    } catch (decodeError) {
      await audioContext.close(); // don't leak the context on failure
      throw new Error(`Unable to decode Gemini audio: ${decodeError.message}`);
    }
    const audioDuration = decodedAudio.duration;
    await audioContext.close();
    // Create blob with WAV format
    const audioBlob = new Blob([finalAudioBuffer], { type: 'audio/wav' });
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: true,
      audioBlob,
      audioDuration,
      processingTime,
      url: URL.createObjectURL(audioBlob),
      text: text
    };
  } catch (error) {
    const endTime = Date.now();
    const processingTime = (endTime - startTime) / 1000;
    return {
      success: false,
      error: error.message,
      processingTime,
      text: text
    };
  }
}
// Update an individual system's result row in the comparison table.
// Fix: timeEl/durationEl/rtfEl were dereferenced without null checks while
// statusEl/titleStatusEl were guarded — a missing row element would throw.
// All element lookups are now consistently null-guarded.
function updateComparisonRow(system, result) {
  if (!isComparisonMode) return;
  const statusEl = document.getElementById(`${system}Status`);
  const titleStatusEl = document.getElementById(`${system}-status`);
  const timeEl = document.getElementById(`${system}Time`);
  const durationEl = document.getElementById(`${system}Duration`);
  const rtfEl = document.getElementById(`${system}RTF`);
  // The status cell is reset identically for success and failure
  if (statusEl) {
    statusEl.textContent = '';
    statusEl.className = 'demo-comparison-cell';
  }
  if (result.success) {
    if (titleStatusEl) {
      titleStatusEl.textContent = '✅ Completed';
      titleStatusEl.classList.remove('status-error', 'status-running');
      titleStatusEl.classList.add('status-success');
    }
    if (timeEl) timeEl.textContent = `${result.processingTime.toFixed(2)}s`;
    if (durationEl) durationEl.textContent = `${result.audioDuration.toFixed(2)}s`;
    if (rtfEl) {
      const rtfValue = result.processingTime / result.audioDuration;
      rtfEl.innerHTML = `<strong>${rtfValue.toFixed(3)}x</strong>`;
    }
  } else {
    if (titleStatusEl) {
      titleStatusEl.textContent = '❌ Failed';
      titleStatusEl.classList.remove('status-success', 'status-running');
      titleStatusEl.classList.add('status-error');
    }
    if (timeEl) timeEl.textContent = result.error || 'Error';
    if (durationEl) durationEl.textContent = '-';
    if (rtfEl) rtfEl.textContent = '-';
  }
}
// Highlight winner after all complete (based on RTF — real-time factor,
// processingTime / audioDuration; lower is better).
function highlightWinner(results) {
  if (!isComparisonMode) return;
  if (!Array.isArray(results) || results.length < 2) return;
  // Remove all winner classes first
  const systems = ['supertonic', 'elevenlabs', 'openai', 'gemini'];
  systems.forEach(system => {
    const row = document.querySelector(`.${system}-row`);
    const rtfEl = document.getElementById(`${system}RTF`);
    if (row) row.classList.remove('winner');
    if (rtfEl) rtfEl.classList.remove('fastest');
  });
  // Calculate RTF for each result and find the best one.
  // NOTE(review): system attribution is positional/heuristic — index 0 (or a
  // text match against results[0]) is assumed to be Supertonic, and the
  // remaining results are assigned to whichever provider rows exist in the
  // DOM in fixed elevenlabs -> openai -> gemini order. This can mislabel
  // results if they are passed in any other order — confirm callers always
  // pass [supertonic, elevenlabs, openai, gemini].
  const systemResults = [];
  results.forEach((result, index) => {
    if (result && result.success && result.audioDuration > 0) {
      const rtfValue = result.processingTime / result.audioDuration;
      // Determine system by result order in the results array
      // Results are typically passed in order: [supertonicResult, elevenlabsResult, openaiResult, geminiResult]
      let system = null;
      if (index === 0 || result.text === results[0]?.text) {
        system = 'supertonic';
      } else {
        // Check which system by looking at existing elements
        const hasElevenlabs = document.querySelector('.elevenlabs-row');
        const hasOpenai = document.querySelector('.openai-row');
        const hasGemini = document.querySelector('.gemini-row');
        if (hasElevenlabs && !systemResults.find(s => s.system === 'elevenlabs')) {
          system = 'elevenlabs';
        } else if (hasOpenai && !systemResults.find(s => s.system === 'openai')) {
          system = 'openai';
        } else if (hasGemini && !systemResults.find(s => s.system === 'gemini')) {
          system = 'gemini';
        }
      }
      if (system) {
        systemResults.push({ system, rtfValue });
      }
    }
  });
  // Find the best (lowest RTF) and mark its row
  if (systemResults.length > 0) {
    const best = systemResults.reduce((prev, curr) =>
      curr.rtfValue < prev.rtfValue ? curr : prev
    );
    const row = document.querySelector(`.${best.system}-row`);
    const rtfEl = document.getElementById(`${best.system}RTF`);
    if (row) row.classList.add('winner');
    if (rtfEl) rtfEl.classList.add('fastest');
  }
}
// Supertonic synthesis function (extracted for parallel execution).
// Runs the on-device pipeline: text processing -> duration prediction ->
// text encoding -> iterative denoising -> vocoder, then packages the
// waveform as a WAV blob. Returns { success, processingTime, audioDuration,
// url, text } on success or { success: false, error, text } on failure.
async function generateSupertonicSpeech(text, totalStep, durationFactor) {
  const supertonicStartTime = Date.now();
  try {
    const textList = [text];
    const bsz = 1; // single-item batch
    const sampleRate = cfgs.ae.sample_rate;
    // Use pre-computed style embeddings
    const styleTtlTensor = currentStyleTtlTensor;
    const styleDpTensor = currentStyleDpTensor;
    // Step 1: Estimate duration
    const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList);
    // Check for unsupported characters
    if (unsupportedChars && unsupportedChars.length > 0) {
      const charList = unsupportedChars.map(c => `"${c}"`).join(', ');
      throw new Error(`Unsupported characters: ${charList}`);
    }
    const textIdsShape = [bsz, textIds[0].length];
    const textMaskShape = [bsz, 1, textMask[0][0].length];
    const textMaskTensor = arrayToTensor(textMask, textMaskShape);
    const dpResult = await models.dpOrt.run({
      text_ids: intArrayToTensor(textIds, textIdsShape),
      style_dp: styleDpTensor,
      text_mask: textMaskTensor
    });
    const durOnnx = Array.from(dpResult.duration.data);
    // Apply duration factor to adjust speech length (once).
    // NOTE(review): the per-voice adjustment (0.1 for 'F', 0.08 otherwise)
    // looks like an empirically-tuned correction — confirm before changing.
    const durationAdjustment = currentVoice === 'F' ? 0.1 : 0.08;
    for (let i = 0; i < durOnnx.length; i++) {
      durOnnx[i] *= (durationFactor - durationAdjustment);
    }
    // Reshape durations to [bsz][1][1] as expected by sampleNoisyLatent
    const durReshaped = [];
    for (let b = 0; b < bsz; b++) {
      durReshaped.push([[durOnnx[b]]]);
    }
    // Step 2: Encode text
    const textEncResult = await models.textEncOrt.run({
      text_ids: intArrayToTensor(textIds, textIdsShape),
      style_ttl: styleTtlTensor,
      text_mask: textMaskTensor
    });
    const textEmbTensor = textEncResult.text_emb;
    // Step 3: Denoising — iteratively refine the noisy latent
    let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
    const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length];
    const latentMaskShape = [bsz, 1, latentMask[0][0].length];
    const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);
    // Prepare constant tensors
    const totalStepArray = new Array(bsz).fill(totalStep);
    const scalarShape = [bsz];
    const totalStepTensor = arrayToTensor(totalStepArray, scalarShape);
    for (let step = 0; step < totalStep; step++) {
      const currentStepArray = new Array(bsz).fill(step);
      const vectorEstResult = await models.vectorEstOrt.run({
        noisy_latent: arrayToTensor(noisyLatent, latentShape),
        text_emb: textEmbTensor,
        style_ttl: styleTtlTensor,
        text_mask: textMaskTensor,
        latent_mask: latentMaskTensor,
        total_step: totalStepTensor,
        current_step: arrayToTensor(currentStepArray, scalarShape)
      });
      const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data);
      // Copy the flat model output back into the nested latent in place
      let idx = 0;
      for (let b = 0; b < noisyLatent.length; b++) {
        for (let d = 0; d < noisyLatent[b].length; d++) {
          for (let t = 0; t < noisyLatent[b][d].length; t++) {
            noisyLatent[b][d][t] = denoisedLatent[idx++];
          }
        }
      }
    }
    // Step 4: Generate waveform
    const vocoderResult = await models.vocoderOrt.run({
      latent: arrayToTensor(noisyLatent, latentShape)
    });
    const wavBatch = Array.from(vocoderResult.wav_tts.data);
    // Trim the waveform to the predicted duration in samples
    const wavLen = Math.floor(sampleRate * durOnnx[0]);
    const wavOut = wavBatch.slice(0, wavLen);
    // Create WAV file
    const wavBuffer = writeWavFile(wavOut, sampleRate);
    const blob = new Blob([wavBuffer], { type: 'audio/wav' });
    const url = URL.createObjectURL(blob);
    // Calculate times for Supertonic
    const supertonicEndTime = Date.now();
    const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;
    const audioDurationSec = durOnnx[0];
    return {
      success: true,
      processingTime: supertonicProcessingTime,
      audioDuration: audioDurationSec,
      url: url,
      text: text
    };
  } catch (error) {
    return {
      success: false,
      error: error.message,
      text: text
    };
  }
}
// Format elapsed seconds for display:
//   under 1 minute  -> "SS.cc"
//   under 1 hour    -> "MM:SS.cc"
//   1 hour or more  -> "HH:MM:SS.cc"
// where "cc" is hundredths of a second; every field is zero-padded to 2 digits.
function formatTimeDetailed(seconds) {
  const pad2 = (n) => n.toString().padStart(2, '0');
  const hours = Math.floor(seconds / 3600);
  const mins = Math.floor((seconds % 3600) / 60);
  const secs = seconds % 60;
  const wholeSecs = Math.floor(secs);
  const centis = Math.floor((secs % 1) * 100);
  const secPart = `${pad2(wholeSecs)}.${pad2(centis)}`;
  if (seconds < 60) {
    return secPart;
  }
  if (seconds < 3600) {
    return `${pad2(mins)}:${secPart}`;
  }
  return `${pad2(hours)}:${pad2(mins)}:${secPart}`;
}
// Render result to UI with custom audio player
async function renderResult(system, result, isFirst = false) {
const container = document.getElementById('demoResults');
const formatTime = (seconds, { trimMobile = false } = {}) => {
const mins = Math.floor(seconds / 60);
const secs = seconds % 60;
const secString = secs.toFixed(2).padStart(5, '0');
let formatted = `${mins}:${secString}`;
if (trimMobile) {
formatted = trimDecimalsForMobile(formatted);
}
return formatted;
};
const textLength = result.text ? result.text.length : 0;
const isBatch = textLength >= MAX_CHUNK_LENGTH;
const successfulResult = result && result.success;
const firstChunkTimeValue = result.firstChunkTime;
const processingTimeStr = successfulResult
? (isBatch && firstChunkTimeValue
? `${formatTimeDetailed(firstChunkTimeValue)} / ${formatTimeDetailed(result.processingTime)}`
: formatTimeDetailed(result.processingTime))
: (result.error || 'Error');
const charsPerSec = successfulResult && result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '-';
const rtf = successfulResult && result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-';
const progressValue = successfulResult && textLength > 0 ? 100 : 0;
const titleMain = system === 'supertonic' ? 'Supertonic' :
(system === 'openai' ? 'OpenAI TTS-1' :
(system === 'gemini' ? 'Gemini 2.5 Flash TTS' : 'ElevenLabs Flash v2.5'));
const titleSub = system === 'supertonic' ? 'On-Device' : 'Cloud API';
const titleColor =
system === 'supertonic'
? 'var(--supertone_blue)'
: system === 'elevenlabs'
? 'var(--brand-elevenlabs)'
: system === 'openai'
? 'var(--brand-openai)'
: system === 'gemini'
? 'var(--brand-gemini)'
: '#999';
const titleStatus = isComparisonMode
? `<span class="title-status status-running" id="${system}-status">⏳ Running...</span>`
: '';
const hasAudio = successfulResult && result.url;
const totalDurationDisplay = successfulResult && typeof result.audioDuration === 'number'
? formatTime(result.audioDuration, { trimMobile: true })
: '--';
const downloadActionsHTML = hasAudio ? `
<div class="demo-result-actions">
<button class="demo-download-btn" onclick="downloadDemoAudio('${result.url}', '${system}_speech.${system === 'supertonic' ? 'wav' : 'mp3'}')" aria-label="Download ${system === 'supertonic' ? 'WAV' : 'MP3'}" title="Download ${system === 'supertonic' ? 'WAV' : 'MP3'}">
<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
<polyline points="7 10 12 15 17 10"/>
<line x1="12" y1="15" x2="12" y2="3"/>
</svg>
</button>
</div>` : '';
const infoMarkupSuccess = `
<!--
<div class="stat">
<div class="stat-value" id="${system}-chars">${textLength}</div>
<div class="stat-label">Processed Chars</div>
</div>
-->
<div class="stat">
<div class="stat-value" id="${system}-time">${formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true })}</div>
<div class="stat-label">Processing Time<span class="stat-arrow stat-arrow--down">↓</span></div>
</div>
<div class="stat">
<div class="stat-value" id="${system}-cps">${charsPerSec}</div>
<div class="stat-label">Chars/sec<span class="stat-arrow stat-arrow--up">↑</span></div>
</div>
<div class="stat">
<div class="stat-value" id="${system}-rtf">${formatStatValueWithSuffix(rtf, 'x')}</div>
<div class="stat-label">RTF<span class="stat-arrow stat-arrow--down">↓</span></div>
</div>
<!--
<div class="stat">
<div class="stat-value">${progressValue.toFixed ? progressValue.toFixed(1) : progressValue}%</div>
<div class="stat-label">Progress</div>
</div>
-->
`;
const infoMarkupError = `
<div class="stat" style="width: 100%;">
<div class="stat-value">${result.error || 'Failed'}</div>
</div>
`;
const resultItemEl = document.getElementById(`${system}-result`);
const infoContainer = resultItemEl ? resultItemEl.querySelector('.demo-result-info') : null;
const playerContainer = resultItemEl ? resultItemEl.querySelector('.custom-audio-player') : null;
if (resultItemEl && infoContainer && playerContainer) {
resultItemEl.classList.add(`${system}-result-item`);
resultItemEl.classList.remove('generating');
resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
resultItemEl.style.setProperty('--provider-color', titleColor);
const titleMainEl = resultItemEl.querySelector('.title-main');
if (titleMainEl) {
titleMainEl.textContent = titleMain;
titleMainEl.style.color = titleColor;
}
const titleSubEl = resultItemEl.querySelector('.title-sub');
if (titleSubEl) {
titleSubEl.textContent = titleSub;
}
if (!resultItemEl.querySelector('.title-status') && titleStatus) {
const titleEl = resultItemEl.querySelector('.demo-result-title');
if (titleEl) {
titleEl.insertAdjacentHTML('beforeend', titleStatus);
}
}
infoContainer.classList.toggle('error', !successfulResult);
infoContainer.innerHTML = successfulResult ? infoMarkupSuccess : infoMarkupError;
if (successfulResult) {
playerContainer.style.display = '';
playerContainer.innerHTML = `
<button id="${system}-play-pause-btn" class="player-btn"${hasAudio ? '' : ' disabled'}>${hasAudio ? PLAY_ICON_SVG : STOP_ICON_SVG}</button>
<div class="time-display" id="${system}-current-time">0:00.00</div>
<div class="progress-container" id="${system}-progress-container">
<div class="progress-bar">
<div class="progress-fill" id="${system}-progress-fill"></div>
</div>
</div>
<div class="time-display" id="${system}-total-duration">${totalDurationDisplay}</div>
${downloadActionsHTML}
`;
} else {
playerContainer.style.display = 'none';
playerContainer.innerHTML = '';
}
container.style.display = 'flex';
if (successfulResult && hasAudio) {
await setupCustomPlayer(system, result);
} else if (successfulResult && !hasAudio) {
const playBtnEl = document.getElementById(`${system}-play-pause-btn`);
if (playBtnEl) playBtnEl.disabled = true;
}
return;
}
const infoSection = successfulResult
? `<div class="demo-result-info">${infoMarkupSuccess}</div>`
: `<div class="demo-result-info error">${infoMarkupError}</div>`;
const resultHTML = `
<div class="demo-result-item ${system}-result-item generating" id="${system}-result" style="--result-progress: ${progressValue}%">
<div class="demo-result-title">
<span class="title-main" style="color: ${titleColor};">${titleMain}</span>
<span class="title-sub">${titleSub}</span>
${titleStatus}
</div>
${infoSection}
<div class="custom-audio-player">
<button id="${system}-play-pause-btn" class="player-btn"${hasAudio ? '' : ' disabled'}>${hasAudio ? PLAY_ICON_SVG : STOP_ICON_SVG}</button>
<div class="time-display" id="${system}-current-time">0:00.00</div>
<div class="progress-container" id="${system}-progress-container">
<div class="progress-bar">
<div class="progress-fill" id="${system}-progress-fill"></div>
</div>
</div>
<div class="time-display" id="${system}-total-duration">${totalDurationDisplay}</div>
${downloadActionsHTML}
</div>
</div>
`;
container.insertAdjacentHTML('beforeend', resultHTML);
container.style.display = 'flex';
if (hasAudio) {
await setupCustomPlayer(system, result);
} else {
const playBtnEl = document.getElementById(`${system}-play-pause-btn`);
if (playBtnEl) playBtnEl.disabled = true;
}
}
/**
 * Sets up an isolated Web Audio playback widget for one TTS result row.
 * `system` is the DOM id prefix (e.g. "supertonic" → "supertonic-play-pause-btn").
 * Fetches `result.url` (presumably a blob/audio URL produced earlier — confirm
 * against callers), decodes it into an AudioBuffer, and drives the row's
 * play/pause button, elapsed/total time displays and progress bar.
 * Registers a record in the shared `customAudioPlayers` array so that
 * `pauseAllPlayersExcept()` can stop this player when another one starts.
 *
 * @param {string} system - DOM id prefix identifying this result row
 * @param {{url: string}} result - object whose `url` points at fetchable audio
 */
async function setupCustomPlayer(system, result) {
  const playPauseBtn = document.getElementById(`${system}-play-pause-btn`);
  const progressContainer = document.getElementById(`${system}-progress-container`);
  const currentTimeDisplay = document.getElementById(`${system}-current-time`);
  const durationDisplay = document.getElementById(`${system}-total-duration`);
  const progressFill = document.getElementById(`${system}-progress-fill`);
  // Bail out (and log) if the expected markup was not rendered for this row.
  if (!playPauseBtn || !progressContainer || !currentTimeDisplay || !durationDisplay || !progressFill) {
    console.error('Failed to find player elements for', system);
    return;
  }
  // Create dedicated audio context for this player
  const playerAudioContext = new (window.AudioContext || window.webkitAudioContext)();
  // Playback state. `source` is recreated on every (re)start because an
  // AudioBufferSourceNode can only be started once.
  let audioBuffer = null;
  let source = null;
  let startTime = 0; // AudioContext time at which playback (virtually) began
  let pauseTime = 0; // position in seconds into the buffer where we paused/seeked
  let isPlaying = false;
  let isPaused = false;
  let animationFrameId = null;
  let playerRecord = null;
  // Formats seconds as m:ss.cc (e.g. 83.4 → "1:23.40").
  // NOTE(review): callers below pass a { trimMobile } options object, but this
  // local formatTime ignores it (unlike the similarly-named helper defined
  // elsewhere in this file) — confirm whether mobile trimming is wanted here.
  const formatTime = (seconds) => {
    const mins = Math.floor(seconds / 60);
    const secs = seconds % 60;
    const secString = secs.toFixed(2).padStart(5, '0');
    return `${mins}:${secString}`;
  };
  // Fetch and decode audio
  try {
    const response = await fetch(result.url);
    const arrayBuffer = await response.arrayBuffer();
    audioBuffer = await playerAudioContext.decodeAudioData(arrayBuffer);
  } catch (error) {
    // Leave the row visible but unplayable if the audio cannot be decoded.
    console.error('Failed to load audio for', system, error);
    playPauseBtn.disabled = true;
    return;
  }
  // requestAnimationFrame loop: mirrors the playback position into the
  // progress bar / time label, and resets the UI when the buffer ends.
  const updateProgress = () => {
    if (!isPlaying || !playerAudioContext) return;
    const currentTime = isPaused ? pauseTime : (playerAudioContext.currentTime - startTime);
    const duration = audioBuffer.duration;
    const progress = duration > 0 ? (currentTime / duration) * 100 : 0;
    progressFill.style.width = `${Math.min(progress, 100)}%`;
    currentTimeDisplay.textContent = formatTime(Math.min(currentTime, duration), { trimMobile: true });
    if (currentTime < duration) {
      animationFrameId = requestAnimationFrame(updateProgress);
    } else {
      // Playback finished
      isPlaying = false;
      isPaused = false;
      playPauseBtn.innerHTML = PLAY_ICON_SVG;
      progressFill.style.width = '100%';
      currentTimeDisplay.textContent = formatTime(duration, { trimMobile: true });
    }
  };
  // Three-way toggle: resume from pause, pause while playing, or start fresh.
  const togglePlayPause = () => {
    if (!audioBuffer) return;
    if (isPaused) {
      // Resume from paused position
      pauseAllPlayersExcept(playerRecord);
      if (playerAudioContext.state === 'suspended') {
        playerAudioContext.resume();
      }
      // Fresh source node; offset playback by the saved pause position.
      source = playerAudioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(playerAudioContext.destination);
      source.start(0, pauseTime);
      startTime = playerAudioContext.currentTime - pauseTime;
      isPaused = false;
      isPlaying = true;
      playPauseBtn.innerHTML = PAUSE_ICON_SVG;
      updateProgress();
    } else if (isPlaying) {
      // Pause playback
      pauseTime = playerAudioContext.currentTime - startTime;
      if (source) {
        source.stop();
        source = null;
      }
      // Suspending the context freezes currentTime so resume math stays valid.
      playerAudioContext.suspend();
      isPaused = true;
      isPlaying = false;
      playPauseBtn.innerHTML = PLAY_ICON_SVG;
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
      }
    } else {
      // Start from beginning
      pauseAllPlayersExcept(playerRecord);
      pauseTime = 0;
      if (playerAudioContext.state === 'suspended') {
        playerAudioContext.resume();
      }
      source = playerAudioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(playerAudioContext.destination);
      source.start(0);
      startTime = playerAudioContext.currentTime;
      isPlaying = true;
      isPaused = false;
      playPauseBtn.innerHTML = PAUSE_ICON_SVG;
      updateProgress();
    }
  };
  // Jumps playback to `percentage` (0–100) of the buffer, preserving the
  // playing/paused state the player was in before the seek.
  const seekTo = (percentage) => {
    if (!audioBuffer) return;
    const seekTime = (percentage / 100) * audioBuffer.duration;
    const wasPlaying = isPlaying && !isPaused;
    // Stop current playback
    if (source) {
      try {
        source.stop();
      } catch (e) {
        // Already stopped
      }
      source = null;
    }
    if (animationFrameId) {
      cancelAnimationFrame(animationFrameId);
    }
    pauseTime = seekTime;
    // Update UI
    const progress = (seekTime / audioBuffer.duration) * 100;
    progressFill.style.width = `${Math.min(progress, 100)}%`;
    currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true });
    if (wasPlaying) {
      // Resume from new position
      if (playerAudioContext.state === 'suspended') {
        playerAudioContext.resume();
      }
      source = playerAudioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(playerAudioContext.destination);
      source.start(0, seekTime);
      startTime = playerAudioContext.currentTime - seekTime;
      isPlaying = true;
      isPaused = false;
      playPauseBtn.innerHTML = PAUSE_ICON_SVG;
      updateProgress();
    } else {
      // Just update position, stay paused.
      // Deliberate combined state: isPaused && isPlaying makes the next
      // togglePlayPause click take the "resume from pauseTime" branch.
      isPaused = true;
      isPlaying = true;
      playPauseBtn.innerHTML = PLAY_ICON_SVG;
    }
  };
  // Cleanup function for this player
  // Pauses without tearing down the context; called by pauseAllPlayersExcept()
  // when another player starts.
  const pausePlayback = () => {
    if (!playerAudioContext || playerAudioContext.state === 'closed') return;
    if (isPlaying) {
      pauseTime = playerAudioContext.currentTime - startTime;
      if (source) {
        try {
          source.stop();
        } catch (e) {
          // Already stopped
        }
        source = null;
      }
      playerAudioContext.suspend().catch(() => {});
      isPaused = true;
      isPlaying = false;
      playPauseBtn.innerHTML = PLAY_ICON_SVG;
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
        animationFrameId = null;
      }
    }
  };
  // Full teardown: stop playback, close the context, and deregister from the
  // shared player list so stale records don't accumulate across generations.
  const cleanup = () => {
    pausePlayback();
    if (playerAudioContext && playerAudioContext.state !== 'closed') {
      playerAudioContext.close();
    }
    if (playerRecord) {
      customAudioPlayers = customAudioPlayers.filter(p => p !== playerRecord);
    }
  };
  playerRecord = {
    audioContext: playerAudioContext,
    cleanup,
    pausePlayback
  };
  customAudioPlayers.push(playerRecord);
  // Setup event listeners
  playPauseBtn.addEventListener('click', togglePlayPause);
  // Click-to-seek: translate the click's x-offset into a percentage.
  progressContainer.addEventListener('click', (e) => {
    const rect = progressContainer.getBoundingClientRect();
    const percentage = ((e.clientX - rect.left) / rect.width) * 100;
    seekTo(percentage);
  });
}
/**
 * Generates Supertonic speech for `text` with chunking and progressive playback.
 * The text is split via chunkText(); each chunk is synthesized in sequence and
 * the decoded samples are finally concatenated, with 0.3 s of silence between
 * chunks, into a single downloadable WAV blob.
 *
 * Progressive callbacks (both optional) receive a fresh blob URL for just that
 * chunk; the callback owns (and should eventually revoke) the URL:
 *   onFirstChunkReady(url, durationSoFar, text, numChunks, firstChunkTime, processedChars)
 *   onChunkAdded(url, durationSoFar, chunkIndex, numChunks, elapsedSeconds, processedChars)
 *
 * @param {string} text - full input text to synthesize
 * @param {number} totalStep - step count forwarded to generateSupertonicSpeech
 * @param {number} durationFactor - speaking-rate factor forwarded to generateSupertonicSpeech
 * @param {Function} [onFirstChunkReady] - invoked once the first chunk's audio exists
 * @param {Function} [onChunkAdded] - invoked for every subsequent chunk
 * @returns {Promise<object>} on success: { success: true, processingTime,
 *   audioDuration, url, text, firstChunkTime }; on failure: { success: false, error, text }
 */
async function generateSupertonicSpeechChunked(text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded) {
  const supertonicStartTime = Date.now();
  const sampleRate = cfgs.ae.sample_rate;
  const silenceDuration = 0.3; // 0.3 seconds of silence between chunks
  try {
    // Split text into chunks
    const chunks = chunkText(text);
    if (chunks.length === 0) {
      // Guard: without this, totalDuration below would be negative.
      throw new Error('No text chunks to synthesize');
    }
    const audioDataArrays = [];
    const durations = [];
    const silenceSamples = Math.floor(silenceDuration * sampleRate);
    let firstChunkTime = 0;
    // Generate speech for each chunk
    for (let i = 0; i < chunks.length; i++) {
      // Named chunkString (not chunkText) to avoid shadowing the chunkText() helper.
      const chunkString = chunks[i];
      const result = await generateSupertonicSpeech(chunkString, totalStep, durationFactor);
      if (!result.success) {
        throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`);
      }
      // Fetch and parse the per-chunk WAV blob into raw Float32 samples.
      const response = await fetch(result.url);
      if (!response.ok) {
        throw new Error(`Failed to fetch audio for chunk ${i + 1}: HTTP ${response.status}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      const { audioData } = parseWavFile(arrayBuffer);
      audioDataArrays.push(audioData);
      durations.push(result.audioDuration);
      // Clean up the per-chunk blob URL now that its bytes are parsed.
      URL.revokeObjectURL(result.url);
      // Progressive playback: hand each chunk to the caller as soon as it exists.
      if (i === 0 && onFirstChunkReady) {
        // First chunk ready - send it immediately.
        firstChunkTime = (Date.now() - supertonicStartTime) / 1000;
        const initialWav = writeWavFile(audioData, sampleRate);
        const initialBlob = new Blob([initialWav], { type: 'audio/wav' });
        const initialUrl = URL.createObjectURL(initialBlob);
        onFirstChunkReady(initialUrl, result.audioDuration, text, chunks.length, firstChunkTime, chunks[0].length);
      } else if (i > 0 && onChunkAdded) {
        // Subsequent chunks - send just the new chunk.
        const chunkWav = writeWavFile(audioData, sampleRate);
        const chunkBlob = new Blob([chunkWav], { type: 'audio/wav' });
        const chunkUrl = URL.createObjectURL(chunkBlob);
        // `durations` holds exactly i + 1 entries here, so summing it directly
        // equals the original slice(0, i + 1) computation. The reported total
        // includes the i silence gaps inserted so far.
        const totalDurationSoFar = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * i;
        const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000;
        const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0);
        onChunkAdded(chunkUrl, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, processedChars);
      }
    }
    // Concatenate all audio chunks with silence for the final result.
    const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1);
    // Calculate total samples needed (audio plus inter-chunk silence).
    let totalSamples = 0;
    for (let i = 0; i < audioDataArrays.length; i++) {
      totalSamples += audioDataArrays[i].length;
      if (i < audioDataArrays.length - 1) {
        totalSamples += silenceSamples;
      }
    }
    const wavCat = new Float32Array(totalSamples);
    let currentIdx = 0;
    for (let i = 0; i < audioDataArrays.length; i++) {
      // Copy audio data
      const audioData = audioDataArrays[i];
      wavCat.set(audioData, currentIdx);
      currentIdx += audioData.length;
      if (i < audioDataArrays.length - 1) {
        // Float32Array is zero-initialized, so skipping indices leaves silence.
        currentIdx += silenceSamples;
      }
    }
    // Create final WAV file
    const wavBuffer = writeWavFile(wavCat, sampleRate);
    const blob = new Blob([wavBuffer], { type: 'audio/wav' });
    const url = URL.createObjectURL(blob);
    const supertonicProcessingTime = (Date.now() - supertonicStartTime) / 1000;
    return {
      success: true,
      processingTime: supertonicProcessingTime,
      audioDuration: totalDuration,
      url: url,
      text: text,
      firstChunkTime: firstChunkTime
    };
  } catch (error) {
    // Normalize every failure into the same result shape the UI expects.
    return {
      success: false,
      error: error.message,
      text: text
    };
  }
}
// Main synthesis function
async function generateSpeech() {
const text = demoTextInput.value.trim();
// Validate text input
const validation = validateTextInput(text);
if (!validation.valid) {
showDemoError(validation.message);
return;
}
if (!models || !cfgs || !processors) {
showDemoError('Models are still loading. Please wait.');
return;
}
if (!currentStyleTtlTensor || !currentStyleDpTensor) {
showDemoError('Reference embeddings are not ready. Please wait.');
return;
}
// Validate characters before generation
const charValidation = validateCharacters(text);
if (!charValidation.valid && charValidation.unsupportedChars.length > 0) {
const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', ');
showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`);
return;
}
const elevenlabsApiKey = demoElevenLabsApiKey.value.trim();
const openaiApiKey = demoSecondaryApiKey.value.trim();
const geminiApiKey = demoTertiaryApiKey.value.trim();
const hasComparison = !!elevenlabsApiKey || !!openaiApiKey || !!geminiApiKey;
isComparisonMode = hasComparison;
document.body.classList.toggle('comparison-mode', hasComparison);
currentGenerationTextLength = text.length;
// Show billing confirmation if API keys are provided
if (hasComparison) {
const apiProviders = [];
if (elevenlabsApiKey) apiProviders.push('ElevenLabs Flash v2.5');
if (openaiApiKey) apiProviders.push('OpenAI TTS-1');
if (geminiApiKey) apiProviders.push('Gemini 2.5 Flash TTS');
const userConfirmed = await showBillingConfirmation(text.length, apiProviders);
if (!userConfirmed) {
return;
}
}
if (!hasComparison && demoComparisonSection) {
demoComparisonSection.style.display = 'none';
}
try {
isGenerating = true;
demoGenerateBtn.disabled = true;
// Disable voice toggle during generation
const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
voiceToggleTexts.forEach(text => text.classList.add('disabled'));
hideDemoError();
hideDemoStatus(); // Hide the status box when starting generation
// Clean up previous audio playback
if (audioContext) {
// Stop all scheduled sources
scheduledSources.forEach(source => {
try {
source.stop();
} catch (e) {
// Already stopped
}
});
scheduledSources = [];
// Close audio context
if (audioContext.state !== 'closed') {
audioContext.close();
}
audioContext = null;
}
// Cancel animation frame
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
animationFrameId = null;
}
// Clean up all custom audio players (ElevenLabs, etc.)
customAudioPlayers.forEach(player => {
if (player.cleanup) {
player.cleanup();
}
});
customAudioPlayers = [];
// Reset state
audioChunks = [];
totalDuration = 0;
startTime = 0;
pauseTime = 0;
isPaused = false;
isPlaying = false;
firstChunkGenerationTime = 0; // Processing time for first chunk
totalChunks = 0;
nextScheduledTime = 0; // Next time to schedule audio chunk
// Show result shell(s) immediately
const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => {
const titleStatus = includeStatus
? `<span class="title-status status-running" id="${system}-status">⏳ Running...</span>`
: '';
return `
<div class="demo-result-item ${system}-result-item generating" id="${system}-result" style="--result-progress: 0%;">
<div class="demo-result-title">
<span class="title-main" style="color: ${titleColor};">${titleMain}</span>
<span class="title-sub">${titleSub}</span>
${titleStatus}
</div>
<div class="demo-result-info">
<!--
<div class="stat">
<div class="stat-value" id="${system}-chars">--</div>
<div class="stat-label">Processed Chars</div>
</div>
-->
<div class="stat">
<div class="stat-value" id="${system}-time">--</div>
<div class="stat-label">Processing Time<span class="stat-arrow stat-arrow--down">↓</span></div>
</div>
<div class="stat">
<div class="stat-value" id="${system}-cps">--</div>
<div class="stat-label">Chars/sec<span class="stat-arrow stat-arrow--up">↑</span></div>
</div>
<div class="stat">
<div class="stat-value" id="${system}-rtf">--</div>
<div class="stat-label">RTF<span class="stat-arrow stat-arrow--down">↓</span></div>
</div>
</div>
<div class="custom-audio-player">
<div class="demo-placeholder-audio">Generating speech...</div>
</div>
</div>
`;
};
const supertonicInitial = createInitialResultItem(
'supertonic',
'Supertonic',
'On-Device',
'var(--supertone_blue)',
isComparisonMode
);
const initialItems = [supertonicInitial];
if (elevenlabsApiKey) {
const elevenInitial = createInitialResultItem(
'elevenlabs',
'ElevenLabs Flash v2.5',
'Cloud API',
'#999',
true
);
initialItems.push(elevenInitial);
}
if (openaiApiKey) {
const openaiInitial = createInitialResultItem(
'openai',
'OpenAI TTS-1',
'Cloud API',
'#999',
true
);
initialItems.push(openaiInitial);
}
if (geminiApiKey) {
const geminiInitial = createInitialResultItem(
'gemini',
'Gemini 2.5 Flash TTS',
'Cloud API',
'#999',
true
);
initialItems.push(geminiInitial);
}
demoResults.style.display = 'flex';
demoResults.innerHTML = initialItems.join('');
// Reset comparison table
if (hasComparison) {
demoComparisonSection.style.display = 'block';
document.getElementById('supertonicStatus').textContent = '⏳ Running...';
document.getElementById('supertonicStatus').className = 'demo-comparison-cell status-running';
document.getElementById('supertonicTime').textContent = '-';
document.getElementById('supertonicDuration').textContent = '-';
document.getElementById('supertonicRTF').textContent = '-';
if (elevenlabsApiKey) {
document.getElementById('elevenlabsStatus').textContent = '⏳ Running...';
document.getElementById('elevenlabsStatus').className = 'demo-comparison-cell status-running';
document.getElementById('elevenlabsTime').textContent = '-';
document.getElementById('elevenlabsDuration').textContent = '-';
document.getElementById('elevenlabsRTF').textContent = '-';
}
if (openaiApiKey) {
document.getElementById('openaiStatus').textContent = '⏳ Running...';
document.getElementById('openaiStatus').className = 'demo-comparison-cell status-running';
document.getElementById('openaiTime').textContent = '-';
document.getElementById('openaiDuration').textContent = '-';
document.getElementById('openaiRTF').textContent = '-';
}
if (geminiApiKey) {
document.getElementById('geminiStatus').textContent = '⏳ Running...';
document.getElementById('geminiStatus').className = 'demo-comparison-cell status-running';
document.getElementById('geminiTime').textContent = '-';
document.getElementById('geminiDuration').textContent = '-';
document.getElementById('geminiRTF').textContent = '-';
}
// Remove winner classes
document.querySelector('.supertonic-row').classList.remove('winner');
const elevenlabsRow = document.querySelector('.elevenlabs-row');
const openaiRow = document.querySelector('.openai-row');
const geminiRow = document.querySelector('.gemini-row');
if (elevenlabsRow) elevenlabsRow.classList.remove('winner');
if (openaiRow) openaiRow.classList.remove('winner');
if (geminiRow) geminiRow.classList.remove('winner');
}
const totalStep = parseInt(demoTotalSteps.value);
const durationFactor = parseFloat(demoDurationFactor.value);
// Track which one finishes first
let firstFinished = false;
let supertonicResult = null;
let elevenlabsResult = null;
let openaiResult = null;
let geminiResult = null;
let latestSupertonicProcessedChars = 0;
// Helper functions for custom player
const formatTime = (seconds, { trimMobile = false } = {}) => {
const mins = Math.floor(seconds / 60);
const secs = seconds % 60;
const secString = secs.toFixed(2).padStart(5, '0');
let formatted = `${mins}:${secString}`;
if (trimMobile) {
formatted = trimDecimalsForMobile(formatted);
}
return formatted;
};
const updateProgress = () => {
if (!isPlaying || !audioContext) return;
const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime);
const progress = totalDuration > 0 ? (currentTime / totalDuration) * 100 : 0;
if (progressFill) {
progressFill.style.width = `${Math.min(progress, 100)}%`;
}
if (currentTimeDisplay) {
currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true });
}
if (currentTime < totalDuration) {
animationFrameId = requestAnimationFrame(updateProgress);
} else {
// Playback finished
isPlaying = false;
isPaused = false;
if (playPauseBtn) {
playPauseBtn.innerHTML = PLAY_ICON_SVG;
}
}
};
const togglePlayPause = () => {
if (!audioContext || audioChunks.length === 0) return;
if (isPaused) {
// Resume from paused position
pauseAllPlayersExcept(supertonicPlayerRecord);
const seekTime = pauseTime;
// Find which chunk we should start from
let accumulatedTime = 0;
let startChunkIndex = 0;
let offsetInChunk = seekTime;
for (let i = 0; i < audioChunks.length; i++) {
const chunkDuration = audioChunks[i].buffer.duration;
if (accumulatedTime + chunkDuration > seekTime) {
startChunkIndex = i;
offsetInChunk = seekTime - accumulatedTime;
break;
}
accumulatedTime += chunkDuration + 0.3;
}
// Stop any existing sources
scheduledSources.forEach(source => {
try {
source.stop();
} catch (e) {
// Already stopped
}
});
scheduledSources = [];
// Resume AudioContext if suspended
if (audioContext.state === 'suspended') {
audioContext.resume();
}
// Reschedule from the pause point
startTime = audioContext.currentTime - seekTime;
let nextStartTime = audioContext.currentTime;
for (let i = startChunkIndex; i < audioChunks.length; i++) {
const source = audioContext.createBufferSource();
source.buffer = audioChunks[i].buffer;
source.connect(audioContext.destination);
if (i === startChunkIndex) {
source.start(nextStartTime, offsetInChunk);
nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk);
} else {
source.start(nextStartTime);
nextStartTime += audioChunks[i].buffer.duration;
}
if (i < audioChunks.length - 1) {
nextStartTime += 0.3;
}
scheduledSources.push(source);
}
nextScheduledTime = nextStartTime;
isPaused = false;
isPlaying = true;
playPauseBtn.innerHTML = PAUSE_ICON_SVG;
updateProgress();
} else if (isPlaying) {
// Pause playback
pauseTime = audioContext.currentTime - startTime;
audioContext.suspend();
isPaused = true;
playPauseBtn.innerHTML = PLAY_ICON_SVG;
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
}
} else {
// Was finished, restart from beginning
pauseAllPlayersExcept(supertonicPlayerRecord);
pauseTime = 0;
// Resume AudioContext if suspended
if (audioContext.state === 'suspended') {
audioContext.resume();
}
// Stop any existing sources
scheduledSources.forEach(source => {
try {
source.stop();
} catch (e) {
// Already stopped
}
});
scheduledSources = [];
// Restart from beginning
startTime = audioContext.currentTime;
let nextStartTime = audioContext.currentTime;
for (let i = 0; i < audioChunks.length; i++) {
const source = audioContext.createBufferSource();
source.buffer = audioChunks[i].buffer;
source.connect(audioContext.destination);
source.start(nextStartTime);
nextStartTime += audioChunks[i].buffer.duration;
if (i < audioChunks.length - 1) {
nextStartTime += 0.3;
}
scheduledSources.push(source);
}
nextScheduledTime = nextStartTime;
isPlaying = true;
isPaused = false;
playPauseBtn.innerHTML = PAUSE_ICON_SVG;
updateProgress();
}
};
const seekTo = (percentage) => {
if (!audioContext || audioChunks.length === 0) return;
const seekTime = (percentage / 100) * totalDuration;
// Remember current playing state
const wasPlaying = isPlaying;
const wasPaused = isPaused;
// Stop all current sources
scheduledSources.forEach(source => {
try {
source.stop();
} catch (e) {
// Already stopped
}
});
scheduledSources = [];
// Cancel animation
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
}
// Find which chunk we should start from
let accumulatedTime = 0;
let startChunkIndex = 0;
let offsetInChunk = seekTime;
for (let i = 0; i < audioChunks.length; i++) {
const chunkDuration = audioChunks[i].buffer.duration;
if (accumulatedTime + chunkDuration > seekTime) {
startChunkIndex = i;
offsetInChunk = seekTime - accumulatedTime;
break;
}
accumulatedTime += chunkDuration + 0.3; // Include silence
}
// If paused or finished, just update the pause position
if (wasPaused || !wasPlaying) {
pauseTime = seekTime;
// Update UI
if (progressFill) {
const progress = (seekTime / totalDuration) * 100;
progressFill.style.width = `${Math.min(progress, 100)}%`;
}
if (currentTimeDisplay) {
currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true });
}
// Set to paused state so play button will resume from seek position
isPaused = true;
isPlaying = true; // Valid state for playback
if (playPauseBtn) {
playPauseBtn.innerHTML = PLAY_ICON_SVG;
}
return;
}
// Resume AudioContext if it was suspended
if (audioContext.state === 'suspended') {
audioContext.resume();
}
// Reschedule from the seek point
startTime = audioContext.currentTime - seekTime;
let nextStartTime = audioContext.currentTime;
for (let i = startChunkIndex; i < audioChunks.length; i++) {
const source = audioContext.createBufferSource();
source.buffer = audioChunks[i].buffer;
source.connect(audioContext.destination);
if (i === startChunkIndex) {
// Start from offset
source.start(nextStartTime, offsetInChunk);
nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk);
} else {
source.start(nextStartTime);
nextStartTime += audioChunks[i].buffer.duration;
}
// Add silence between chunks
if (i < audioChunks.length - 1) {
nextStartTime += 0.3;
}
scheduledSources.push(source);
}
// Update nextScheduledTime for any future chunks
nextScheduledTime = nextStartTime;
// Resume playing state
isPlaying = true;
isPaused = false;
if (playPauseBtn) {
playPauseBtn.innerHTML = PAUSE_ICON_SVG;
}
// Restart progress animation
updateProgress();
};
// Callback for first chunk ready - create custom player and start playback
const onFirstChunkReady = async (url, duration, text, numChunks, firstChunkTime, processedChars) => {
totalChunks = numChunks;
firstChunkGenerationTime = firstChunkTime;
const container = document.getElementById('demoResults');
if (!firstFinished) {
firstFinished = true;
}
const textLength = currentGenerationTextLength > 0
? currentGenerationTextLength
: (text ? text.length : 0);
const isBatch = textLength >= MAX_CHUNK_LENGTH;
const processingTimeStr = isBatch && firstChunkTime
? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}`
: formatTimeDetailed(firstChunkTime);
const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0;
const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars;
const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0
? (displayedInitialChars / firstChunkTime).toFixed(1)
: '0.0';
const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-';
const progressValue = textLength > 0 ? Math.min(100, (displayedInitialChars / textLength) * 100) : 0;
const resultItemEl = document.getElementById('supertonic-result');
if (!resultItemEl) {
console.warn('Supertonic result container not found.');
return;
}
resultItemEl.classList.remove('generating');
resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
const titleMainEl = resultItemEl.querySelector('.title-main');
if (titleMainEl) {
titleMainEl.textContent = 'Supertonic';
titleMainEl.style.color = 'var(--supertone_blue)';
}
const titleSubEl = resultItemEl.querySelector('.title-sub');
if (titleSubEl) {
titleSubEl.textContent = 'On-Device';
}
const infoContainer = resultItemEl.querySelector('.demo-result-info');
if (infoContainer) {
infoContainer.classList.remove('error');
}
const timeElInitial = document.getElementById('supertonic-time');
if (timeElInitial) {
timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true });
}
const cpsElInitial = document.getElementById('supertonic-cps');
if (cpsElInitial) {
cpsElInitial.textContent = charsPerSec;
}
const rtfElInitial = document.getElementById('supertonic-rtf');
if (rtfElInitial) {
rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x');
}
const playerContainer = resultItemEl.querySelector('.custom-audio-player');
if (playerContainer) {
playerContainer.style.display = '';
playerContainer.innerHTML = `
<button id="play-pause-btn" class="player-btn">${PAUSE_ICON_SVG}</button>
<div class="time-display" id="current-time">0:00.00</div>
<div class="progress-container" id="progress-container">
<div class="progress-bar">
<div class="progress-fill" id="progress-fill"></div>
</div>
</div>
<div class="time-display" id="total-duration">${formatTime(duration, { trimMobile: true })}</div>
<div class="demo-result-actions" style="display: none;">
<button class="demo-download-btn" id="supertonic-download" aria-label="Download WAV" title="Download WAV">
<svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
<polyline points="7 10 12 15 17 10"/>
<line x1="12" y1="15" x2="12" y2="3"/>
</svg>
</button>
</div>
`;
}
container.style.display = 'flex';
latestSupertonicProcessedChars = displayedInitialChars;
// Get UI elements
playPauseBtn = document.getElementById('play-pause-btn');
progressBar = document.getElementById('progress-container');
currentTimeDisplay = document.getElementById('current-time');
durationDisplay = document.getElementById('total-duration');
progressFill = document.getElementById('progress-fill');
// Initialize Web Audio API
audioContext = new (window.AudioContext || window.webkitAudioContext)();
startTime = audioContext.currentTime;
totalDuration = duration;
isPlaying = true;
isPaused = false;
// Create Supertonic player record and register it
const pausePlayback = () => {
if (!audioContext || audioContext.state === 'closed') return;
if (isPlaying) {
pauseTime = audioContext.currentTime - startTime;
scheduledSources.forEach(source => {
try {
source.stop();
} catch (e) {
// Already stopped
}
});
scheduledSources = [];
audioContext.suspend();
isPaused = true;
isPlaying = false;
if (playPauseBtn) {
playPauseBtn.innerHTML = PLAY_ICON_SVG;
}
if (animationFrameId) {
cancelAnimationFrame(animationFrameId);
}
}
};
// Record exposing the pause hook so other players can silence Supertonic.
supertonicPlayerRecord = {
  audioContext: audioContext,
  pausePlayback: pausePlayback
};
// Remove old Supertonic player if exists and add new one
customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext);
customAudioPlayers.push(supertonicPlayerRecord);
// Pause all other players before starting Supertonic
pauseAllPlayersExcept(supertonicPlayerRecord);
// Fetch and decode first chunk
const response = await fetch(url);
const arrayBuffer = await response.arrayBuffer();
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration });
// Play first chunk immediately
const source = audioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(audioContext.destination);
source.start(audioContext.currentTime);
scheduledSources.push(source);
// Set next scheduled time for additional chunks
nextScheduledTime = audioContext.currentTime + audioBuffer.duration + 0.3; // Add silence gap
// Setup player controls
playPauseBtn.addEventListener('click', togglePlayPause);
progressBar.addEventListener('click', (e) => {
  // Translate the click's x-position inside the bar into a 0-100 seek percentage.
  const rect = progressBar.getBoundingClientRect();
  const percentage = ((e.clientX - rect.left) / rect.width) * 100;
  seekTo(percentage);
});
// Start progress animation
updateProgress();
// Clean up URL (the decoded buffer no longer needs the blob URL)
URL.revokeObjectURL(url);
};
// Callback for each additional chunk - schedule seamlessly.
// Decodes the chunk, schedules it at the pre-computed timeline position,
// and refreshes the duration / stats UI for the Supertonic result card.
// NOTE(review): chunkIndex and totalChunks are accepted but unused here.
const onChunkAdded = async (url, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => {
  if (!audioContext) return;
  // Fetch and decode the new chunk
  const response = await fetch(url);
  const arrayBuffer = await response.arrayBuffer();
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  const chunkDuration = audioBuffer.duration;
  audioChunks.push({ buffer: audioBuffer, duration: chunkDuration });
  // Schedule the new chunk at the pre-calculated time
  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioContext.destination);
  source.start(nextScheduledTime);
  scheduledSources.push(source);
  // Update next scheduled time for the next chunk
  nextScheduledTime = nextScheduledTime + audioBuffer.duration + 0.3; // Add silence gap
  // Update total duration
  totalDuration = duration;
  // Update duration display with smooth animation
  if (durationDisplay) {
    durationDisplay.textContent = formatTime(duration, { trimMobile: true });
    durationDisplay.style.transition = 'color 0.3s';
    durationDisplay.style.color = 'var(--supertone_blue)';
    setTimeout(() => {
      durationDisplay.style.color = '';
    }, 300);
  }
  // Update info display
  const textLengthCandidate = currentGenerationTextLength > 0
    ? currentGenerationTextLength
    : demoTextInput.value.trim().length;
  const textLength = textLengthCandidate;
  const isBatch = textLength >= MAX_CHUNK_LENGTH;
  const timeEl = document.getElementById('supertonic-time');
  const durationEl = document.getElementById('supertonic-duration');
  const cpsEl = document.getElementById('supertonic-cps');
  const rtfEl = document.getElementById('supertonic-rtf');
  const effectiveProcessedChars = typeof processedChars === 'number' ? processedChars : latestSupertonicProcessedChars;
  // Ignore out-of-order callbacks that would move the progress backwards.
  if (effectiveProcessedChars < latestSupertonicProcessedChars) {
    URL.revokeObjectURL(url);
    return;
  }
  const clampedProcessedChars = textLength > 0 ? Math.min(effectiveProcessedChars, textLength) : effectiveProcessedChars;
  const progressValue = textLength > 0 ? Math.min(100, (clampedProcessedChars / textLength) * 100) : 0;
  if (durationEl) {
    durationEl.textContent = formatTimeDetailed(duration);
  }
  // Batch runs show "first chunk time / total time so far".
  if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) {
    const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`;
    timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true });
  }
  if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) {
    const charsPerSec = (clampedProcessedChars / currentProcessingTime).toFixed(1);
    cpsEl.textContent = charsPerSec;
  }
  // Real-time factor: processing seconds per second of audio produced.
  if (rtfEl && duration > 0 && currentProcessingTime > 0) {
    const rtf = (currentProcessingTime / duration).toFixed(3);
    rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x');
  }
  const resultItemEl = document.getElementById('supertonic-result');
  if (resultItemEl) {
    resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
  }
  latestSupertonicProcessedChars = clampedProcessedChars;
  // Clean up URL
  URL.revokeObjectURL(url);
};
// Start all syntheses simultaneously
const supertonicPromise = generateSupertonicSpeechChunked(
  text,
  totalStep,
  durationFactor,
  onFirstChunkReady,
  onChunkAdded
);
// Cloud providers only run when the user supplied the matching API key.
const elevenlabsPromise = elevenlabsApiKey ? generateSpeechElevenLabs(text, elevenlabsApiKey) : null;
const openaiPromise = openaiApiKey ? generateSpeechOpenAI(text, openaiApiKey) : null;
const geminiPromise = geminiApiKey ? generateSpeechGemini(text, geminiApiKey) : null;
// Handle results as they arrive
// Supertonic finished: fill in the final stats and enable download.
supertonicPromise.then(result => {
  supertonicResult = result;
  if (result.success) {
    const textLength = result.text ? result.text.length : 0;
    const isBatch = textLength >= MAX_CHUNK_LENGTH;
    // Batch runs show "first chunk time / total time".
    const processingTimeStr = isBatch && firstChunkGenerationTime > 0
      ? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}`
      : formatTimeDetailed(result.processingTime);
    const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0';
    const progressValue = textLength > 0 ? 100 : 0;
    // NOTE(review): progressDisplay is computed but never used below.
    const progressDisplay = progressValue.toFixed(1);
    const timeEl = document.getElementById('supertonic-time');
    const durationEl = document.getElementById('supertonic-duration');
    const cpsEl = document.getElementById('supertonic-cps');
    const rtfEl = document.getElementById('supertonic-rtf');
    if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true });
    if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration);
    latestSupertonicProcessedChars = textLength;
    if (cpsEl) cpsEl.textContent = charsPerSec;
    if (rtfEl) {
      const rtf = result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-';
      rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x');
    }
    const resultItemEl = document.getElementById('supertonic-result');
    if (resultItemEl) {
      resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
    }
    // NOTE(review): duplicate of the assignment a few lines above; harmless.
    latestSupertonicProcessedChars = textLength;
    // Final duration update (if custom player was used)
    if (audioContext && audioChunks.length > 0) {
      totalDuration = result.audioDuration;
      if (durationDisplay) {
        durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true });
      }
    }
    // Always show download button
    const downloadBtn = document.getElementById('supertonic-download');
    if (downloadBtn) {
      downloadBtn.parentElement.style.display = 'block';
      downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav');
    }
  }
  // Update comparison table immediately
  if (hasComparison) {
    updateComparisonRow('supertonic', result);
    // Highlight winner if all are done
    const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null);
    const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult);
    if (allFinished && allResults.length > 1) {
      highlightWinner(allResults);
    }
  }
});
// Cloud-provider completion handlers. These three are intentionally
// parallel in structure (NOTE(review): could be factored into one helper).
// NOTE(review): allFinished does not check supertonicResult, so the winner
// may be highlighted from these handlers before Supertonic completes —
// confirm this is intended.
if (elevenlabsPromise) {
  elevenlabsPromise.then(result => {
    elevenlabsResult = result;
    renderResult('elevenlabs', result, !firstFinished);
    if (!firstFinished) firstFinished = true;
    // Update comparison table immediately
    updateComparisonRow('elevenlabs', result);
    // Highlight winner if all are done
    const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null);
    const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult);
    if (allFinished && allResults.length > 1) {
      highlightWinner(allResults);
    }
  });
}
if (openaiPromise) {
  openaiPromise.then(result => {
    openaiResult = result;
    renderResult('openai', result, !firstFinished);
    if (!firstFinished) firstFinished = true;
    // Update comparison table immediately
    updateComparisonRow('openai', result);
    // Highlight winner if all are done
    const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null);
    const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult);
    if (allFinished && allResults.length > 1) {
      highlightWinner(allResults);
    }
  });
}
if (geminiPromise) {
  geminiPromise.then(result => {
    geminiResult = result;
    renderResult('gemini', result, !firstFinished);
    if (!firstFinished) firstFinished = true;
    // Update comparison table immediately
    updateComparisonRow('gemini', result);
    // Highlight winner if all are done
    const allResults = [supertonicResult, elevenlabsResult, openaiResult, geminiResult].filter(r => r !== null);
    const allFinished = (!elevenlabsApiKey || elevenlabsResult) && (!openaiApiKey || openaiResult) && (!geminiApiKey || geminiResult);
    if (allFinished && allResults.length > 1) {
      highlightWinner(allResults);
    }
  });
}
// Wait for all to complete (allSettled: one provider failing must not
// abort the others).
await Promise.allSettled([supertonicPromise, elevenlabsPromise, openaiPromise, geminiPromise].filter(p => p !== null));
// If no API key, mark as skipped
if (!elevenlabsApiKey && hasComparison) {
  const elevenlabsStatus = document.getElementById('elevenlabsStatus');
  const elevenlabsTime = document.getElementById('elevenlabsTime');
  if (elevenlabsStatus) {
    elevenlabsStatus.textContent = '⏭️ Skipped';
    elevenlabsStatus.className = 'demo-comparison-cell';
  }
  if (elevenlabsTime) {
    elevenlabsTime.textContent = 'No API key';
  }
}
if (!openaiApiKey && hasComparison) {
  const openaiStatus = document.getElementById('openaiStatus');
  const openaiTime = document.getElementById('openaiTime');
  if (openaiStatus) {
    openaiStatus.textContent = '⏭️ Skipped';
    openaiStatus.className = 'demo-comparison-cell';
  }
  if (openaiTime) {
    openaiTime.textContent = 'No API key';
  }
}
if (!geminiApiKey && hasComparison) {
  const geminiStatus = document.getElementById('geminiStatus');
  const geminiTime = document.getElementById('geminiTime');
  if (geminiStatus) {
    geminiStatus.textContent = '⏭️ Skipped';
    geminiStatus.className = 'demo-comparison-cell';
  }
  if (geminiTime) {
    geminiTime.textContent = 'No API key';
  }
}
} catch (error) {
  // Surface the failure in both status areas and restore the idle UI.
  showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error');
  showDemoError(`Error during synthesis: ${error.message}`);
  console.error('Synthesis error:', error);
  // Restore placeholder
  demoResults.style.display = 'none';
  demoResults.innerHTML = `
            <div class="demo-placeholder">
                <div class="demo-placeholder-icon">🎙️</div>
                <p>Your generated speech will appear here</p>
            </div>
        `;
} finally {
  // Runs on success and failure alike: unlock the generate button.
  isGenerating = false;
  demoGenerateBtn.disabled = false;
  // Re-enable voice toggle after generation
  const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
  voiceToggleTexts.forEach(text => text.classList.remove('disabled'));
}
}
// Download handler (exposed on window so result-card onclick handlers
// created elsewhere can reach it).
window.downloadDemoAudio = function (url, filename) {
  // A transient, never-attached anchor is enough to trigger the
  // browser's download flow for the blob URL.
  const anchor = document.createElement('a');
  anchor.href = url;
  anchor.download = filename;
  anchor.click();
};
// Update slider value displays
// Mirrors the current slider positions into their numeric read-outs.
function updateSliderValues() {
  demoTotalStepsValue.textContent = demoTotalSteps.value;
  // Round to 2 decimals, then drop trailing zeros via numeric coercion
  // (1.00 -> 1, 0.80 -> 0.8, 0.75 -> 0.75).
  const rounded = parseFloat(demoDurationFactor.value).toFixed(2);
  demoDurationFactorValue.textContent = parseFloat(rounded);
}
// Attach slider event listeners ('input' fires continuously while dragging)
demoTotalSteps.addEventListener('input', updateSliderValues);
demoDurationFactor.addEventListener('input', updateSliderValues);
// Initialize slider values
updateSliderValues();
// Attach generate function to button
demoGenerateBtn.addEventListener('click', generateSpeech);
// Preset text buttons (defined before input listener to share scope)
const presetButtons = document.querySelectorAll('[data-preset]');
// NOTE(review): freeformBtn is not referenced anywhere in this chunk —
// verify it is used elsewhere or remove it.
const freeformBtn = document.getElementById('freeformBtn');
let currentPreset = 'quote'; // Initialize with quote
let isPresetChanging = false; // Flag to track if text change is from preset button
// Helper: marks the button for `presetType` as the single active preset,
// records it as the current preset, and syncs quote-mode styling.
function updateActiveButton(presetType) {
  // Clear the active state everywhere first.
  for (const btn of presetButtons) {
    btn.classList.remove('active');
  }
  if (presetType) {
    const target = document.querySelector(`[data-preset="${presetType}"]`);
    target?.classList.add('active');
  }
  currentPreset = presetType;
  updateQuoteModeState(presetType === 'quote');
}
// Toggles the quote-mode styling class on the results container.
function updateQuoteModeState(isQuote) {
  if (demoResults) {
    demoResults.classList.toggle('quote-mode', Boolean(isQuote));
  }
}
// Initialize with quote button active
updateActiveButton('quote');
presetButtons.forEach(btn => {
  btn.addEventListener('click', () => {
    const presetType = btn.getAttribute('data-preset');
    if (presetType === 'freeform') {
      // Freeform button: clear text
      // isPresetChanging stops the textarea 'input' listener from
      // re-switching the active preset to freeform mid-update.
      isPresetChanging = true;
      demoTextInput.value = '';
      updateCharCounter();
      updateActiveButton('freeform');
      isPresetChanging = false;
    } else {
      // Other preset buttons: set text
      const text = presetTexts[presetType];
      if (text) {
        isPresetChanging = true;
        demoTextInput.value = text;
        updateCharCounter();
        updateActiveButton(presetType);
        isPresetChanging = false;
      }
    }
  });
});
// Update character counter on input
let previousTextValue = demoTextInput.value;
demoTextInput.addEventListener('input', () => {
  updateCharCounter();
  // If text was modified by user (not from preset button), switch to freeform
  if (!isPresetChanging && demoTextInput.value !== previousTextValue) {
    updateActiveButton('freeform');
  }
  previousTextValue = demoTextInput.value;
});
// Update font size when window is resized (for responsive width-based font sizing)
let resizeTimeout;
window.addEventListener('resize', () => {
  // Debounce: only recompute 100ms after the last resize event.
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    updateCharCounter();
  }, 100);
});
// Initialize character counter
updateCharCounter();
// Voice toggle button handlers: clicking a voice label switches the TTS
// voice, disabling the generate button and all voice labels while the
// new voice loads, and reverting the UI if the switch fails.
const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
// Disable voice toggle texts initially (re-enabled once models load)
voiceToggleTexts.forEach(text => text.classList.add('disabled'));
voiceToggleTexts.forEach(text => {
  text.addEventListener('click', async () => {
    if (text.classList.contains('disabled')) return;
    const selectedVoice = text.getAttribute('data-voice');
    // Don't reload if already selected
    if (selectedVoice === currentVoice) {
      return;
    }
    // Update UI
    voiceToggleTexts.forEach(t => t.classList.remove('active'));
    text.classList.add('active');
    // Disable all controls while loading
    const wasDisabled = demoGenerateBtn.disabled;
    demoGenerateBtn.disabled = true;
    voiceToggleTexts.forEach(t => t.classList.add('disabled'));
    try {
      await switchVoice(selectedVoice);
      // Re-enable texts if models are loaded
      if (models && cfgs && processors) {
        demoGenerateBtn.disabled = false;
        voiceToggleTexts.forEach(t => t.classList.remove('disabled'));
      }
    } catch (error) {
      console.error('Failed to switch voice:', error);
      // Revert UI on error. Guard the lookup with ?.: if no element
      // matches the previous voice, an unguarded dereference would throw
      // a TypeError here and mask the switch error being handled.
      voiceToggleTexts.forEach(t => t.classList.remove('active'));
      document.querySelector(`[data-voice="${currentVoice}"]`)?.classList.add('active');
      // Re-enable texts
      voiceToggleTexts.forEach(t => t.classList.remove('disabled'));
      if (!wasDisabled) demoGenerateBtn.disabled = false;
    }
  });
});
// Title animation setup
const demoTitleLeft = document.querySelector('.demo-title-left');
const demoTitleRight = document.querySelector('.demo-title-right');
const demoInputSection = document.querySelector('.demo-input-section');
const demoOutputSection = document.querySelector('.demo-output-section');
// Initialize Text with letters wrapped in spans (so each letter can be
// animated independently)
if (demoTitleLeft) {
  const text = demoTitleLeft.textContent.trim();
  demoTitleLeft.innerHTML = text.split('').map(char =>
    char === ' ' ? ' ' : `<span class="letter visible">${char}</span>`
  ).join('');
}
// Text animation on demo-input-section click
if (demoInputSection && demoTitleLeft) {
  demoInputSection.addEventListener('click', () => {
    const letters = demoTitleLeft.querySelectorAll('.letter');
    // Reset all letters
    letters.forEach(letter => {
      letter.classList.remove('visible');
    });
    // Show letters one by one (total 0.25s = 0.125s / 2)
    letters.forEach((letter, index) => {
      setTimeout(() => {
        letter.classList.add('visible');
      }, index * 0.0625 * 1000); // 0.0625s delay between each letter
    });
  });
}
// Speech animation on demo-output-section click
if (demoOutputSection && demoTitleRight) {
  demoOutputSection.addEventListener('click', (event) => {
    // Ignore clicks that land on the generate button inside this section.
    if (event.target.closest('#demoGenerateBtn')) {
      return;
    }
    demoTitleRight.classList.remove('animate-speech');
    // Trigger reflow so removing+re-adding the class restarts the CSS animation
    void demoTitleRight.offsetWidth;
    demoTitleRight.classList.add('animate-speech');
  });
}
// Maps a display provider name to the slug used in bundled audio file
// names. "Supertonic" shares the "supertone" audio assets; every other
// provider's slug is simply its lower-cased name. Returns null for a
// missing/empty name.
function getProviderSlugForAudio(providerName) {
  if (!providerName) return null;
  const normalized = providerName.toLowerCase();
  return normalized === 'supertonic' ? 'supertone' : normalized;
}
// Syncs a text-handling card's play button visuals and ARIA state with
// whether its sample is currently playing. No-op for cards without a
// play button.
function updateTextHandlingPlayButtonState(cardState, isPlaying) {
  const button = cardState.playButton;
  if (!button) return;
  button.classList.toggle('is-playing', isPlaying);
  button.setAttribute('aria-pressed', String(isPlaying));
  const verb = isPlaying ? 'Pause' : 'Play';
  button.setAttribute('aria-label', `${verb} ${cardState.sampleTitle} sample`);
}
// 'ended' listener for a card's audio element: rewinds the clip and
// clears the card's playback flags. Stale elements (no longer the card's
// current audio) are ignored.
function handleTextHandlingAudioEnded(cardState, audioEl) {
  if (cardState.currentAudio !== audioEl) {
    return;
  }
  cardState.isPlaying = false;
  cardState.isPaused = false;
  // Rewind so the next play starts from the beginning.
  audioEl.currentTime = 0;
  updateTextHandlingPlayButtonState(cardState, false);
}
// Lazily creates (and caches per provider slug) the Audio element for a
// provider's bundled sample on this card. Microsoft samples ship as
// .wav; all other providers ship as .mp3.
function getOrCreateTextHandlingAudio(cardState, providerSlug) {
  if (cardState.audioElements.has(providerSlug)) {
    return cardState.audioElements.get(providerSlug);
  }
  const extension = providerSlug === 'microsoft' ? 'wav' : 'mp3';
  const audioEl = new Audio(`audio/${providerSlug}_speech-${cardState.audioNumber}.${extension}`);
  audioEl.preload = 'auto';
  audioEl.addEventListener('ended', () => handleTextHandlingAudioEnded(cardState, audioEl));
  cardState.audioElements.set(providerSlug, audioEl);
  return audioEl;
}
// Pauses a card's current audio. With { reset: true } the clip is also
// rewound and detached so the next play starts from scratch; otherwise
// the card is marked paused only when the clip stopped mid-playback.
function pauseTextHandlingAudio(cardState, { reset = false } = {}) {
  const clip = cardState.currentAudio;
  if (!clip) {
    // Nothing loaded: just clear the flags.
    cardState.isPlaying = false;
    if (reset) {
      cardState.isPaused = false;
    }
    return;
  }
  try {
    clip.pause();
  } catch (error) {
    console.warn('Failed to pause audio', error);
  }
  if (reset) {
    clip.currentTime = 0;
    cardState.currentAudio = null;
    cardState.isPaused = false;
  } else {
    // A mid-clip stop counts as "paused"; at the start or end it does not.
    cardState.isPaused = clip.currentTime > 0 && clip.currentTime < clip.duration;
  }
  cardState.isPlaying = false;
  updateTextHandlingPlayButtonState(cardState, false);
}
// Starts (or restarts) playback of the card's currently selected
// provider sample, pausing every other text-handling player first.
// Does nothing when no provider is selected.
function playTextHandlingAudio(cardState, { restart = false } = {}) {
  const providerSlug = getProviderSlugForAudio(cardState.currentProvider);
  if (!providerSlug) {
    return;
  }
  const audioEl = getOrCreateTextHandlingAudio(cardState, providerSlug);
  if (!audioEl) {
    return;
  }
  // Switching providers: fully reset the previously active clip.
  if (cardState.currentAudio && cardState.currentAudio !== audioEl) {
    pauseTextHandlingAudio(cardState, { reset: true });
  }
  cardState.currentAudio = audioEl;
  if (restart || audioEl.ended) {
    audioEl.currentTime = 0;
  }
  pauseTextHandlingPlayersExcept(cardState.playerRecord);
  // play() may return a promise that rejects (e.g. autoplay policy).
  const playPromise = audioEl.play();
  if (playPromise && typeof playPromise.catch === 'function') {
    playPromise.catch(error => {
      console.warn('Failed to play text-handling audio', error);
    });
  }
  cardState.isPlaying = true;
  cardState.isPaused = false;
  updateTextHandlingPlayButtonState(cardState, true);
}
// Handles a click on one of a card's provider chips: updates the chip
// active/ARIA states, records the selection, relabels and recolors the
// card, and restarts playback with the new provider's sample.
function handleTextHandlingProviderSelection(cardState, option) {
  const { providerOptions, textModelLabel, textModel, card } = cardState;
  for (const btn of providerOptions) {
    const isActive = btn === option;
    btn.classList.toggle('active', isActive);
    btn.setAttribute('aria-pressed', String(isActive));
  }
  // Prefer the explicit data-provider attribute; fall back to the label text.
  const providerName = option.dataset.provider || option.textContent.trim();
  cardState.currentProvider = providerName;
  if (textModelLabel) {
    textModelLabel.textContent = getProviderLabel(providerName);
  }
  if (textModel) {
    textModel.setAttribute('data-selected-provider', providerName);
  }
  card.style.setProperty('--provider-color', getProviderColor(providerName));
  playTextHandlingAudio(cardState, { restart: true });
}
// Play/pause button handler for a text-handling card. With no provider
// selected yet it delegates to the first provider chip; otherwise it
// toggles between pausing, resuming, and starting playback.
function handleTextHandlingPlayClick(cardState) {
  const activeOption = cardState.card.querySelector('.provider-option.active');
  if (!activeOption) {
    // No provider chosen yet: simulate a click on the default chip,
    // which selects it and starts playback.
    const defaultOption = cardState.providerOptions[0];
    if (defaultOption) {
      defaultOption.click();
    }
    return;
  }
  if (cardState.isPlaying) {
    pauseTextHandlingAudio(cardState);
    return;
  }
  const canResume = cardState.isPaused && cardState.currentAudio;
  if (!canResume) {
    playTextHandlingAudio(cardState, { restart: false });
    return;
  }
  // Resume the paused clip in place.
  pauseTextHandlingPlayersExcept(cardState.playerRecord);
  const resumePromise = cardState.currentAudio.play();
  if (resumePromise && typeof resumePromise.catch === 'function') {
    resumePromise.catch(error => console.warn('Failed to resume audio', error));
  }
  cardState.isPlaying = true;
  cardState.isPaused = false;
  updateTextHandlingPlayButtonState(cardState, true);
}
// Wires up each .text-handling-card: builds its state object, registers
// its pause hook in the shared player list, and attaches provider-chip
// and play-button handlers.
function initTextHandlingCards() {
  const cards = document.querySelectorAll('.text-handling-card');
  if (!cards.length) {
    return;
  }
  cards.forEach((card, index) => {
    const providerOptions = Array.from(card.querySelectorAll('.provider-option'));
    const textModel = card.querySelector('.text-model');
    const textModelLabel = card.querySelector('.text-model-label');
    const playButton = card.querySelector('.text-handling-player');
    const sampleTitle = card.querySelector('.text-handling-label')?.textContent?.trim() || 'sample';
    // Mutable per-card state shared by all the handler functions above.
    const cardState = {
      card,
      providerOptions,
      textModel,
      textModelLabel,
      playButton,
      sampleTitle,
      // Audio file number for this card; falls back to 1-based index
      // when the map (defined elsewhere in the file) has no entry.
      audioNumber: TEXT_HANDLING_CARD_AUDIO_MAP[index] || index + 1,
      audioElements: new Map(),
      currentProvider: null,
      currentAudio: null,
      isPlaying: false,
      isPaused: false,
      playerRecord: null
    };
    // Registering the pause hook lets other players silence this card.
    const playerRecord = {
      pausePlayback: () => pauseTextHandlingAudio(cardState)
    };
    cardState.playerRecord = playerRecord;
    textHandlingAudioPlayers.push(playerRecord);
    providerOptions.forEach(option => {
      option.addEventListener('click', () => handleTextHandlingProviderSelection(cardState, option));
    });
    if (playButton) {
      playButton.addEventListener('click', () => handleTextHandlingPlayClick(cardState));
      updateTextHandlingPlayButtonState(cardState, false);
    }
    // Default accent color until a provider is picked.
    card.style.setProperty('--provider-color', getProviderColor('Supertone'));
  });
}
// Resolves a provider's accent color from the document's CSS custom
// properties, with hard-coded fallbacks when a variable is unset
// (getPropertyValue returns '' for unknown properties).
function getProviderColor(provider) {
  const rootStyle = getComputedStyle(document.documentElement);
  const read = (cssVar, fallback) => rootStyle.getPropertyValue(cssVar) || fallback;
  switch (provider) {
    case 'Supertone':
    case 'supertone':
      return read('--supertone_blue', '#227CFF');
    case 'ElevenLabs':
      return read('--brand-elevenlabs', '#999999');
    case 'OpenAI':
      return read('--brand-openai', '#52a584');
    case 'Gemini':
      return read('--brand-gemini', '#887eca');
    case 'Microsoft':
      return read('--brand-microsoft', '#00A4EF');
    default:
      return read('--primary', '#227CFF');
  }
}
// Maps a provider name (any casing) to the model label shown in the UI.
// Unknown providers fall back to their own name; an empty/missing name
// falls back to 'Supertonic'.
function getProviderLabel(provider) {
  const labels = new Map([
    ['supertone', 'Supertonic'],
    ['elevenlabs', 'Flash v2.5'],
    ['openai', 'TTS-1'],
    ['gemini', '2.5 Flash TTS'],
    ['microsoft', 'VibeVoice Realtime 0.5B'],
  ]);
  const key = (provider || '').toLowerCase();
  return labels.get(key) ?? (provider || 'Supertonic');
}
// Wire up the static text-handling comparison cards.
initTextHandlingCards();
// Initialize models (kicks off the TTS model download/load)
initializeModels();
})();