faster-qwen3-tts-demo / index.html
andito's picture
andito HF Staff
add demo continuation comparison
390c635
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Zoom is intentionally left unrestricted: maximum-scale=1 / user-scalable=no
     stop low-vision users from pinch-zooming the page.
     NOTE(review): if the lock was there to stop iOS zooming on input focus,
     raise the form-control font-size to 16px instead of re-adding it. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Faster Qwen3-TTS</title>
<style>
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg: #09090b;
--surface: #18181b;
--surface2: #27272a;
--border: #27272a;
--border2: #3f3f46;
--text: #fafafa;
--text2: #a1a1aa;
--dim: #71717a;
--dimmer: #52525b;
--accent: #8b5cf6;
--accent2: #7c3aed;
--accent-bg: rgba(139,92,246,0.1);
--accent-ring: rgba(139,92,246,0.3);
--green: #22c55e;
--amber: #eab308;
--red: #ef4444;
--radius: 10px;
--radius-sm: 6px;
--shadow: 0 1px 3px rgba(0,0,0,0.4);
}
:root[data-theme="light"] {
--bg: #fafafa;
--surface: #ffffff;
--surface2: #f4f4f5;
--border: #e4e4e7;
--border2: #d4d4d8;
--text: #09090b;
--text2: #52525b;
--dim: #71717a;
--dimmer: #a1a1aa;
--accent: #7c3aed;
--accent2: #6d28d9;
--accent-bg: rgba(124,58,237,0.07);
--accent-ring: rgba(124,58,237,0.2);
--shadow: 0 1px 3px rgba(0,0,0,0.08);
}
/* Page base. The document must stay vertically scrollable: the stacked mode
   rows, results and continuation panels can be taller than a small viewport,
   and clipping them with overflow: hidden makes the lower controls unreachable. */
html, body { height: 100%; }
body {
  overflow-x: hidden;   /* glow shadows must not create a sideways scrollbar */
  overflow-y: auto;     /* scroll only when the content is taller than the viewport */
  background: var(--bg);
  color: var(--text);
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
  font-size: 14px;
  line-height: 1.5;
}
/* ── App shell ── */
.app {
display: flex;
flex-direction: column;
justify-content: center;
min-height: 100vh;
min-height: 100dvh;
max-width: 680px;
margin: 0 auto;
padding: 10px 16px;
padding-bottom: max(10px, env(safe-area-inset-bottom));
gap: 8px;
}
/* ── Header ── */
.hdr {
display: flex;
align-items: center;
gap: 8px;
flex-shrink: 0;
}
.hdr h1 { font-size: 15px; font-weight: 600; letter-spacing: -0.3px; }
.badge {
font-size: 9px; font-weight: 700; letter-spacing: 0.8px;
background: var(--accent-bg); color: var(--accent);
border: 1px solid var(--accent-ring);
padding: 1px 6px; border-radius: 100px;
}
.spacer { flex: 1; }
.mpill {
display: flex; align-items: center; gap: 5px;
font-size: 11px; color: var(--dim);
padding: 4px 10px;
background: var(--surface); border: 1px solid var(--border);
border-radius: 20px; cursor: pointer;
transition: border-color 0.15s;
}
.mpill:hover { border-color: var(--border2); }
.mdot {
width: 6px; height: 6px; border-radius: 50%;
background: var(--dimmer); flex-shrink: 0;
transition: background 0.3s;
}
.mdot.loaded { background: var(--green); }
.mdot.loading { background: var(--amber); animation: blink 0.7s infinite; }
.mdot.error { background: var(--red); }
.ibtn {
width: 30px; height: 30px;
display: flex; align-items: center; justify-content: center;
background: transparent;
border: 1px solid var(--border); border-radius: 8px;
color: var(--dim); cursor: pointer;
transition: all 0.15s;
}
.ibtn:hover { border-color: var(--border2); color: var(--text); }
.ibtn svg { width: 15px; height: 15px; }
/* ── Main area ── */
.main {
display: flex; flex-direction: column; gap: 6px;
}
.text-wrap textarea {
width: 100%; height: 120px;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius);
color: var(--text); font-size: 15px; font-family: inherit;
padding: 10px 12px; resize: none; outline: none;
transition: border-color 0.15s;
}
.text-wrap textarea:focus { border-color: var(--accent-ring); }
.text-wrap textarea::placeholder { color: var(--dimmer); }
/* ── Mode rows (arcade selector) ── */
.mode-row {
flex-shrink: 0;
padding: 7px 9px; border-radius: var(--radius-sm);
transition: box-shadow 0.2s, opacity 0.2s;
}
.mode-row .label {
font-size: 11px; color: var(--dim);
margin-bottom: 4px;
display: flex; align-items: center; gap: 8px;
transition: color 0.2s;
}
.mode-row .label::after {
content: ''; flex: 1; height: 1px;
background: var(--border);
transition: background 0.2s;
}
.mode-row .row { display: flex; align-items: flex-start; gap: 6px; }
/* Active (loaded) row — neon arcade glow */
.mode-row.compatible {
  box-shadow: 0 0 0 1px var(--accent),
              0 0 12px rgba(139,92,246,0.45),
              0 0 24px rgba(139,92,246,0.15);
}
.mode-row.compatible .label {
  color: var(--accent); font-weight: 600; letter-spacing: 0.03em;
}
/* Blinking "play" marker (U+25B6) in front of the active row's label. */
.mode-row.compatible .label::before {
  content: '▶'; color: var(--accent);
  font-size: 9px;
  animation: blink-cursor 1s step-end infinite;
}
.mode-row.compatible .label::after { background: var(--accent-ring); }
/* step-end timing on the marker makes this a hard on/off blink, not a fade. */
@keyframes blink-cursor { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
/* Loading row — pulsing glow while model is fetched */
.mode-row.loading {
  animation: row-pulse 0.7s ease-in-out infinite alternate;
}
.mode-row.loading .label {
  color: var(--accent); font-weight: 600; letter-spacing: 0.03em;
}
/* Rotating dotted circle (U+25CC) used as a text-only spinner. */
.mode-row.loading .label::before {
  content: '◌'; color: var(--accent); font-size: 11px;
  animation: spin-ring 1s linear infinite;
}
.mode-row.loading .label::after { background: var(--accent-ring); }
/* Glow swings between a faint and a strong ring; "alternate" plays it back and forth. */
@keyframes row-pulse {
  from { box-shadow: 0 0 0 1px rgba(139,92,246,0.4), 0 0 6px rgba(139,92,246,0.2); }
  to   { box-shadow: 0 0 0 1px var(--accent), 0 0 20px rgba(139,92,246,0.55), 0 0 36px rgba(139,92,246,0.2); }
}
@keyframes spin-ring { to { transform: rotate(360deg); } }
/* Inactive rows — dim + pointer; hover lifts the veil */
.mode-row.incompatible { opacity: 0.42; cursor: pointer; }
/* Make the row's direct children ignore pointer events so a click anywhere in
   the row lands on the row itself and its onclick fires cleanly.
   Note: pointer-events does not affect keyboard focus — inner controls can
   still be reached with Tab. */
.mode-row.incompatible > * { pointer-events: none; }
/* Hover preview: nearly full opacity plus a soft outline glow. */
.mode-row.incompatible:hover {
opacity: 0.9;
box-shadow: 0 0 0 1px var(--border2), 0 0 10px rgba(139,92,246,0.2);
}
.mode-row.incompatible:hover .label { color: var(--text2); }
/* Reveal the chip that .switch-tag hides by default (display: none). */
.mode-row.incompatible:hover .switch-tag { display: inline-flex; }
/* "LOAD ▶" chip shown on hover of incompatible rows */
.switch-tag {
display: none; align-items: center;
font-size: 9px; font-family: monospace; font-weight: 700;
letter-spacing: 0.12em; text-transform: uppercase;
color: var(--accent); padding: 1px 5px;
border: 1px solid var(--accent-ring); border-radius: 3px;
background: var(--accent-bg);
animation: flicker-in 0.35s step-end;
}
/* Stepped opacity keyframes (the chip uses step-end timing), giving a short
   flicker that settles at full opacity. */
@keyframes flicker-in {
0% { opacity: 0; } 20% { opacity: 1; } 35% { opacity: 0.2; }
55% { opacity: 1; } 70% { opacity: 0.5; } 100% { opacity: 1; }
}
/* ── Loader overlay (centered modal) ── */
/* Full-viewport backdrop. It stays in the layout but is transparent and
   click-through until .open is added. */
.lov {
  position: fixed; inset: 0;
  background: rgba(0,0,0,0.75);
  /* Prefixed form first for older Safari releases that only know -webkit-backdrop-filter. */
  -webkit-backdrop-filter: blur(2px);
  backdrop-filter: blur(2px);
  z-index: 110;
  display: flex; align-items: center; justify-content: center;
  padding: 20px;
  opacity: 0; pointer-events: none;
  transition: opacity 0.2s;
}
.lov.open { opacity: 1; pointer-events: auto; }
.lpanel {
  background: var(--bg);
  border: 1px solid var(--border);
  border-top: 2px solid var(--accent);
  border-radius: 16px;
  width: 100%; max-width: 460px;
  /* vh fallback first; browsers that understand dvh override it on the next
     line (same pattern as .app's min-height). Without the fallback an older
     browser drops the declaration and the panel can outgrow the screen. */
  max-height: 85vh;
  max-height: 85dvh;
  overflow-y: auto;
  padding: 20px 20px 24px;
  transform: scale(0.93) translateY(12px);
  transition: transform 0.25s cubic-bezier(0.16, 1, 0.3, 1);
  box-shadow: 0 0 40px rgba(139,92,246,0.2), 0 20px 60px rgba(0,0,0,0.5);
}
/* Panel pops to its natural size and position once the overlay is open. */
.lov.open .lpanel { transform: scale(1) translateY(0); }
.l-header {
display: flex; align-items: center; margin-bottom: 14px;
}
.l-back {
display: flex; align-items: center; gap: 4px;
font-size: 11px; font-family: monospace; color: var(--dim);
cursor: pointer; border: none; background: none;
padding: 0; margin-right: auto;
transition: color 0.15s;
}
.l-back:hover { color: var(--text2); }
.l-title {
font-family: monospace; font-size: 12px; font-weight: 700;
letter-spacing: 0.22em; color: var(--accent);
text-align: center; flex: 1;
text-shadow: 0 0 8px rgba(139,92,246,0.6);
}
.lcard-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
gap: 8px; margin-bottom: 14px;
}
.lcard {
border: 1px solid var(--border2); border-radius: var(--radius-sm);
padding: 10px 12px; cursor: pointer; transition: all 0.15s;
background: var(--surface);
}
.lcard:hover { border-color: var(--accent-ring); background: var(--accent-bg); }
.lcard.sel {
border-color: var(--accent); background: var(--accent-bg);
box-shadow: 0 0 8px rgba(139,92,246,0.35);
}
.lcard .lc-name { font-size: 13px; font-weight: 600; color: var(--text); }
.lcard .lc-sub { font-size: 11px; color: var(--dim); margin-top: 2px; }
.lcard.sel .lc-name { color: var(--accent); }
/* Ref audio chip */
.ref-chip {
display: inline-flex; align-items: center; gap: 5px;
padding: 6px 10px;
background: var(--surface); border: 1px dashed var(--border2);
border-radius: var(--radius-sm);
font-size: 12px; color: var(--dim);
cursor: pointer; transition: all 0.15s;
position: relative; overflow: hidden;
max-width: 220px;
}
.ref-chip:hover { border-color: var(--accent); color: var(--accent); }
.ref-chip.has { border-style: solid; color: var(--accent); border-color: var(--accent-ring); }
.ref-chip input[type="file"] {
position: absolute; inset: 0; opacity: 0; cursor: pointer;
}
.ref-chip svg { width: 14px; height: 14px; flex-shrink: 0; }
.ref-chip span { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
/* Mic button */
.mic-btn {
width: 32px; height: 32px;
display: flex; align-items: center; justify-content: center;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--dim); cursor: pointer; transition: all 0.15s;
flex-shrink: 0;
}
.mic-btn:hover { border-color: var(--accent); color: var(--accent); }
.mic-btn.rec { border-color: var(--red); color: var(--red); animation: blink 1s infinite; }
.mic-btn svg { width: 14px; height: 14px; }
.preset-row {
display: contents;
}
.preset-row:empty { display: none; }
.preset-btn {
background: var(--surface2);
border: 1px solid var(--border2);
color: var(--text2);
padding: 6px 10px;
border-radius: var(--radius-sm);
font-size: 12px;
cursor: pointer; transition: all 0.15s;
flex-shrink: 0;
}
.preset-btn:hover { border-color: var(--accent); color: var(--accent); background: var(--accent-bg); }
.preset-btn.active { border-color: var(--accent); color: var(--accent); background: var(--accent-bg); }
.rec-time {
font-size: 11px; color: var(--red);
font-family: 'SF Mono', 'Fira Code', monospace;
min-width: 28px;
}
.rec-status { font-size: 11px; color: var(--dim); }
/* Recording level meter */
.rec-bar-wrap {
height: 3px; background: var(--border); border-radius: 2px;
margin-top: 4px; display: none;
}
.rec-bar {
height: 100%; width: 0%;
background: linear-gradient(90deg, var(--green), var(--amber), var(--red));
border-radius: 2px; transition: width 0.05s;
}
/* Recording preview */
.rec-preview {
width: 100%; height: 28px; border-radius: 4px;
margin-top: 4px; display: none;
}
:root:not([data-theme="light"]) .rec-preview {
filter: invert(0.88) hue-rotate(180deg);
}
/* Small select */
.sm-sel {
padding: 6px 28px 6px 10px;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text); font-size: 12px; font-family: inherit;
outline: none; cursor: pointer; appearance: none;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='8' height='5'%3E%3Cpath fill='%2371717a' d='M4 5 0 0h8z'/%3E%3C/svg%3E");
background-repeat: no-repeat; background-position: right 8px center;
}
.sm-sel option { background: var(--surface); }
/* Voice instruction textarea (auto-grow) */
.voice-in {
flex: 1; padding: 6px 10px;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text); font-size: 12px; font-family: inherit;
outline: none; transition: border-color 0.15s;
resize: none; overflow: hidden;
min-height: 32px; line-height: 1.45;
}
.voice-in:focus { border-color: var(--accent-ring); }
.voice-in::placeholder { color: var(--dimmer); }
/* Full-width instruction field below a row (Custom mode) */
.instr-wrap { margin-top: 6px; display: flex; }
.instr-wrap .voice-in { flex: 1; }
/* ── Clone mode controls ── */
.clone-toggle-row { margin-bottom: 6px; }
.clone-hint {
font-size: 11px; color: var(--dim);
margin-bottom: 5px; line-height: 1.4;
}
/* ── Play button ── */
.play-btn {
width: 32px; height: 32px;
display: flex; align-items: center; justify-content: center;
background: var(--accent); color: #fff; border: none;
border-radius: var(--radius-sm); cursor: pointer; flex-shrink: 0;
margin-left: auto;
transition: all 0.15s;
}
.play-btn:hover:not(:disabled) { background: var(--accent2); }
.play-btn:disabled { opacity: 0.35; cursor: not-allowed; }
.play-btn svg { width: 14px; height: 14px; margin-left: 1px; }
/* ── Error / Warning ── */
.msg-bar {
display: none; padding: 8px 12px;
border-radius: var(--radius-sm);
font-size: 12px; flex-shrink: 0;
align-items: center; gap: 8px;
}
.msg-bar.show { display: flex; }
.msg-bar.err {
background: rgba(239,68,68,0.08); border: 1px solid rgba(239,68,68,0.2);
color: #fca5a5;
}
:root[data-theme="light"] .msg-bar.err { color: #dc2626; }
.msg-bar.warn {
background: rgba(234,179,8,0.08); border: 1px solid rgba(234,179,8,0.2);
color: #fcd34d;
}
:root[data-theme="light"] .msg-bar.warn { color: #a16207; }
.msg-bar .sw-btn {
margin-left: auto; padding: 3px 10px;
background: rgba(234,179,8,0.15); border: none; border-radius: 4px;
color: inherit; font-size: 11px; font-weight: 600;
cursor: pointer; white-space: nowrap;
}
.msg-bar .sw-btn:hover { background: rgba(234,179,8,0.3); }
/* ── Queue indicator ── */
.queue-bar {
display: none; padding: 6px 12px;
border-radius: var(--radius-sm); font-size: 13px; color: var(--text2);
background: rgba(99,102,241,0.08); border: 1px solid rgba(99,102,241,0.2);
}
.queue-bar.show { display: flex; align-items: center; gap: 8px; }
/* ── Progress ── */
.pbar-wrap {
height: 2px; background: var(--border);
border-radius: 1px; overflow: hidden;
flex-shrink: 0; display: none;
}
.pbar-wrap.show { display: block; }
.pbar {
height: 100%; background: var(--accent);
border-radius: 1px; transition: width 0.4s; width: 0%;
}
.pbar.spin {
width: 25% !important;
animation: sweep 1.4s ease-in-out infinite;
}
/* ── Results ── */
.results {
flex-shrink: 0; display: none;
flex-direction: column; gap: 6px;
}
.results.show { display: flex; }
.player-row { display: flex; align-items: center; gap: 6px; }
.player-row audio { flex: 1; height: 32px; border-radius: 4px; }
:root:not([data-theme="light"]) .player-row audio { filter: invert(0.88) hue-rotate(180deg); }
.dl-btn {
width: 28px; height: 28px;
display: flex; align-items: center; justify-content: center;
background: transparent; border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--dim); cursor: pointer; font-size: 13px;
transition: all 0.15s; flex-shrink: 0;
}
.dl-btn:hover { border-color: var(--accent); color: var(--accent); }
.metrics {
display: flex; align-items: center; gap: 10px; flex-wrap: wrap;
padding: 5px 10px;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-sm);
font-size: 11px;
}
.met { display: flex; align-items: baseline; gap: 3px; }
.met-k {
color: var(--dim); text-transform: uppercase;
letter-spacing: 0.5px; font-size: 10px;
}
.met-v {
font-family: 'SF Mono', 'Fira Code', monospace;
font-weight: 600; font-size: 12px;
}
.wave {
display: flex; align-items: center; gap: 2px; height: 12px;
margin-left: auto;
}
.wave.off { display: none; }
.wave i {
width: 2px; border-radius: 1px; background: var(--accent);
animation: wavebar 0.7s ease-in-out infinite;
display: block;
}
.wave i:nth-child(1) { height: 3px; animation-delay: 0s; }
.wave i:nth-child(2) { height: 7px; animation-delay: 0.1s; }
.wave i:nth-child(3) { height: 11px; animation-delay: 0.2s; }
.wave i:nth-child(4) { height: 7px; animation-delay: 0.3s; }
.wave i:nth-child(5) { height: 3px; animation-delay: 0.4s; }
/* ── Continuation compare ── */
.continuation {
display: flex; flex-direction: column; gap: 8px;
padding: 10px 12px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
}
.continuation-head {
display: flex; align-items: baseline; justify-content: space-between; gap: 10px;
flex-wrap: wrap;
}
.continuation-head h2 {
font-size: 13px; font-weight: 600; letter-spacing: -0.2px;
}
.continuation-sub {
font-size: 11px; color: var(--dim);
}
.continuation textarea {
width: 100%; min-height: 72px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text);
font-size: 13px;
font-family: inherit;
padding: 9px 10px;
resize: vertical;
outline: none;
}
.continuation textarea:focus { border-color: var(--accent-ring); }
.continuation textarea::placeholder { color: var(--dimmer); }
.continuation textarea:disabled {
opacity: 0.7;
cursor: not-allowed;
}
.continuation-actions {
display: flex; align-items: center; justify-content: space-between; gap: 8px;
flex-wrap: wrap;
}
.continuation-status {
font-size: 11px; color: var(--dim);
}
.compare-btn {
padding: 7px 14px;
background: var(--accent);
color: #fff;
border: none;
border-radius: var(--radius-sm);
font-size: 12px;
font-weight: 600;
cursor: pointer;
transition: background 0.15s, opacity 0.15s;
}
.compare-btn:hover:not(:disabled) { background: var(--accent2); }
.compare-btn:disabled { opacity: 0.4; cursor: not-allowed; }
.compare-grid {
display: none;
grid-template-columns: 1fr;
gap: 8px;
}
.compare-grid.show { display: grid; }
.compare-card {
display: flex; flex-direction: column; gap: 6px;
padding: 9px 10px;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: var(--radius-sm);
}
.compare-title {
display: flex; align-items: center; justify-content: space-between; gap: 8px;
font-size: 12px; font-weight: 600;
}
.compare-badge {
font-size: 10px; font-weight: 700; letter-spacing: 0.5px;
text-transform: uppercase;
padding: 2px 6px;
border-radius: 999px;
}
.compare-badge.fresh {
color: var(--amber);
background: rgba(234,179,8,0.12);
border: 1px solid rgba(234,179,8,0.25);
}
.compare-badge.cont {
color: var(--green);
background: rgba(34,197,94,0.12);
border: 1px solid rgba(34,197,94,0.25);
}
.compare-card audio {
width: 100%;
height: 32px;
border-radius: 4px;
}
:root:not([data-theme="light"]) .compare-card audio { filter: invert(0.88) hue-rotate(180deg); }
/* ── Settings overlay ── */
/* Backdrop for the settings sheet. It is transparent and click-through until
   .open is added; the sheet is pinned to the bottom edge here (a wider-screen
   media query re-centers it). */
.sov {
  position: fixed; inset: 0;
  background: rgba(0,0,0,0.5);
  z-index: 100;
  display: flex; align-items: flex-end; justify-content: center;
  opacity: 0; pointer-events: none;
  transition: opacity 0.2s;
}
.sov.open { opacity: 1; pointer-events: auto; }
.spanel {
  background: var(--bg);
  border: 1px solid var(--border);
  border-radius: 16px 16px 0 0;
  width: 100%; max-width: 680px;
  /* vh fallback first; browsers that understand dvh override it on the next
     line (same pattern as .app's min-height). */
  max-height: 75vh;
  max-height: 75dvh;
  overflow-y: auto;
  padding: 16px 20px 24px;
  transform: translateY(100%);   /* parked just below the overlay's bottom edge */
  transition: transform 0.3s cubic-bezier(0.16, 1, 0.3, 1);
}
.sov.open .spanel { transform: translateY(0); }
.s-handle {
width: 32px; height: 4px;
background: var(--border2); border-radius: 2px;
margin: 0 auto 14px;
}
.s-head {
display: flex; align-items: center;
margin-bottom: 16px;
}
.s-head span { font-size: 14px; font-weight: 600; }
.s-head .spacer { flex: 1; }
.s-section {
font-size: 10px; font-weight: 600; letter-spacing: 0.8px;
text-transform: uppercase; color: var(--dim);
margin: 16px 0 8px; padding-top: 12px;
border-top: 1px solid var(--border);
}
.s-section:first-of-type { margin-top: 0; padding-top: 0; border-top: none; }
.s-row {
display: flex; align-items: center; gap: 8px;
margin-bottom: 10px;
}
.s-row label {
font-size: 12px; color: var(--text2);
min-width: 90px; flex-shrink: 0;
}
.s-row select, .s-row input[type="text"], .s-row textarea {
flex: 1; padding: 7px 10px;
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text); font-size: 13px; font-family: inherit;
outline: none;
}
.s-row select {
appearance: none; padding-right: 28px;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='8' height='5'%3E%3Cpath fill='%2371717a' d='M4 5 0 0h8z'/%3E%3C/svg%3E");
background-repeat: no-repeat; background-position: right 10px center;
cursor: pointer;
}
.s-row select option { background: var(--surface); }
.s-row textarea { resize: vertical; min-height: 40px; max-height: 80px; }
/* Settings hint text */
.s-hint {
font-size: 11px; color: var(--dimmer);
margin: -6px 0 8px 98px;
}
/* Toggle */
.toggle {
display: flex; background: var(--surface);
border: 1px solid var(--border); border-radius: var(--radius-sm);
padding: 2px; flex: 1;
}
.tog {
flex: 1; padding: 4px 8px; font-size: 11px; font-weight: 500;
text-align: center; cursor: pointer; border-radius: 4px;
color: var(--dim); transition: all 0.15s; user-select: none;
}
.tog.on {
background: var(--surface2); color: var(--text);
box-shadow: var(--shadow);
}
/* Slider */
.s-slider {
  flex: 1; display: flex; align-items: center; gap: 8px;
}
/* The input's own box is drawn as the 3px track. */
.s-slider input[type="range"] {
  -webkit-appearance: none; appearance: none; flex: 1;
  height: 3px; background: var(--border2); border-radius: 2px;
  outline: none; cursor: pointer;
}
.s-slider input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none; appearance: none; width: 13px; height: 13px;
  border-radius: 50%; background: var(--accent); cursor: pointer;
}
/* Firefox thumb. This is a separate rule on purpose: an unrecognised
   pseudo-element invalidates a whole selector list, so it cannot share a rule
   with ::-webkit-slider-thumb. border: none removes Firefox's default outline. */
.s-slider input[type="range"]::-moz-range-thumb {
  width: 13px; height: 13px; border: none;
  border-radius: 50%; background: var(--accent); cursor: pointer;
}
/* Numeric read-out next to the slider. */
.s-slider .sv {
  font-size: 11px; color: var(--dim);
  font-family: 'SF Mono', 'Fira Code', monospace;
  min-width: 30px; text-align: right;
}
.s-btn {
padding: 6px 14px; font-size: 12px; font-weight: 600;
background: var(--accent); color: #fff; border: none;
border-radius: var(--radius-sm); cursor: pointer;
font-family: inherit; transition: background 0.15s;
}
.s-btn:hover { background: var(--accent2); }
.s-btn:disabled { opacity: 0.4; cursor: not-allowed; }
/* ── Responsive ── */
@media (min-width: 640px) {
.sov { align-items: center; }
.spanel { border-radius: 16px; max-height: 70vh; }
.s-grid3 {
display: grid; grid-template-columns: repeat(3, 1fr); gap: 8px;
}
.s-grid3 .s-row { flex-direction: column; align-items: stretch; margin-bottom: 0; }
.s-grid3 .s-row label { min-width: unset; margin-bottom: 4px; }
.compare-grid { grid-template-columns: repeat(2, 1fr); }
}
@media (max-height: 520px) {
.app { padding: 6px 12px; gap: 6px; }
.hdr h1 { font-size: 13px; }
.mode-row .label { font-size: 10px; margin-bottom: 2px; }
.play-btn { width: 28px; height: 28px; }
.play-btn svg { width: 12px; height: 12px; }
.player-row audio { height: 28px; }
}
/* ── Animations ── */
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.2} }
@keyframes sweep { 0%{transform:translateX(-200%)} 100%{transform:translateX(500%)} }
@keyframes wavebar { 0%,100%{transform:scaleY(0.3)} 50%{transform:scaleY(1)} }
</style>
</head>
<body>
<div class="app">
<!-- Header: title, status pill, settings and theme buttons.
     Icon-only buttons get an explicit aria-label (title alone is not a
     reliable accessible name); their SVGs are decorative and hidden from AT. -->
<header class="hdr">
  <h1>faster-qwen3-tts</h1>
  <span class="badge">CUDA GRAPHS</span>
  <span class="spacer"></span>
  <!-- Status pill: its visible text (#mtext) serves as the accessible name. -->
  <button type="button" class="mpill" onclick="openSettings()" title="Settings">
    <span class="mdot" id="mdot"></span>
    <span id="mtext">not loaded</span>
  </button>
  <button type="button" class="ibtn" onclick="openSettings()" title="Settings" aria-label="Settings">
    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true"><circle cx="12" cy="12" r="3"/><path d="M19.4 15a1.65 1.65 0 00.33 1.82l.06.06a2 2 0 01-2.83 2.83l-.06-.06a1.65 1.65 0 00-1.82-.33 1.65 1.65 0 00-1 1.51V21a2 2 0 01-4 0v-.09A1.65 1.65 0 009 19.4a1.65 1.65 0 00-1.82.33l-.06.06a2 2 0 01-2.83-2.83l.06-.06A1.65 1.65 0 004.68 15a1.65 1.65 0 00-1.51-1H3a2 2 0 010-4h.09A1.65 1.65 0 004.6 9a1.65 1.65 0 00-.33-1.82l-.06-.06a2 2 0 012.83-2.83l.06.06A1.65 1.65 0 009 4.68a1.65 1.65 0 001-1.51V3a2 2 0 014 0v.09a1.65 1.65 0 001 1.51 1.65 1.65 0 001.82-.33l.06-.06a2 2 0 012.83 2.83l-.06.06A1.65 1.65 0 0019.4 9a1.65 1.65 0 001.51 1H21a2 2 0 010 4h-.09a1.65 1.65 0 00-1.51 1z"/></svg>
  </button>
  <button type="button" class="ibtn" id="themeBtn" onclick="toggleTheme()" title="Toggle theme" aria-label="Toggle theme">
    <svg id="themeIcon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true"><circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/></svg>
  </button>
</header>
<!-- Main content -->
<div class="main">
<!-- Text to synthesize. The placeholder vanishes once text is present, so the
     accessible name comes from aria-label. -->
<div class="text-wrap">
  <textarea id="textIn" aria-label="Text to synthesize" placeholder="Enter text to synthesize...">Against the odds, the wild lobster has found a new vessel for its voice. And with it, the possibility to realise its full potential.</textarea>
</div>
<!-- Clone: voice cloning from an uploaded or recorded reference clip.
     Clicking the row calls onModeRowClick('voice_clone'). -->
<div class="mode-row" id="rowClone" onclick="onModeRowClick('voice_clone')">
  <div class="label">Clone — match a voice from a reference clip<span class="switch-tag">LOAD ▶</span></div>
  <!-- Simple / Advanced toggle -->
  <div class="clone-toggle-row">
    <div class="toggle">
      <div class="tog" data-x="1" onclick="setXvec(true)">Simple</div>
      <div class="tog on" data-x="0" onclick="setXvec(false)">Advanced</div>
    </div>
  </div>
  <!-- Reference audio + action buttons -->
  <div class="row">
    <!-- The wrapping label names the file input; the input is stretched
         invisibly over the chip by the .ref-chip CSS. -->
    <label class="ref-chip" id="refChip">
      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true"><path d="M21.44 11.05l-9.19 9.19a6 6 0 01-8.49-8.49l9.19-9.19a4 4 0 015.66 5.66l-9.2 9.19a2 2 0 01-2.83-2.83l8.49-8.48"/></svg>
      <span id="refLabel">Reference audio</span>
      <input type="file" id="refInput" accept=".wav,.mp3,.flac,.ogg" onchange="pickRef(this)">
    </label>
    <button type="button" class="mic-btn" id="micBtn" onclick="toggleRec()" title="Record from microphone" aria-label="Record from microphone">
      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true"><path d="M12 1a3 3 0 00-3 3v8a3 3 0 006 0V4a3 3 0 00-3-3z"/><path d="M19 10v2a7 7 0 01-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg>
    </button>
    <span class="rec-time" id="recTime"></span>
    <span class="rec-status" id="recStatus"></span>
    <div class="preset-row" id="presetRow"></div>
    <button type="button" class="play-btn" onclick="generate('voice_clone')" title="Generate" aria-label="Generate with cloned voice">
      <svg viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="6,3 20,12 6,21"/></svg>
    </button>
  </div>
  <!-- Advanced: transcript -->
  <div id="advTranscript">
    <div class="clone-hint">Advanced cloning uses the transcript of the reference clip for higher accuracy. Upload or record to auto-transcribe.</div>
    <div class="instr-wrap">
      <textarea id="refTextIn" class="voice-in" rows="2"
                aria-label="Reference clip transcript"
                placeholder="Upload or record audio to auto-transcribe…"
                oninput="autoGrow(this)"></textarea>
    </div>
  </div>
  <!-- Level meter (shown during recording) -->
  <div class="rec-bar-wrap" id="recBarWrap"><div class="rec-bar" id="recBar"></div></div>
  <!-- Preview of last recording -->
  <audio id="recPreview" class="rec-preview" controls></audio>
</div>
<!-- Custom (built-in speakers). Clicking the row calls onModeRowClick('custom').
     The select starts with only a placeholder option. -->
<div class="mode-row" id="rowChar" onclick="onModeRowClick('custom')">
  <div class="label">Custom — use a built-in speaker<span class="switch-tag">LOAD ▶</span></div>
  <div class="row">
    <select class="sm-sel" id="speakerId" style="flex:1" aria-label="Speaker">
      <option value="">Select speaker...</option>
    </select>
    <button type="button" class="play-btn" onclick="generate('custom')" title="Generate" aria-label="Generate with built-in speaker">
      <svg viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="6,3 20,12 6,21"/></svg>
    </button>
  </div>
  <div class="instr-wrap">
    <textarea id="customInstr" class="voice-in" rows="1"
              aria-label="Voice style instructions (optional)"
              placeholder="Voice style instructions (optional)…"
              oninput="autoGrow(this)"></textarea>
  </div>
</div>
<!-- Voice Design: free-text description of the target voice.
     Note the id is "rowCustom" although the mode key is 'voice_design'. -->
<div class="mode-row" id="rowCustom" onclick="onModeRowClick('voice_design')">
  <div class="label">Voice Design — describe any voice<span class="switch-tag">LOAD ▶</span></div>
  <div class="row">
    <textarea class="voice-in" id="voiceInstr" rows="1"
              aria-label="Voice description"
              placeholder="e.g. warm, calm female narrator…"
              oninput="autoGrow(this)">A warm, calm narrator with a clear and engaging delivery.</textarea>
    <button type="button" class="play-btn" onclick="generate('voice_design')" title="Generate" aria-label="Generate with designed voice">
      <svg viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><polygon points="6,3 20,12 6,21"/></svg>
    </button>
  </div>
</div>
<!-- Messages: empty and hidden by default (CSS shows them via .show).
     aria-live lets assistive technology announce text placed here later. -->
<div class="msg-bar" id="msgBar" aria-live="polite"></div>
<div class="queue-bar" id="queueBar" aria-live="polite"></div>
<!-- Progress -->
<div class="pbar-wrap" id="pbarWrap"><div class="pbar" id="pbar"></div></div>
<!-- Results -->
<div class="results" id="results">
  <div class="player-row">
    <audio id="player" controls></audio>
    <!-- The visible content is only an arrow glyph, which would otherwise be
         the accessible name (content outranks title), so name it explicitly. -->
    <button type="button" class="dl-btn" onclick="dlAudio()" title="Download WAV" aria-label="Download WAV">&darr;</button>
  </div>
  <!-- Metric read-outs; each starts as an em dash placeholder. -->
  <div class="metrics">
    <div class="met"><span class="met-k">TTFA</span><span class="met-v" id="mTTFA">&mdash;</span></div>
    <div class="met"><span class="met-k">Client</span><span class="met-v" id="mClient">&mdash;</span></div>
    <div class="met" id="mCloneWrap" style="display:none"><span class="met-k">Clone</span><span class="met-v" id="mClone">&mdash;</span></div>
    <div class="met"><span class="met-k">RTF</span><span class="met-v" id="mRTF">&mdash;</span></div>
    <div class="met"><span class="met-k">Dur</span><span class="met-v" id="mDur">&mdash;</span></div>
    <div class="met"><span class="met-k">Buf</span><span class="met-v" id="mBuf">&mdash;</span></div>
    <div class="wave" id="waveInd" aria-hidden="true"><i></i><i></i><i></i><i></i><i></i></div>
  </div>
</div>
<!-- Continuation comparison: renders a follow-up sentence twice (fresh vs. with
     continuation state) and shows both players side by side. The compare button
     starts disabled. -->
<div class="continuation" id="continuationWrap">
  <div class="continuation-head">
    <h2>Continue Previous Sentence</h2>
    <span class="continuation-sub">Render the next sentence twice: fresh and with continuation state.</span>
  </div>
  <!-- The placeholder is not a label, so name the field explicitly. -->
  <textarea id="continuationText" aria-label="Follow-up sentence" placeholder="Enter the follow-up sentence you want to compare..." oninput="updateContinuationControls()"></textarea>
  <div class="continuation-actions">
    <!-- Status line; aria-live announces any later text changes. -->
    <span class="continuation-status" id="continuationStatus" aria-live="polite">Generate a first sentence to unlock this comparison.</span>
    <button type="button" class="compare-btn" id="continuationBtn" onclick="compareContinuation()" disabled>Compare fresh vs continued</button>
  </div>
  <!-- Hidden until .show is added (see .compare-grid CSS). -->
  <div class="compare-grid" id="continuationResults">
    <div class="compare-card">
      <div class="compare-title">
        <span>Fresh second sentence</span>
        <span class="compare-badge fresh">No continuation</span>
      </div>
      <audio id="cmpFreshPlayer" controls></audio>
      <div class="metrics">
        <div class="met"><span class="met-k">Total</span><span class="met-v" id="cmpFreshMs">&mdash;</span></div>
        <div class="met"><span class="met-k">RTF</span><span class="met-v" id="cmpFreshRTF">&mdash;</span></div>
        <div class="met"><span class="met-k">Dur</span><span class="met-v" id="cmpFreshDur">&mdash;</span></div>
      </div>
    </div>
    <div class="compare-card">
      <div class="compare-title">
        <span>Continued second sentence</span>
        <span class="compare-badge cont">Continuation</span>
      </div>
      <audio id="cmpContPlayer" controls></audio>
      <div class="metrics">
        <div class="met"><span class="met-k">Total</span><span class="met-v" id="cmpContMs">&mdash;</span></div>
        <div class="met"><span class="met-k">RTF</span><span class="met-v" id="cmpContRTF">&mdash;</span></div>
        <div class="met"><span class="met-k">Dur</span><span class="met-v" id="cmpContDur">&mdash;</span></div>
      </div>
    </div>
  </div>
</div>
</div>
</div><!-- /.app -->
<!-- Settings bottom sheet.
     A click on the backdrop calls closeSettings(event); clicks inside the panel
     are stopped from bubbling to it. Each <label> that describes a real form
     control is tied to it with for=, so it provides the accessible name and
     focuses the control when clicked. The "Mode" and "Text feed" labels sit
     next to div-based toggles and therefore have no for= target. -->
<div class="sov" id="settingsOv" onclick="closeSettings(event)">
  <div class="spanel" onclick="event.stopPropagation()">
    <div class="s-handle"></div>
    <div class="s-head">
      <span>Settings</span>
      <span class="spacer"></span>
      <button type="button" class="ibtn" onclick="closeSettings()" title="Close" aria-label="Close settings">
        <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" aria-hidden="true"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
      </button>
    </div>
    <div class="s-section">Model</div>
    <div class="s-row">
      <label for="modelSel">Model</label>
      <select id="modelSel"></select>
      <button type="button" class="s-btn" id="loadBtn" onclick="loadModel()">Load</button>
    </div>
    <div class="s-section">Generation</div>
    <div class="s-row">
      <label for="langSel">Language</label>
      <select id="langSel">
        <option>English</option><option>Chinese</option><option>French</option>
        <option>German</option><option>Spanish</option><option>Auto</option>
      </select>
    </div>
    <div class="s-row">
      <label>Mode</label>
      <div class="toggle">
        <div class="tog on" data-m="stream" onclick="setMode('stream')">Streaming</div>
        <div class="tog" data-m="non" onclick="setMode('non')">Non-streaming</div>
      </div>
    </div>
    <div class="s-row" id="chunkRow">
      <label for="chunkSl">Chunk size</label>
      <div class="s-slider">
        <input type="range" id="chunkSl" min="1" max="24" value="8"
               oninput="$('chunkV').textContent=this.value">
        <span class="sv" id="chunkV">8</span>
      </div>
    </div>
    <div class="s-row">
      <label>Text feed</label>
      <div class="toggle">
        <div class="tog on" data-nsm="0" onclick="setNonStreamingMode(false)">Step-by-step</div>
        <div class="tog" data-nsm="1" onclick="setNonStreamingMode(true)">Prefill text</div>
      </div>
    </div>
    <div class="s-section">Sampling</div>
    <!-- Each slider mirrors its current value into the adjacent .sv span. -->
    <div class="s-grid3">
      <div class="s-row">
        <label for="tempSl">Temperature</label>
        <div class="s-slider">
          <input type="range" id="tempSl" min="0.1" max="2.0" step="0.05" value="0.9"
                 oninput="$('tempV').textContent=this.value">
          <span class="sv" id="tempV">0.9</span>
        </div>
      </div>
      <div class="s-row">
        <label for="topkSl">Top-K</label>
        <div class="s-slider">
          <input type="range" id="topkSl" min="1" max="100" value="50"
                 oninput="$('topkV').textContent=this.value">
          <span class="sv" id="topkV">50</span>
        </div>
      </div>
      <div class="s-row">
        <label for="repSl">Rep. penalty</label>
        <div class="s-slider">
          <input type="range" id="repSl" min="1.0" max="1.5" step="0.01" value="1.05"
                 oninput="$('repV').textContent=this.value">
          <span class="sv" id="repV">1.05</span>
        </div>
      </div>
    </div>
    <div class="s-section">Recording</div>
    <div class="s-row">
      <label for="micSel">Microphone</label>
      <select id="micSel"></select>
    </div>
  </div>
</div>
<!-- ── Model Loader Overlay ── -->
<!-- A click on the backdrop calls closeLoader(event); clicks inside the panel
     are stopped from bubbling to it. #loaderModeLabel and #loaderGrid start
     empty. -->
<div class="lov" id="loaderOv" onclick="closeLoader(event)">
  <div class="lpanel" onclick="event.stopPropagation()">
    <div class="l-header">
      <button type="button" class="l-back" onclick="$('loaderOv').classList.remove('open')">◀ BACK</button>
      <div class="l-title">⬑ <span id="loaderModeLabel"></span> ⬑</div>
    </div>
    <div class="lcard-grid" id="loaderGrid"></div>
    <!-- This page's stylesheet has no .btn-primary rule, so the button rendered
         with browser defaults. .s-btn supplies the accent button styling;
         btn-primary is kept in case anything else targets it. -->
    <button type="button" class="btn-primary s-btn" style="width:100%" onclick="loadFromSheet()">LOAD ▶</button>
  </div>
</div>
<script>
// ── Shorthand ─────────────────────────────────────────────────────────────────
// Short alias for document.getElementById, used throughout this script.
const $ = (elementId) => document.getElementById(elementId);
// Resize an element so its height matches its content.
// The height is reset to 'auto' first so scrollHeight is measured from the content,
// not from the previously assigned fixed height.
function autoGrow(el) {
  const { style } = el;
  style.height = 'auto';
  style.height = `${el.scrollHeight}px`;
}
// Enable or disable every `.play-btn` element, then refresh the continuation
// controls so their state follows the same change.
function setPlayBtns(disabled) {
  for (const button of document.querySelectorAll('.play-btn')) {
    button.disabled = disabled;
  }
  updateContinuationControls();
}
// ── State ─────────────────────────────────────────────────────────────────────
// Generation settings (mirrored by the toggles in the settings sheet)
let genMode = 'stream';          // 'stream' or 'non'; written by setMode(), read by generate()
let nonStreamingMode = false;    // sent to the server as the `non_streaming_mode` form field
let xvecOnly = false;            // sent as `xvec_only`; when true the transcript field is hidden

// Reference audio for voice cloning
let refFile = null;              // File uploaded or recorded by the user
let presetRefId = null;          // id of the selected preset reference, if any
let presetRefs = [];             // preset list received via renderPresetRefs()

// Model lifecycle
let loadedModel = null;          // model id currently loaded
let loadingModel = null;         // model id while a /load request is in flight

// Request / result bookkeeping
let busy = false;                // true while a generation or comparison is running
let dlBlob = null;               // Blob of the most recent generated clip (set by setPlayer)
let lastContinuationSessionId = null; // session id returned by the last completed generation
// Built-in speaker catalogue. populateSpeakers() uses it for canonical id
// capitalisation, for option labels, and as a fallback when no speaker list is supplied.
// `desc` is shown to the user; the "English" prefix is also used to sort English voices first.
const defaultSpeakers = [
  { id: 'Vivian', desc: 'Chinese — Bright young female' },
  { id: 'Serena', desc: 'Chinese — Warm gentle female' },
  { id: 'Uncle_Fu', desc: 'Chinese — Seasoned low male' },
  { id: 'Dylan', desc: 'Chinese — Youthful Beijing male' },
  { id: 'Eric', desc: 'Chinese (Sichuan) — Lively male' },
  { id: 'Ryan', desc: 'English — Dynamic male' },
  { id: 'Aiden', desc: 'English — Sunny American male' },
  { id: 'Ono_Anna', desc: 'Japanese — Playful female' },
  { id: 'Sohee', desc: 'Korean — Warm female' },
];
// Web Audio (playback of streamed chunks)
let actx = null;                 // playback AudioContext, created lazily by initAudio()
let sproc = null;                // ScriptProcessor node that drains pcmQueue
let pcmQueue = [];               // pending segments: { data: Float32Array, pos: read offset }
let rawPcmParts = [];            // raw PCM bytes of every chunk, kept for buildFinalWav()
let rawPcmSr = 24000;            // sample rate of the current stream
let chunkQ = Promise.resolve();  // promise chain that serialises enqueueChunk() work

// Client-side timing of the current request
let clientT0 = 0;                // performance.now() when the request was sent
let firstChunkAt = null;         // when the first chunk was queued
let firstAudioAt = null;         // when the first samples were written to the output
let lastBufS = 0;                // seconds of audio queued at the last enqueue
let firstServerWall = null;      // elapsed_ms reported with the first chunk

// Recording
let recStream = null;
let recCtx = null;
let recSource = null;
let recProc = null;
let recGain = null;
let recAnalyser = null;
let recMeterRaf = null;          // requestAnimationFrame handle of the level meter
let recMeterBuf = null;          // scratch buffer for the analyser's time-domain data
let recChunks = [];              // captured Float32Array blocks
let recActive = false;
let recDeviceId = null;          // selected input device id
let recPeak = 0;
let recHasSignal = false;
let recSignalTs = null;
let recSignalCount = 0;
const REC_SR = 24000;            // sample rate recordings are resampled to before upload
// ── Init ──────────────────────────────────────────────────────────────────────
// Page bootstrap: apply the theme, size the textareas, query the server, and
// auto-load a model when none is loaded yet.
window.addEventListener('load', async () => {
  initTheme();
  for (const id of ['voiceInstr', 'continuationText']) autoGrow($(id));
  await fetchStatus();
  setContinuationSession(null);
  const shouldAutoLoad = !loadedModel && availableModels.length > 0;
  if (shouldAutoLoad) {
    // Prefer CustomVoice as default (richer demo), fall back to first available
    const customVoice = availableModels.find(m => m.includes('CustomVoice'));
    $('modelSel').value = customVoice || availableModels[0];
    loadModel();
  }
  loadMics();
  populateSpeakers([]);
  // Warm the audio pipeline on the first pointer press only; the listener removes itself.
  window.addEventListener('pointerdown', () => { warmAudio(); }, { passive: true, once: true });
});
// ── Theme ─────────────────────────────────────────────────────────────────────
// Pick the initial theme: a stored choice wins, otherwise follow the OS colour scheme.
function initTheme() {
  let saved = null;
  try {
    saved = localStorage.getItem('theme');
  } catch {
    // Accessing localStorage can throw when the browser blocks storage.
    // Without this guard the exception would abort the whole load handler.
  }
  const t = saved ||
    (matchMedia('(prefers-color-scheme: light)').matches ? 'light' : 'dark');
  applyTheme(t);
}
// Flip between 'dark' and 'light'; a missing data-theme attribute counts as 'dark'.
function toggleTheme() {
  applyTheme((document.documentElement.dataset.theme || 'dark') === 'dark' ? 'light' : 'dark');
}
// Apply theme `t` ('dark' | 'light'): set <html data-theme>, persist the choice
// (best effort), and swap the toggle icon (sun while dark, moon while light).
function applyTheme(t) {
  document.documentElement.dataset.theme = t;
  try {
    localStorage.setItem('theme', t);
  } catch {
    // Persisting is optional; the theme still applies for this page view.
  }
  $('themeIcon').innerHTML = t === 'dark'
    ? '<circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/>'
    : '<path d="M21 12.79A9 9 0 1111.21 3 7 7 0 0021 12.79z"/>';
}
// ── Settings ──────────────────────────────────────────────────────────────────
// Show the settings overlay.
function openSettings() {
  const overlay = $('settingsOv');
  overlay.classList.add('open');
}
// Hide the settings overlay. With an event, only a click whose target is the
// overlay itself (the backdrop) closes it; without an event it always closes.
function closeSettings(e) {
  const overlay = $('settingsOv');
  const clickedInside = Boolean(e) && e.target !== overlay;
  if (!clickedInside) overlay.classList.remove('open');
}
// Select the generation mode ('stream' or 'non'), highlight the matching toggle,
// and show the chunk-size row only while streaming.
function setMode(m) {
  genMode = m;
  for (const tog of document.querySelectorAll('.tog[data-m]')) {
    tog.classList.toggle('on', tog.dataset.m === m);
  }
  const streaming = m === 'stream';
  $('chunkRow').style.display = streaming ? '' : 'none';
}
// Store the "text feed" choice and highlight the matching toggle
// (data-nsm="1" for true, data-nsm="0" for false).
function setNonStreamingMode(v) {
  nonStreamingMode = v;
  const wanted = v ? '1' : '0';
  for (const tog of document.querySelectorAll('.tog[data-nsm]')) {
    tog.classList.toggle('on', tog.dataset.nsm === wanted);
  }
}
// Returns true when `modelId` names a CustomVoice or VoiceDesign model,
// false for any other id and for empty/missing ids.
function defaultNonStreamingModeForModel(modelId) {
  if (!modelId) return false;
  return ['CustomVoice', 'VoiceDesign'].some(tag => modelId.includes(tag));
}
// Reset the text-feed toggle to the default for `modelId`; no-op without an id.
function syncTextFeedDefault(modelId) {
  if (modelId) setNonStreamingMode(defaultNonStreamingModeForModel(modelId));
}
// Store the xvec-only flag, highlight the matching toggle, and hide the
// transcript section while the flag is on.
function setXvec(v) {
  xvecOnly = v;
  const wanted = v ? '1' : '0';
  for (const tog of document.querySelectorAll('.tog[data-x]')) {
    tog.classList.toggle('on', tog.dataset.x === wanted);
  }
  $('advTranscript').style.display = v ? 'none' : '';
}
// Rebuild the preset-reference buttons from `list` (entries with `id` and optional `label`).
// The first preset is auto-selected only when the user has no reference at all.
function renderPresetRefs(list) {
  presetRefs = list || [];
  const row = $('presetRow');
  if (!row) return;
  row.innerHTML = '';
  presetRefs.forEach(p => {
    const b = document.createElement('button');
    b.type = 'button';
    b.className = 'preset-btn';
    b.textContent = p.label || p.id;
    b.dataset.id = p.id;
    b.onclick = () => selectPresetRef(p.id);
    row.appendChild(b);
  });
  updatePresetButtons();
  // This runs on every /status refresh (e.g. after a model load). Without the
  // refFile check it would replace audio the user uploaded or recorded with the first preset.
  if (presetRefs.length > 0 && !presetRefId && !refFile) selectPresetRef(presetRefs[0].id);
}
// Mark the button of the currently selected preset as active and clear the rest.
function updatePresetButtons() {
  const row = $('presetRow');
  if (!row) return;
  for (const btn of row.querySelectorAll('.preset-btn')) {
    btn.classList.toggle('active', btn.dataset.id === presetRefId);
  }
}
// Forget the selected preset and update the button highlighting accordingly.
function clearPresetSelection() {
  presetRefId = null; // no preset is active any more
  updatePresetButtons();
}
// Fetch preset `id` from the server and make it the active voice-clone reference:
// show its audio in the preview player and fill in its transcript.
// Any failure (network, HTTP error, malformed payload) shows one error message.
async function selectPresetRef(id) {
  try {
    // Encode the id so characters such as '/', '?' or '#' cannot alter the request path.
    const res = await fetch(`/preset_ref/${encodeURIComponent(id)}`);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const d = await res.json();
    const bytes = Uint8Array.from(atob(d.audio_b64), c => c.charCodeAt(0));
    const blob = new Blob([bytes], { type: 'audio/wav' });
    const file = new File([blob], d.filename || `${id}.wav`, { type: 'audio/wav' });
    presetRefId = id;
    refFile = null; // avoid uploading when preset is selected
    setXvec(false); // presets have verified transcripts — use ICL mode
    $('refLabel').textContent = d.label || d.filename || 'Reference audio';
    $('refChip').classList.add('has');
    const prev = $('recPreview');
    if (prev.src) URL.revokeObjectURL(prev.src); // release the previous preview URL
    prev.src = URL.createObjectURL(file);
    prev.style.display = 'block';
    $('refTextIn').value = d.ref_text || '';
    autoGrow($('refTextIn'));
    updatePresetButtons();
  } catch {
    showMsg('err', 'Failed to load preset audio.');
  }
}
// ── Auto-transcription ─────────────────────────────────────────────────────────
// Send the current reference file to /transcribe and put the text into the
// transcript field. Failures are non-fatal: the field is left empty for manual input.
async function transcribeRef() {
  if (!refFile) return;
  const file = refFile; // remember which reference this request belongs to
  const ta = $('refTextIn');
  ta.disabled = true;
  ta.value = '';
  ta.placeholder = 'Transcribing…';
  autoGrow(ta);
  try {
    const fd = new FormData();
    fd.append('audio', file);
    const res = await fetch('/transcribe', { method: 'POST', body: fd });
    if (!res.ok) return; // transcription unavailable — leave field empty
    const d = await res.json();
    // The reference may have changed while waiting (a preset was picked or another
    // file was chosen); a stale result must not overwrite that transcript.
    if (refFile !== file) return;
    ta.value = d.text || '';
    autoGrow(ta);
  } catch {
    // network error — leave field empty, user can type manually
  } finally {
    ta.disabled = false;
    if (!ta.value) ta.placeholder = 'Type the full transcript here…';
  }
}
// ── Model ─────────────────────────────────────────────────────────────────────
// Query /status and sync the UI: model selector, speakers, preset references and
// the status pill. Any failure marks the backend as offline.
async function fetchStatus() {
  try {
    const res = await fetch('/status');
    const status = await res.json();
    applyAvailableModels(status.available_models || []);
    const modelReady = status.loaded && status.model;
    if (modelReady) {
      // Only trust loadedModel when the server confirms it's actually loaded
      loadedModel = status.model;
      $('modelSel').value = status.model;
      syncTextFeedDefault(status.model);
    }
    populateSpeakers(status.speakers || []);
    renderPresetRefs(status.preset_refs || []);
    if (status.loaded) setPill('loaded', 'ready');
    else setPill('off', 'not loaded');
  } catch {
    setPill('off', 'offline');
  }
}
// Ask the server to load the model chosen in #modelSel. While the request runs the
// Load button and play buttons are disabled and the pill shows "loading...".
// On success the status is re-fetched (not awaited); on failure the pill shows the error state.
async function loadModel() {
  const btn = $('loadBtn');
  btn.disabled = true;
  btn.textContent = 'Loading...';
  loadingModel = $('modelSel').value;
  loadedModel = null;
  setContinuationSession(null);
  setPlayBtns(true);
  setPill('loading', 'loading...');
  try {
    const fd = new FormData();
    fd.append('model_id', loadingModel);
    const res = await fetch('/load', { method: 'POST', body: fd });
    const { status } = await res.json();
    const succeeded = ['loaded', 'already_loaded'].includes(status);
    if (!succeeded) {
      loadingModel = null;
      setPill('error', 'failed');
    } else {
      loadedModel = loadingModel;
      loadingModel = null;
      syncTextFeedDefault(loadedModel);
      setPill('loaded', 'ready');
      fetchStatus();
    }
  } catch {
    loadingModel = null;
    setPill('error', 'error');
  }
  btn.disabled = false;
  btn.textContent = 'Load';
  setPlayBtns(false);
}
// Update the model status pill (dot class + text) and refresh the mode rows.
function setPill(state, label) {
  $('mdot').className = `mdot ${state}`;
  $('mtext').textContent = label;
  updateModeRows();
}
// Tag the three mode rows according to the model being loaded (preferred) or already loaded:
// the matching row is 'loading' or 'compatible', the others 'incompatible'.
// With no model at all, every class is removed.
function updateModeRows() {
  const current = loadingModel ?? loadedModel;
  let activeRow = null;
  if (current) {
    if (current.includes('VoiceDesign')) activeRow = 'rowCustom';
    else if (current.includes('CustomVoice')) activeRow = 'rowChar';
    else activeRow = 'rowClone';
  }
  const isLoading = Boolean(loadingModel);
  ['rowClone', 'rowChar', 'rowCustom'].forEach(id => {
    const classes = $(id).classList;
    const isActive = id === activeRow;
    classes.toggle('compatible', isActive && !isLoading);
    classes.toggle('loading', isActive && isLoading);
    classes.toggle('incompatible', activeRow !== null && !isActive);
  });
}
// ── Arcade mode loader ─────────────────────────────────────────────────────────
// Every model the UI knows about, grouped by generation mode.
// `name` and `sub` are shown on the loader cards; `id` is sent to /load.
const ALL_MODE_MODELS = {
  voice_clone: [
    { id: 'Qwen/Qwen3-TTS-12Hz-0.6B-Base', name: '0.6B Base', sub: 'Fastest · RTF ~4×' },
    { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-Base', name: '1.7B Base', sub: 'Higher quality · RTF ~3.5×' },
  ],
  custom: [
    { id: 'Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice', name: '0.6B CustomVoice', sub: 'Fastest' },
    { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice', name: '1.7B CustomVoice', sub: 'Higher quality' },
  ],
  voice_design: [
    { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', name: '1.7B VoiceDesign', sub: 'Describe any voice' },
  ],
};
// Filtered at runtime by available_models from /status
let MODE_MODELS = ALL_MODE_MODELS;
// Titles shown in the loader sheet header, keyed by mode
const MODE_LABELS = { voice_clone: 'Clone', custom: 'Custom', voice_design: 'Voice Design' };
let loaderMode = null;           // mode the loader sheet was opened for
let loaderSelectedModel = null;  // model id highlighted in the loader sheet
let availableModels = [];        // model ids reported by /status
// Restrict the UI to the model ids in `available`: rebuild the #modelSel options,
// filter MODE_MODELS, and hide mode rows that have no usable model.
function applyAvailableModels(available) {
  availableModels = available || [];
  const avail = new Set(availableModels);
  const isAvailable = m => avail.has(m.id);

  // Rebuild model selector options
  const sel = $('modelSel');
  const previous = sel.value;
  sel.innerHTML = '';
  Object.values(ALL_MODE_MODELS).flat().filter(isAvailable).forEach(m => {
    const opt = document.createElement('option');
    opt.value = m.id;
    opt.textContent = m.name;
    sel.appendChild(opt);
  });

  // Restore selection if still available, else pick first
  if (avail.has(previous)) sel.value = previous;
  else if (sel.options.length > 0) sel.value = sel.options[0].value;

  // Filter MODE_MODELS to available only
  MODE_MODELS = Object.fromEntries(
    Object.entries(ALL_MODE_MODELS).map(([mode, models]) => [mode, models.filter(isAvailable)])
  );

  // Hide mode rows that have no available models; show those that do
  const rowFor = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
  Object.keys(rowFor).forEach(mode => {
    $(rowFor[mode]).style.display = MODE_MODELS[mode].length > 0 ? '' : 'none';
  });
}
// Clicking a mode row opens the model loader only when that row is currently
// marked 'incompatible' with the active model.
function onModeRowClick(mode) {
  const rowId = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' }[mode];
  if ($(rowId).classList.contains('incompatible')) openModelLoader(mode);
}
// Open the loader sheet for `mode`, listing its available models as selectable
// cards with the first one preselected.
function openModelLoader(mode) {
  const models = MODE_MODELS[mode] || [];
  if (!models.length) {
    // Unknown mode or nothing available: `models[0].id` would throw below.
    showMsg('err', 'No models available for this mode.');
    return;
  }
  loaderMode = mode;
  loaderSelectedModel = models[0].id;
  const grid = $('loaderGrid');
  grid.innerHTML = '';
  models.forEach(m => {
    const card = document.createElement('div');
    card.className = 'lcard' + (m.id === loaderSelectedModel ? ' sel' : '');
    card.dataset.id = m.id;
    // Build the two text lines as nodes so the strings are never parsed as HTML.
    const name = document.createElement('div');
    name.className = 'lc-name';
    name.textContent = m.name;
    const sub = document.createElement('div');
    sub.className = 'lc-sub';
    sub.textContent = m.sub;
    card.append(name, sub);
    card.onclick = () => {
      loaderSelectedModel = m.id;
      grid.querySelectorAll('.lcard').forEach(c => c.classList.toggle('sel', c.dataset.id === m.id));
    };
    grid.appendChild(card);
  });
  $('loaderModeLabel').textContent = MODE_LABELS[mode] || mode;
  $('loaderOv').classList.add('open');
}
// Hide the loader overlay. With an event, only a click on the overlay itself
// (the backdrop) closes it; without an event it always closes.
function closeLoader(e) {
  const overlay = $('loaderOv');
  const clickedInside = Boolean(e) && e.target !== overlay;
  if (!clickedInside) overlay.classList.remove('open');
}
// Confirm the loader sheet: close it, copy the highlighted model into #modelSel and load it.
async function loadFromSheet() {
  const chosen = loaderSelectedModel;
  if (!chosen) return;
  $('loaderOv').classList.remove('open');
  $('modelSel').value = chosen;
  await loadModel();
}
// ── Speakers ──────────────────────────────────────────────────────────────────
// Rebuild the #speakerId options.
//   list: array of speaker ids, or an object whose keys are ids; empty/missing falls back to defaultSpeakers.
// Known ids get canonical capitalisation and a descriptive label. English voices are listed first.
// The previous selection is preserved (matched case-insensitively), otherwise 'Aiden' or the first id is chosen.
function populateSpeakers(list) {
  const sel = $('speakerId');
  const prev = sel.value;
  sel.innerHTML = '<option value="" disabled hidden>Select speaker...</option>';
  // Ids are always stringified before comparing, so non-string ids cannot make .toLowerCase() throw.
  const key = v => String(v).toLowerCase();
  const defaultMap = new Map(defaultSpeakers.map(s => [key(s.id), s]));
  let ids = Array.isArray(list) ? list : Object.keys(list || {});
  if (!ids.length) ids = defaultSpeakers.map(s => s.id);
  // Normalize to canonical capitalization from defaultSpeakers where known
  ids = ids.map(id => defaultMap.get(key(id))?.id ?? String(id));
  // Keep previous selection even if backend list doesn't include it (case-insensitive)
  if (prev && !ids.some(id => key(id) === key(prev))) ids = [prev, ...ids];
  // Order: English first (based on known descriptions), then the rest
  ids.sort((a, b) => {
    const aInfo = defaultMap.get(key(a));
    const bInfo = defaultMap.get(key(b));
    const aEn = aInfo && aInfo.desc.includes('English') ? 0 : 1;
    const bEn = bInfo && bInfo.desc.includes('English') ? 0 : 1;
    if (aEn !== bEn) return aEn - bEn;
    return String(a).localeCompare(String(b));
  });
  ids.forEach(id => {
    const o = document.createElement('option');
    o.value = id; // keep backend ID for requests
    const info = defaultMap.get(key(id));
    o.textContent = info ? `${info.id} — ${info.desc}` : id;
    sel.appendChild(o);
  });
  // Restore by case-insensitive match: assigning `prev` verbatim would select
  // nothing when only the capitalisation of the id changed.
  const pick = wanted => ids.find(id => key(id) === key(wanted));
  const restored = prev ? pick(prev) : undefined;
  sel.value = restored ?? pick('Aiden') ?? ids[0] ?? '';
}
// ── Mic devices ───────────────────────────────────────────────────────────────
// Fill #micSel with the audio input devices and pick one: the previous
// selection if still present, else a device that looks like the default, else the first.
// Errors are ignored on purpose (best effort).
async function loadMics() {
  if (!navigator.mediaDevices?.enumerateDevices) return;
  try {
    const all = await navigator.mediaDevices.enumerateDevices();
    const mics = all.filter(dev => dev.kind === 'audioinput');
    const sel = $('micSel');
    const wanted = recDeviceId || sel.value;
    sel.innerHTML = '';
    let defaultId = null;
    mics.forEach((mic, idx) => {
      const opt = document.createElement('option');
      opt.value = mic.deviceId;
      opt.textContent = mic.label || `Microphone ${idx + 1}`;
      sel.appendChild(opt);
      const label = (mic.label || '').toLowerCase();
      const looksDefault = mic.deviceId === 'default' || label.includes('default');
      if (!defaultId && looksDefault) defaultId = mic.deviceId;
    });
    sel.onchange = () => { recDeviceId = sel.value; };
    let chosen;
    if (wanted && mics.some(mic => mic.deviceId === wanted)) chosen = wanted;
    else if (defaultId) chosen = defaultId;
    else if (mics.length) chosen = mics[0].deviceId;
    if (chosen !== undefined) {
      recDeviceId = chosen;
      sel.value = chosen;
    }
  } catch {}
}
// ── Reference audio ───────────────────────────────────────────────────────────
// File-input handler: use the first chosen file (if any) as the reference audio.
function pickRef(input) {
  const chosen = input.files[0];
  if (chosen) setRef(chosen);
}
// Make file `f` the reference audio: drop any preset selection, update the chip
// and preview player, then start auto-transcription.
function setRef(f) {
  clearPresetSelection();
  refFile = f;
  $('refLabel').textContent = f.name;
  $('refChip').classList.add('has');
  const preview = $('recPreview');
  const stale = preview.src;
  if (stale) URL.revokeObjectURL(stale);
  preview.src = URL.createObjectURL(f);
  preview.style.display = 'block';
  transcribeRef();
}
// ── Recording ─────────────────────────────────────────────────────────────────
// Mic button handler: stop when recording, start otherwise.
function toggleRec() {
  if (!recActive) {
    startRec();
    return;
  }
  stopRec();
}
// Start recording from the selected microphone.
// Raw Float32 blocks are collected in recChunks by a ScriptProcessor; an analyser
// drives the level meter and the "Waiting for signal… / Recording…" status.
// If the user stops while any await below is pending, the setup is abandoned and
// everything already acquired is released.
async function startRec() {
  if (!navigator.mediaDevices?.getUserMedia) {
    showMsg('err', 'Recording not supported. Use HTTPS or localhost.');
    return;
  }
  // Secure-context guard — 0.0.0.0 will silently fail without this
  if (!window.isSecureContext) {
    const { hostname, port } = window.location;
    if (hostname !== 'localhost' && hostname !== '127.0.0.1') {
      showMsg('err', `Recording requires HTTPS or localhost. Try http://localhost:${port}/`);
      return;
    }
  }
  hideMsg();
  recChunks = []; recPeak = 0;
  recHasSignal = false; recSignalTs = null; recSignalCount = 0;
  recActive = true;
  $('micBtn').classList.add('rec');
  $('recStatus').textContent = 'Starting mic…';
  $('recBarWrap').style.display = 'block';
  $('recPreview').style.display = 'none';
  try {
    const constraints = {
      audio: { channelCount: 1, echoCancellation: false, noiseSuppression: false, autoGainControl: false }
    };
    if (recDeviceId) constraints.audio.deviceId = { exact: recDeviceId };
    const stream = await navigator.mediaDevices.getUserMedia(constraints);
    if (!recActive) {
      // Stopped while the permission prompt was open: stopRec() has already run
      // its cleanup, so this stream must be released here or the mic stays open.
      stream.getTracks().forEach(t => t.stop());
      return;
    }
    recStream = stream;
    await loadMics(); // refresh labels now that permission is granted
    if (!recActive) { cleanupRec(); return; }
    recCtx = new (window.AudioContext || window.webkitAudioContext)({ latencyHint: 'interactive' });
    if (recCtx.state === 'suspended') await recCtx.resume();
    if (!recActive) { cleanupRec(); return; }
    recSource = recCtx.createMediaStreamSource(recStream);
    recAnalyser = recCtx.createAnalyser();
    recAnalyser.fftSize = 2048;
    recGain = recCtx.createGain();
    recGain.gain.value = 0.0001; // near-silent, keeps graph alive
    recProc = recCtx.createScriptProcessor(4096, 1, 1);
    recProc.onaudioprocess = e => {
      if (!recActive) return;
      const inp = e.inputBuffer.getChannelData(0);
      // Copy the block (the code does not retain the input buffer) and track the peak level.
      const copy = new Float32Array(inp.length);
      for (let i = 0; i < inp.length; i++) {
        copy[i] = inp[i];
        const a = Math.abs(inp[i]);
        if (a > recPeak) recPeak = a;
      }
      recChunks.push(copy);
    };
    recSource.connect(recAnalyser);
    recAnalyser.connect(recProc);
    recProc.connect(recGain);
    recGain.connect(recCtx.destination);
    recMeterBuf = new Uint8Array(recAnalyser.fftSize);
    // Level meter, redrawn every animation frame while recording
    const tick = () => {
      if (!recActive) return;
      recAnalyser.getByteTimeDomainData(recMeterBuf);
      let peak = 0;
      for (let i = 0; i < recMeterBuf.length; i++) {
        const v = Math.abs(recMeterBuf[i] - 128); // 128 is the zero level of the byte data
        if (v > peak) peak = v;
      }
      const norm = Math.min(1, peak / 64);
      $('recBar').style.width = Math.round(norm * 100) + '%';
      // Require 4 consecutive frames above the threshold before the timer starts
      if (norm > 0.02) recSignalCount++;
      else recSignalCount = 0;
      if (!recHasSignal && recSignalCount >= 4) {
        recHasSignal = true;
        recSignalTs = performance.now();
      }
      $('recStatus').textContent = recHasSignal ? 'Recording…' : 'Waiting for signal…';
      if (recHasSignal) {
        const s = Math.floor((performance.now() - recSignalTs) / 1000);
        $('recTime').textContent = Math.floor(s / 60) + ':' + String(s % 60).padStart(2, '0');
      }
      recMeterRaf = requestAnimationFrame(tick);
    };
    recMeterRaf = requestAnimationFrame(tick);
  } catch (e) {
    showMsg('err', 'Mic failed: ' + e.message);
    cleanupRec();
  }
}
// Stop recording: reset the recording UI, join the captured blocks, show a
// preview, resample to REC_SR if needed and install the result as the reference
// file (which also starts transcription). Recording resources are always released at the end.
async function stopRec() {
  recActive = false;
  if (recMeterRaf) {
    cancelAnimationFrame(recMeterRaf);
    recMeterRaf = null;
  }
  $('micBtn').classList.remove('rec');
  $('recStatus').textContent = '';
  $('recTime').textContent = '';
  $('recBar').style.width = '0%';
  $('recBarWrap').style.display = 'none';
  if (!recChunks.length) {
    showMsg('err', 'No audio captured.');
    cleanupRec();
    return;
  }
  try {
    let totalSamples = 0;
    for (const chunk of recChunks) totalSamples += chunk.length;
    const captured = recCtx.createBuffer(1, totalSamples, recCtx.sampleRate);
    const samples = captured.getChannelData(0);
    let cursor = 0;
    for (const chunk of recChunks) {
      samples.set(chunk, cursor);
      cursor += chunk.length;
    }
    // Show preview immediately from raw buffer — no resampling needed for playback
    const rawWav = bufToWav(captured);
    const preview = $('recPreview');
    if (preview.src) URL.revokeObjectURL(preview.src);
    preview.src = URL.createObjectURL(rawWav);
    preview.style.display = 'block';
    // Resample to 24kHz for server use (codec's native rate; transcription server handles 16kHz resampling)
    let forServer = captured;
    if (captured.sampleRate !== REC_SR) {
      const frames = Math.ceil(captured.duration * REC_SR);
      const offline = new OfflineAudioContext(1, frames, REC_SR);
      const node = offline.createBufferSource();
      node.buffer = captured;
      node.connect(offline.destination);
      node.start(0);
      forServer = await offline.startRendering();
    }
    const wav = bufToWav(forServer);
    clearPresetSelection();
    refFile = new File([wav], 'recorded.wav', { type: 'audio/wav' });
    $('refLabel').textContent = 'recorded.wav';
    $('refChip').classList.add('has');
    transcribeRef();
  } catch (e) {
    showMsg('err', 'Recording failed: ' + e.message);
  }
  cleanupRec();
}
// Release everything a recording holds: media tracks, meter animation frame,
// audio nodes and the recording AudioContext. Every step is guarded, so it is
// safe to call when nothing is active.
function cleanupRec() {
  if (recStream) {
    for (const track of recStream.getTracks()) track.stop();
  }
  recStream = null;
  recActive = false;
  if (recMeterRaf) {
    cancelAnimationFrame(recMeterRaf);
    recMeterRaf = null;
  }
  recMeterBuf = null;
  for (const node of [recAnalyser, recProc, recGain, recSource]) {
    if (node) node.disconnect();
  }
  if (recCtx) recCtx.close().catch(() => {});
  recAnalyser = null;
  recProc = null;
  recGain = null;
  recSource = null;
  recCtx = null;
  $('micBtn').classList.remove('rec');
}
// Encode channel 0 of an AudioBuffer-like object (getChannelData + sampleRate)
// as a mono 16-bit PCM WAV Blob with a 44-byte header.
function bufToWav(buffer) {
  const samples = buffer.getChannelData(0);
  const rate = buffer.sampleRate;
  const dataBytes = samples.length * 2;
  const file = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(file);
  const ascii = (at, text) => {
    for (let k = 0; k < text.length; k++) view.setUint8(at + k, text.charCodeAt(k));
  };
  // RIFF container
  ascii(0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true);
  ascii(8, 'WAVE');
  // fmt chunk: PCM (1), mono, 16 bits per sample
  ascii(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, 1, true);
  view.setUint32(24, rate, true);
  view.setUint32(28, rate * 2, true); // byte rate
  view.setUint16(32, 2, true);        // block align
  view.setUint16(34, 16, true);
  // data chunk
  ascii(36, 'data');
  view.setUint32(40, dataBytes, true);
  samples.forEach((sample, idx) => {
    const s = Math.max(-1, Math.min(1, sample)); // clamp to [-1, 1]
    view.setInt16(44 + idx * 2, s < 0 ? s * 0x8000 : s * 0x7fff, true);
  });
  return new Blob([file], { type: 'audio/wav' });
}
// ── Web Audio: gapless streaming via ScriptProcessor ─────────────────────────
// Create the playback audio pipeline ahead of time (invoked on the first pointer
// press). Does nothing once a context exists; failures are ignored.
async function warmAudio() {
  if (!actx) {
    try {
      await initAudio(24000);
    } catch {}
  }
}
// Reset the per-stream playback state and make sure the playback pipeline exists.
//   sr: sample rate of the upcoming stream (defaults to 24000).
// An existing AudioContext is reused (and resumed if suspended); otherwise a
// context and a ScriptProcessor that drains pcmQueue are created.
async function initAudio(sr) {
  rawPcmSr = sr || 24000;
  pcmQueue = [];
  rawPcmParts = [];
  chunkQ = Promise.resolve();
  dlBlob = null;
  firstChunkAt = null;
  firstAudioAt = null;
  lastBufS = 0;
  if (actx) {
    if (actx.state === 'suspended') await actx.resume();
    return;
  }
  const AudioCtx = window.AudioContext || window.webkitAudioContext;
  actx = new AudioCtx({ sampleRate: rawPcmSr });
  sproc = actx.createScriptProcessor(256, 0, 1);
  sproc.onaudioprocess = (event) => {
    const dest = event.outputBuffer.getChannelData(0);
    let filled = 0;
    let wroteAudio = false;
    while (filled < dest.length) {
      if (!pcmQueue.length) {
        // Nothing queued: pad the remainder with silence
        dest.fill(0, filled);
        break;
      }
      const head = pcmQueue[0];
      const count = Math.min(dest.length - filled, head.data.length - head.pos);
      dest.set(head.data.subarray(head.pos, head.pos + count), filled);
      head.pos += count;
      filled += count;
      wroteAudio = true;
      if (head.pos >= head.data.length) pcmQueue.shift(); // segment fully consumed
    }
    if (wroteAudio && firstAudioAt == null) {
      firstAudioAt = performance.now();
      pushClientMetrics();
    }
  };
  if (actx.state === 'suspended') await actx.resume();
  sproc.connect(actx.destination);
}
// Extract 16-bit PCM from a WAV byte array.
//   bytes: Uint8Array holding a RIFF/WAVE file.
// Returns { pcm: Float32Array in [-1, 1), rawPcm: Uint8Array of the PCM bytes, sr: sample rate },
// or null when the input is too short or has no 'data' chunk.
// Tolerates a declared data size larger than the bytes actually present
// (assumed to happen with streamed WAVs — TODO confirm) and an odd trailing byte.
function parseWav(bytes) {
  // 12-byte RIFF header + one 8-byte chunk header is the minimum worth walking.
  if (!bytes || bytes.length < 20) return null;
  const v = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  // Canonical layout keeps the sample rate at byte 24; a 'fmt ' chunk found while walking overrides it.
  let sr = bytes.length >= 28 ? v.getUint32(24, true) : 0;
  let off = 12;
  while (off + 8 <= bytes.length) {
    const id = String.fromCharCode(bytes[off], bytes[off + 1], bytes[off + 2], bytes[off + 3]);
    const size = v.getUint32(off + 4, true);
    const body = off + 8;
    if (id === 'fmt ' && body + 8 <= bytes.length) {
      sr = v.getUint32(body + 4, true);
    } else if (id === 'data') {
      const end = Math.min(bytes.length, body + size);
      const usable = end - body;
      const evenLen = usable - (usable % 2); // whole 16-bit samples only
      const raw = bytes.slice(body, body + evenLen); // copy, so the Int16 view is aligned
      const i16 = new Int16Array(raw.buffer, raw.byteOffset, evenLen / 2);
      const f32 = new Float32Array(i16.length);
      for (let i = 0; i < i16.length; i++) f32[i] = i16[i] / 32768.0;
      return { pcm: f32, rawPcm: raw, sr };
    }
    // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
    off = body + size + (size % 2);
  }
  return null;
}
// Queue one base64-encoded WAV chunk for playback. Work is appended to the
// chunkQ promise chain so chunks are decoded and queued in arrival order.
function enqueueChunk(b64) {
  const handle = () => {
    if (!actx) return;
    const binary = atob(b64);
    const bytes = Uint8Array.from(binary, ch => ch.charCodeAt(0));
    const parsed = parseWav(bytes);
    if (!parsed) return;
    rawPcmParts.push(parsed.rawPcm);
    pcmQueue.push({ data: parsed.pcm, pos: 0 });
    if (firstChunkAt == null) firstChunkAt = performance.now();
    // Seconds of audio still waiting in the queue
    let pendingSamples = 0;
    for (const seg of pcmQueue) pendingSamples += seg.data.length - seg.pos;
    lastBufS = pendingSamples / rawPcmSr;
    pushClientMetrics();
  };
  chunkQ = chunkQ.then(handle);
}
// Concatenate all raw PCM parts of the current stream into one mono 16-bit WAV
// Blob at rawPcmSr. Returns null when no chunk was received.
function buildFinalWav() {
  if (!rawPcmParts.length) return null;
  let pcmBytes = 0;
  for (const part of rawPcmParts) pcmBytes += part.length;
  const HEADER_BYTES = 44;
  const file = new ArrayBuffer(HEADER_BYTES + pcmBytes);
  const view = new DataView(file);
  const ascii = (at, text) => {
    for (let k = 0; k < text.length; k++) view.setUint8(at + k, text.charCodeAt(k));
  };
  ascii(0, 'RIFF');
  view.setUint32(4, 36 + pcmBytes, true);
  ascii(8, 'WAVE');
  ascii(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);            // PCM
  view.setUint16(22, 1, true);            // mono
  view.setUint32(24, rawPcmSr, true);
  view.setUint32(28, rawPcmSr * 2, true); // byte rate
  view.setUint16(32, 2, true);            // block align
  view.setUint16(34, 16, true);           // bits per sample
  ascii(36, 'data');
  view.setUint32(40, pcmBytes, true);
  const payload = new Uint8Array(file, HEADER_BYTES);
  let cursor = 0;
  rawPcmParts.forEach(part => {
    payload.set(part, cursor);
    cursor += part.length;
  });
  return new Blob([file], { type: 'audio/wav' });
}
// ── Generate ──────────────────────────────────────────────────────────────────
// Run one generation for `mode` ('voice_clone' | 'custom' | 'voice_design').
// Checks that the loaded model fits the mode (offering a model switch otherwise)
// and that required inputs exist, then builds the form data and dispatches to the
// streaming or non-streaming runner. The busy flag and play buttons are always
// restored, and progress indicators are stopped on failure.
async function generate(mode) {
  if (busy) return;
  const isVD = loadedModel?.includes('VoiceDesign');
  const isCV = loadedModel?.includes('CustomVoice');
  if (mode === 'voice_design' && loadedModel && !isVD) {
    showMsg('warn', 'Voice design requires the 1.7B-VoiceDesign model.',
      'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', mode); return;
  }
  if (mode === 'custom' && loadedModel && !isCV) {
    showMsg('warn', 'Custom speaker requires a CustomVoice model.',
      'Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice', mode); return;
  }
  if (mode === 'voice_clone' && loadedModel && (isVD || isCV)) {
    showMsg('warn', 'Voice clone needs a Base model.',
      'Qwen/Qwen3-TTS-12Hz-0.6B-Base', mode); return;
  }
  if (mode === 'voice_clone' && !refFile && !presetRefId) {
    showMsg('err', 'Add reference audio first.');
    return;
  }
  if (mode === 'custom' && !$('speakerId').value) { showMsg('err', 'Select a speaker first.'); return; }
  hideMsg();
  $('queueBar').className = 'queue-bar';
  busy = true;
  setPlayBtns(true);
  $('results').classList.add('show');
  $('player').parentElement.style.display = 'none';
  $('waveInd').classList.remove('off');
  $('pbarWrap').classList.add('show');
  const pb = $('pbar');
  pb.style.width = ''; pb.classList.add('spin');
  resetMetrics();
  const fd = new FormData();
  fd.append('text', $('textIn').value);
  fd.append('mode', mode);
  fd.append('language', $('langSel').value);
  fd.append('temperature', $('tempSl').value);
  fd.append('top_k', $('topkSl').value);
  fd.append('repetition_penalty', $('repSl').value);
  fd.append('non_streaming_mode', nonStreamingMode ? 'true' : 'false');
  if (mode === 'voice_clone') {
    // A selected preset takes precedence over an uploaded file
    if (presetRefId) {
      fd.append('ref_preset', presetRefId);
    } else if (refFile) {
      fd.append('ref_audio', refFile);
    }
    fd.append('xvec_only', xvecOnly ? 'true' : 'false');
    if (!xvecOnly) fd.append('ref_text', $('refTextIn').value);
  } else if (mode === 'custom') {
    fd.append('speaker', $('speakerId').value); // value is already the bare ID
    fd.append('instruct', $('customInstr').value);
  } else {
    fd.append('instruct', $('voiceInstr').value);
  }
  try {
    if (genMode === 'stream') {
      fd.append('chunk_size', $('chunkSl').value);
      await runStream(fd);
    } else {
      await runNonStream(fd);
    }
  } catch (e) {
    showMsg('err', 'Generation failed: ' + e.message);
    pb.classList.remove('spin'); pb.style.width = '0%';
    // Stop the remaining "in progress" indicators too; otherwise the wave
    // animation and a queue notice keep showing after the failure.
    $('waveInd').classList.add('off');
    $('queueBar').className = 'queue-bar';
  } finally {
    busy = false;
    setPlayBtns(false);
  }
}
// POST `fd` to /generate/stream and consume the event stream ("data: {json}" lines).
// Event types: 'queued' (show queue position), 'chunk' (play audio + metrics),
// 'done' (finalise player and continuation session), 'error' (throws).
// Throws on HTTP errors, server-reported errors, malformed events, and when the
// stream ends without a 'done' event. The response reader is always released.
async function runStream(fd) {
  clientT0 = performance.now();
  firstChunkAt = null; firstAudioAt = null; lastBufS = 0; firstServerWall = null;
  const res = await fetch('/generate/stream', { method: 'POST', body: fd });
  if (!res.ok) {
    // The error body may not be JSON (e.g. a proxy error page); fall back to a generic message.
    const e = await res.json().catch(() => null);
    throw new Error(e?.detail || 'Request failed');
  }
  const reader = res.body.getReader();
  const dec = new TextDecoder();
  let buf = '', audioInited = false, finished = false;

  // Handle one line of the event stream; lines that are not "data: " events are ignored.
  const handleLine = async (line) => {
    if (!line.startsWith('data: ')) return;
    const d = JSON.parse(line.slice(6));
    if (d.type === 'queued') {
      const bar = $('queueBar');
      bar.textContent = d.position === 1
        ? 'Waiting... (1 request ahead of you)'
        : `Waiting... (${d.position} requests ahead of you)`;
      bar.classList.add('show');
    } else if (d.type === 'chunk') {
      $('queueBar').className = 'queue-bar';
      if (!audioInited) { await initAudio(d.sample_rate); audioInited = true; }
      // Capture server wall time on first chunk
      if (firstChunkAt == null && d.elapsed_ms != null) firstServerWall = d.elapsed_ms;
      pushMetrics(d.ttfa_ms, d.rtf, d.total_audio_s, d.voice_clone_ms);
      enqueueChunk(d.audio_b64);
    } else if (d.type === 'done') {
      $('queueBar').className = 'queue-bar';
      pushMetrics(d.ttfa_ms, d.rtf, d.total_audio_s, d.voice_clone_ms);
      await chunkQ; // wait until every queued chunk has been decoded
      setDone();
      const blob = buildFinalWav();
      if (blob) setPlayer(blob);
      setContinuationSession(d.continuation_session_id);
      finished = true;
    } else if (d.type === 'error') {
      $('queueBar').className = 'queue-bar';
      throw new Error(d.message);
    }
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buf += dec.decode(value, { stream: true });
      const lines = buf.split('\n');
      buf = lines.pop(); // keep the trailing partial line for the next read
      for (const line of lines) await handleLine(line);
    }
    // Flush a final event that was not terminated by a newline.
    buf += dec.decode();
    if (!finished && buf) await handleLine(buf);
  } finally {
    // Release the response stream; on the error path this cancels the rest of the
    // body instead of leaving it open. Harmless once the stream has ended.
    reader.cancel().catch(() => {});
  }
  if (!finished) throw new Error('Stream ended before generation completed.');
}
// POST `fd` to /generate, then show metrics, load the returned clip into the
// player, record the continuation session and try to start playback.
// Throws when the response status is not OK.
async function runNonStream(fd) {
  $('waveInd').classList.add('off');
  const res = await fetch('/generate', { method: 'POST', body: fd });
  if (!res.ok) {
    const errBody = await res.json();
    throw new Error(errBody.detail || 'Request failed');
  }
  const payload = await res.json();
  const metrics = payload.metrics;
  pushMetrics(metrics.total_ms, metrics.rtf, metrics.audio_duration_s, metrics.voice_clone_ms);
  setDone();
  const binary = atob(payload.audio_b64);
  const bytes = Uint8Array.from(binary, ch => ch.charCodeAt(0));
  setPlayer(new Blob([bytes], { type: 'audio/wav' }));
  setContinuationSession(payload.continuation_session_id);
  $('player').play().catch(() => {}); // gracefully handle autoplay block
}
// ── Metrics ───────────────────────────────────────────────────────────────────
// Blank every metric cell (em dash) and hide the voice-clone timing cell.
function resetMetrics() {
  for (const id of ['mTTFA', 'mClient', 'mRTF', 'mDur', 'mBuf', 'mClone']) {
    $(id).innerHTML = '&mdash;';
  }
  $('mCloneWrap').style.display = 'none';
}
// Display server-side metrics; null/undefined values leave their cell untouched.
//   ttfa: ms (rounded) · rtf: shown with 2 decimals · dur: seconds (1 decimal) · cloneMs: ms (rounded)
function pushMetrics(ttfa, rtf, dur, cloneMs) {
  const show = (id, text) => { $(id).textContent = text; };
  if (ttfa != null) show('mTTFA', `${Math.round(ttfa)}ms`);
  if (rtf != null) show('mRTF', `${rtf.toFixed(2)}x`);
  if (dur != null) show('mDur', `${dur.toFixed(1)}s`);
  if (cloneMs != null) {
    show('mClone', `${Math.round(cloneMs)}ms`);
    $('mCloneWrap').style.display = ''; // only visible once a value exists
  }
}
// Display client-side metrics: latency from request start to the first audio
// written (or, until then, to the first chunk received), and the queued buffer length.
function pushClientMetrics() {
  // Client TTFA: time from request to first audio heard
  const reference = firstAudioAt != null ? firstAudioAt : firstChunkAt;
  if (reference != null) {
    $('mClient').textContent = `${Math.round(reference - clientT0)}ms`;
  }
  // Buffer: audio queued ahead of playback
  if (lastBufS != null) {
    $('mBuf').textContent = `${lastBufS.toFixed(2)}s`;
  }
}
// Mark the generation as finished: stop the wave indicator and fill the progress bar.
function setDone() {
  $('waveInd').classList.add('off');
  const bar = $('pbar');
  bar.classList.remove('spin');
  bar.style.width = '100%';
}
// Load `blob` into the main player, remember it in dlBlob and reveal the player.
function setPlayer(blob) {
  dlBlob = blob;
  const player = $('player');
  const previous = player.src;
  player.src = URL.createObjectURL(blob);
  // Release the object URL of the clip being replaced so it can be reclaimed.
  if (previous && previous.startsWith('blob:')) URL.revokeObjectURL(previous);
  player.parentElement.style.display = '';
}
// Stop and empty the audio element with the given id (no-op if it does not exist),
// revoking its object URL when it was playing a blob.
function clearAudioPlayer(id) {
  const audio = $(id);
  if (!audio) return;
  audio.pause();
  const current = audio.src;
  if (current && current.startsWith('blob:')) URL.revokeObjectURL(current);
  audio.removeAttribute('src');
  audio.load();
}
// Blank the six comparison metric cells (fresh and continued) with an em dash.
function resetComparisonMetrics() {
  const cells = ['cmpFreshMs', 'cmpFreshRTF', 'cmpFreshDur', 'cmpContMs', 'cmpContRTF', 'cmpContDur'];
  for (const id of cells) $(id).innerHTML = '&mdash;';
}
// Empty both comparison players, reset their metrics and hide the results panel.
function clearContinuationResults() {
  for (const playerId of ['cmpFreshPlayer', 'cmpContPlayer']) clearAudioPlayer(playerId);
  resetComparisonMetrics();
  $('continuationResults').classList.remove('show');
}
// Record the continuation session id (falsy values become null). Old comparison
// results are cleared when the id changes; the controls are always refreshed.
function setContinuationSession(sessionId) {
  const next = sessionId || null;
  const previous = lastContinuationSessionId;
  lastContinuationSessionId = next;
  if (next !== previous) clearContinuationResults();
  updateContinuationControls();
}
// Enable the comparison button only when idle, a session exists and follow-up
// text is present; update the hint text to explain the current state.
function updateContinuationControls() {
  const hasSession = Boolean(lastContinuationSessionId);
  const hasText = Boolean($('continuationText').value.trim());
  const btn = $('continuationBtn');
  const status = $('continuationStatus');
  btn.disabled = busy || !hasSession || !hasText;
  let hint;
  if (!hasSession) hint = 'Generate a first sentence to unlock this comparison.';
  else if (busy) hint = 'Generation in progress…';
  else hint = 'Uses the last completed clip as sentence 1.';
  status.textContent = hint;
}
// Show one side of the continuation comparison.
//   kind: 'fresh' selects the fresh-generation widgets; any other value selects the continued ones.
//   payload: { audio_b64, metrics: { total_ms, rtf, audio_duration_s } }
function setComparisonResult(kind, payload) {
  const playerId = kind === 'fresh' ? 'cmpFreshPlayer' : 'cmpContPlayer';
  const msId = kind === 'fresh' ? 'cmpFreshMs' : 'cmpContMs';
  const rtfId = kind === 'fresh' ? 'cmpFreshRTF' : 'cmpContRTF';
  const durId = kind === 'fresh' ? 'cmpFreshDur' : 'cmpContDur';
  clearAudioPlayer(playerId); // also revokes the previous clip's object URL
  const bytes = Uint8Array.from(atob(payload.audio_b64), c => c.charCodeAt(0));
  const blob = new Blob([bytes], { type: 'audio/wav' });
  $(playerId).src = URL.createObjectURL(blob);
  // Round like pushMetrics() does, so the same metric is not shown with fractional digits here.
  $(msId).textContent = Math.round(payload.metrics.total_ms) + 'ms';
  $(rtfId).textContent = payload.metrics.rtf.toFixed(2) + 'x';
  $(durId).textContent = payload.metrics.audio_duration_s.toFixed(1) + 's';
  $('continuationResults').classList.add('show');
}
/**
 * Request a fresh-vs-continued comparison for the follow-up sentence and
 * render both results.
 *
 * Returns early when a generation is already running, when no continuation
 * session is stored, or when the follow-up text is empty (the latter shows an
 * error message). Failures are reported through the message bar; the busy
 * flag, play buttons and continuation controls are always restored.
 *
 * @returns {Promise<void>}
 */
async function compareContinuation() {
  if (busy || !lastContinuationSessionId) return;
  const text = $('continuationText').value.trim();
  if (!text) {
    showMsg('err', 'Enter the follow-up sentence first.');
    return;
  }
  hideMsg();
  busy = true;
  setPlayBtns(true);
  clearContinuationResults();
  $('continuationStatus').textContent = 'Comparing fresh vs continued…';
  const fd = new FormData();
  fd.append('session_id', lastContinuationSessionId);
  fd.append('text', text);
  fd.append('temperature', $('tempSl').value);
  fd.append('top_k', $('topkSl').value);
  fd.append('repetition_penalty', $('repSl').value);
  try {
    const res = await fetch('/generate/compare_continuation', { method: 'POST', body: fd });
    if (!res.ok) {
      // The error body is not guaranteed to be JSON (e.g. a proxy error page),
      // so a parse failure must not replace the real failure reason.
      let detail = null;
      try {
        const body = await res.json();
        detail = body ? body.detail : null;
      } catch (parseErr) {
        detail = null;
      }
      // `detail` may be structured rather than a plain string; stringify it so
      // the message stays readable.
      if (detail && typeof detail !== 'string') detail = JSON.stringify(detail);
      throw new Error(detail || 'Comparison failed (HTTP ' + res.status + ')');
    }
    const d = await res.json();
    // Store the new session id BEFORE rendering: setContinuationSession clears
    // the results panel whenever the id changes, which would otherwise wipe
    // the results that were just displayed.
    setContinuationSession(d.continuation_session_id);
    setComparisonResult('fresh', d.fresh);
    setComparisonResult('cont', d.continued);
  } catch (e) {
    showMsg('err', 'Continuation comparison failed: ' + e.message);
  } finally {
    busy = false;
    setPlayBtns(false);
    updateContinuationControls();
  }
}
// ── Messages ──────────────────────────────────────────────────────────────────
// Mode to generate with once the "Load & generate" button triggers a model switch.
let pendingMode = null;

/**
 * Show a message in the message bar.
 *
 * @param {string} type Class name appended to the bar (e.g. 'err').
 * @param {string} text Message to display; always rendered as plain text.
 * @param {string} [switchModel] When given, a "Load & generate" button is
 *   added that calls switchAndGen with this value.
 * @param {string} [mode] Stored in pendingMode when switchModel is given.
 */
function showMsg(type, text, switchModel, mode) {
  const el = $('msgBar');
  el.className = 'msg-bar show ' + type;
  if (switchModel) {
    pendingMode = mode;
    // Build the nodes explicitly instead of concatenating into innerHTML, so
    // neither the message text nor the model name can inject markup or script.
    const label = document.createElement('span');
    label.textContent = text;

    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = 'sw-btn';
    btn.textContent = 'Load & generate';
    btn.addEventListener('click', () => switchAndGen(switchModel));

    el.textContent = '';
    el.appendChild(label);
    el.appendChild(btn);
  } else {
    el.textContent = text;
  }
}
/** Hide the message bar by resetting its classes to the bare `msg-bar`. */
function hideMsg() {
  const bar = $('msgBar');
  bar.className = 'msg-bar';
}
/**
 * Select the given model, load it, then generate with the mode stored in
 * pendingMode. Does nothing while another operation is in progress.
 *
 * @param {string} target Value to select in the `modelSel` control.
 * @returns {Promise<void>}
 */
async function switchAndGen(target) {
  if (busy) return;
  const mode = pendingMode;
  busy = true; hideMsg(); setPlayBtns(true);
  $('modelSel').value = target;
  try {
    await loadModel();
  } catch (e) {
    // Report the failure instead of leaving an unhandled rejection, and skip
    // generation since the requested model is not available.
    showMsg('err', 'Model load failed: ' + ((e && e.message) || e));
    return;
  } finally {
    // Runs on success and failure alike so the buttons are never left disabled.
    busy = false;
    setPlayBtns(false);
  }
  generate(mode);
}
// ── Download ──────────────────────────────────────────────────────────────────
/**
 * Download the most recently stored clip as `generated_speech.wav`.
 * Does nothing when no clip has been stored yet.
 */
function dlAudio() {
  if (!dlBlob) return;
  const url = URL.createObjectURL(dlBlob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'generated_speech.wav';
  a.click();
  // Revoke on a later task rather than synchronously: some browsers start the
  // download asynchronously and fail if the URL is already gone.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
}
// ── Keyboard shortcuts ────────────────────────────────────────────────────────
// Cmd/Ctrl+Enter starts a generation; Escape closes the loader overlay if it
// is open, otherwise the settings overlay.
document.addEventListener('keydown', evt => {
  const withModifier = evt.metaKey || evt.ctrlKey;
  if (withModifier && evt.key === 'Enter') {
    evt.preventDefault();
    // Choose the mode from what is filled in: a reference file wins, then a
    // speaker id, and voice design is the fallback.
    let mode;
    if (refFile) {
      mode = 'voice_clone';
    } else if ($('speakerId').value) {
      mode = 'custom';
    } else {
      mode = 'voice_design';
    }
    generate(mode);
  }
  if (evt.key === 'Escape') {
    const loaderClasses = $('loaderOv').classList;
    if (loaderClasses.contains('open')) {
      loaderClasses.remove('open');
    } else if ($('settingsOv').classList.contains('open')) {
      closeSettings();
    }
  }
});
</script>
</body>
</html>