| <!doctype html>
|
| <html lang="en">
|
<head>
  <!-- Document metadata; the charset declaration stays first. -->
  <meta charset="utf-8">
  <title>SNAC 24k — Click-free Streaming (Robust scheduler + Cache)</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <style>
    /* ---- Page chrome ---- */
    :root {
      color-scheme: dark light;
    }
    body {
      font-family: system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, "Helvetica Neue", Arial;
      margin: 0;
    }
    header {
      padding: 16px 20px;
      background: #111827;
      color: #f9fafb;
    }
    /* Two-column layout: flexible left column, fixed 380px right column. */
    main {
      padding: 16px;
      display: grid;
      gap: 16px;
      grid-template-columns: 1fr 380px;
    }
    section {
      border: 1px solid #e5e7eb20;
      border-radius: 12px;
      padding: 12px 14px;
      background: #0b1220;
      color: #e5e7eb;
    }
    h1 {
      margin: 0 0 6px 0;
      font-size: 20px;
    }
    h2 {
      margin: 8px 0;
      font-size: 16px;
    }

    /* ---- Layout helpers and controls ---- */
    .row {
      display: flex;
      gap: 8px;
      align-items: center;
      flex-wrap: wrap;
    }
    .btn {
      padding: 8px 12px;
      border-radius: 10px;
      border: 1px solid #475569;
      background: #1f2937;
      color: #e5e7eb;
      cursor: pointer;
    }
    .btn:disabled {
      opacity: .5;
      cursor: not-allowed;
    }
    .mono {
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
    }
    textarea {
      width: 100%;
      min-height: 160px;
      border-radius: 10px;
      border: 1px solid #334155;
      background: #0b1220;
      color: #e5e7eb;
      padding: 10px;
    }
    input[type="number"],
    input[type="text"],
    select {
      padding: 6px 8px;
      border-radius: 8px;
      border: 1px solid #334155;
      background: #0b1220;
      color: #e5e7eb;
    }
    .grid {
      display: grid;
      gap: 10px;
      grid-template-columns: repeat(2, 1fr);
    }

    /* ---- Output widgets ---- */
    .log {
      height: 180px;
      overflow: auto;
      background: #0a0f1c;
      border-radius: 8px;
      padding: 8px;
      border: 1px solid #1f2937;
    }
    .small {
      font-size: 12px;
      opacity: .9;
    }
    .muted {
      opacity: .7;
    }
    .pill {
      display: inline-block;
      padding: 4px 8px;
      border-radius: 999px;
      background: #0b132b;
      border: 1px solid #334155;
      margin: 2px;
    }
    /* Download bar: the inner div's width is set from script. */
    .progress {
      width: 100%;
      height: 8px;
      background: #111827;
      border-radius: 999px;
      overflow: hidden;
      border: 1px solid #374151;
    }
    .progress > div {
      height: 100%;
      background: #22c55e;
      width: 0%;
    }
    audio {
      width: 100%;
      margin-top: 8px;
    }
  </style>
</head>
|
| <body>
|
<!-- Page banner: title plus a one-line summary of the streaming defaults. -->
<header>
  <h1>SNAC 24 kHz — Click-free Streaming (Robust scheduler + Cache)</h1>
  <div class="small muted">Streaming uses 48-frame windows, default hop 40, center-keep &amp; equal-power crossfade. Preloads model into IndexedDB cache.</div>
</header>
|
|
|
| <main>
|
<!-- 1) Inputs: model/codes URLs, cache controls and download progress. -->
<section>
  <h2>1) Inputs</h2>
  <div class="grid">
    <div>
      <!-- Captions are real labels so each field has an accessible name. -->
      <div class="small muted"><label for="modelUrl">Model URL (int→wav ONNX)</label></div>
      <input id="modelUrl" class="mono" type="text" style="width:100%"
             value="https://huggingface.co/laion/SNAC-24khz-decoder-onnx/resolve/main/snac24_int2wav_static.onnx">
    </div>
    <div>
      <div class="small muted"><label for="codesUrl">Codes URL (flattened JSON)</label></div>
      <input id="codesUrl" class="mono" type="text" style="width:100%"
             value="https://huggingface.co/laion/SNAC-24khz-decoder-onnx/resolve/main/snac_flattened_stream.txt">
    </div>
  </div>
  <div class="row" style="margin-top:8px;gap:12px;">
    <button id="preloadBtn" class="btn" type="button">Preload model (and cache)</button>
    <button id="loadCodesBtn" class="btn" type="button">Load codes into textbox</button>
    <button id="clearCacheBtn" class="btn" type="button">Clear cache</button>
    <!-- role="status" lets screen readers announce preload state changes. -->
    <span id="preloadStatus" class="small pill" role="status">idle</span>
  </div>
  <!-- Visual bar only; the same progress is written as text into #dlText. -->
  <div class="progress" style="margin:8px 0;" aria-hidden="true"><div id="dlBar"></div></div>
  <div class="small" id="dlText"></div>
</section>
|
|
|
<!-- 2) Decode options: execution provider, playback mode and window geometry. -->
<section>
  <h2>2) Decode options</h2>
  <div class="grid">
    <div>
      <!-- Captions are real labels so each control has an accessible name. -->
      <div class="small muted"><label for="providerSel">Execution Provider</label></div>
      <select id="providerSel">
        <option value="webgpu">webgpu (if available)</option>
        <option value="wasm">wasm</option>
      </select>
    </div>
    <div>
      <div class="small muted"><label for="modeSel">Streaming mode</label></div>
      <select id="modeSel">
        <option value="stream">Streaming (center-keep)</option>
        <option value="whole">Whole file (assemble then play)</option>
        <option value="fixed">Fixed windows (butt-join; for comparison)</option>
      </select>
    </div>
    <div>
      <div class="small muted"><label for="hopFrames">Hop (L2 frames)</label></div>
      <input id="hopFrames" type="number" min="8" max="48" step="4" value="40">
    </div>
    <div>
      <div class="small muted"><label for="xfadeMs">Crossfade (ms)</label></div>
      <input id="xfadeMs" type="number" min="0" max="40" step="2" value="12">
    </div>
    <div>
      <div class="small muted"><label for="keepFrames">Keep center (L2 frames)</label></div>
      <input id="keepFrames" type="number" min="8" max="48" step="4" value="40">
    </div>
    <div>
      <div class="small muted"><label for="winFrames">Window (L2 frames)</label></div>
      <!-- Fixed at 48 and disabled; the invalid step="0" was dropped. -->
      <input id="winFrames" type="number" min="48" max="48" value="48" disabled>
    </div>
  </div>
  <div class="row" style="margin-top:8px; gap:12px;">
    <label class="small"><input id="sequentialChk" type="checkbox"> Sequential playback (no overlap)</label>
    <span class="small muted">Sample rate 24,000 Hz</span>
  </div>
  <div class="row" style="margin-top:8px; gap:12px;">
    <button id="generateBtn" class="btn" type="button">Generate</button>
  </div>
</section>
|
|
|
<!-- 3) Input JSON: spans both grid columns. -->
<section style="grid-column:1 / span 2;">
  <!-- The heading doubles as the textarea's label; a placeholder alone is not an accessible name. -->
  <h2><label for="snacIn">3) Flattened SNAC JSON</label></h2>
  <textarea id="snacIn" class="mono" spellcheck="false"
            placeholder='Paste the single-line JSON (with "flattened","lengths", optional "streaming") here…'></textarea>
</section>
|
|
|
<!-- 4) Output: the audio element and the metrics text written by the script. -->
<section>
  <h2>4) Output</h2>
  <div class="small" id="playerLabel">Player</div>
  <audio id="player" controls aria-labelledby="playerLabel"></audio>
  <div class="small" id="metricsLabel" style="margin-top:8px;">Metrics</div>
  <!-- role="status": the metrics text appears after a run, so it is announced politely. -->
  <pre id="metrics" class="log mono" role="status" aria-labelledby="metricsLabel"></pre>
</section>
|
|
|
<!-- 5) Logs: running log written by the script, plus static usage notes. -->
<section>
  <h2 id="logHeading">5) Logs &amp; Info</h2>
  <!-- role="log" exposes the appended lines as a polite live region. -->
  <pre id="log" class="log mono" role="log" aria-labelledby="logHeading"></pre>
  <div class="small muted">
    <p><b>WebGPU</b> runs operations on your GPU (fast when supported). <b>WASM</b> runs on CPU; SIMD is automatic; multithreading needs cross-origin isolation (COOP/COEP). Without COI, WASM uses 1 thread.</p>
    <p>Streaming: 48-frame windows, hop 40, keep center 40. Equal-power crossfade removes seams. “Sequential” forces no overlap and starts each chunk only after the previous ended (sets crossfade to 0).</p>
  </div>
</section>
|
| </main>
|
|
|
| <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
|
| <script>
|
| (async () => {
|
// ---- DOM handles (ids are defined in the markup above) ----
const el = id => document.getElementById(id);
const modelUrl = el('modelUrl'), codesUrl = el('codesUrl');
const preloadBtn = el('preloadBtn'), loadCodesBtn = el('loadCodesBtn'), clearCacheBtn = el('clearCacheBtn');
const preloadStatus = el('preloadStatus'), dlBar = el('dlBar'), dlText = el('dlText');
const providerSel = el('providerSel'), modeSel = el('modeSel');
const hopFrames = el('hopFrames'), keepFrames = el('keepFrames'), xfadeMs = el('xfadeMs'), winFrames = el('winFrames');
const sequentialChk = el('sequentialChk');
const snacIn = el('snacIn'), player = el('player'), metrics = el('metrics'), logbox = el('log');
const genBtn = el('generateBtn');

/**
 * Render one log argument as text.
 * Plain objects are JSON-encoded so they do not show up as "[object Object]";
 * Errors and primitives keep their normal string form.
 */
const logPart = v => {
  if (v !== null && typeof v === 'object' && !(v instanceof Error)) {
    try { return JSON.stringify(v); } catch { /* circular or BigInt: fall through */ }
  }
  return String(v);
};

/** Write the arguments to the console and append them as one line to the on-page log. */
const log = (...a) => {
  console.log(...a);
  logbox.textContent += a.map(logPart).join(' ') + '\n';
  logbox.scrollTop = logbox.scrollHeight; // keep the newest line visible
};

/** Pretty-print a value as indented JSON. */
const fmt = o => JSON.stringify(o, null, 2);
|
|
|
|
|
const DB='snac-cache', STORE='files';

// Memoised open() promise, so every helper shares one connection instead of
// opening (and never closing) a new one per call.
let idbConn = null;

/**
 * Open the cache database, creating the object store on first use.
 * @returns {Promise<IDBDatabase>} the shared connection; rejects with the open request's error.
 */
function idb(){
  if (idbConn) return idbConn;
  const opening = new Promise((res, rej) => {
    const r = indexedDB.open(DB, 1);
    r.onupgradeneeded = () => {
      if (!r.result.objectStoreNames.contains(STORE)) r.result.createObjectStore(STORE);
    };
    r.onsuccess = () => {
      const db = r.result;
      const forget = () => { if (idbConn === opening) idbConn = null; };
      // Release the connection if another tab upgrades/deletes the database,
      // and drop the memo whenever the browser closes it.
      db.onversionchange = () => { db.close(); forget(); };
      db.onclose = forget;
      res(db);
    };
    r.onerror = () => rej(r.error);
  });
  idbConn = opening;
  // A failed open must not poison later attempts.
  opening.catch(() => { if (idbConn === opening) idbConn = null; });
  return opening;
}
|
/**
 * Read the value stored under key `k`.
 * @returns {Promise<*>} the stored value, or null when the lookup yields a falsy result.
 */
async function idbGet(k){
  const database = await idb();
  return new Promise((resolve, reject) => {
    const request = database.transaction(STORE, 'readonly').objectStore(STORE).get(k);
    request.onerror = () => reject(request.error);
    request.onsuccess = () => {
      const hit = request.result;
      resolve(hit ? hit : null);
    };
  });
}
|
/**
 * Store `v` under key `k`.
 * @returns {Promise<true>} resolves once the transaction has committed;
 *   rejects if the request fails or the transaction aborts.
 */
async function idbSet(k,v){
  const db = await idb();
  return new Promise((res, rej) => {
    const tx = db.transaction(STORE, 'readwrite');
    tx.oncomplete = () => res(true);
    tx.onerror = ev => rej(tx.error || ev.target.error);
    // A commit-time abort (e.g. quota exceeded) fires "abort", not "error";
    // without this handler the promise would never settle.
    tx.onabort = () => rej(tx.error || new Error('IndexedDB transaction aborted'));
    tx.objectStore(STORE).put(v, k);
  });
}
|
/**
 * Delete the entry stored under key `k` (a missing key is not an error).
 * @returns {Promise<true>} resolves once the transaction has committed;
 *   rejects if the request fails or the transaction aborts.
 */
async function idbDel(k){
  const db = await idb();
  return new Promise((res, rej) => {
    const tx = db.transaction(STORE, 'readwrite');
    tx.oncomplete = () => res(true);
    tx.onerror = ev => rej(tx.error || ev.target.error);
    // "abort" is a separate event from "error"; handle it so the promise always settles.
    tx.onabort = () => rej(tx.error || new Error('IndexedDB transaction aborted'));
    tx.objectStore(STORE).delete(k);
  });
}
|
// "Clear cache": remove the cached entry for the URL currently in the Model URL field.
clearCacheBtn.onclick = async () => {
  const key = modelUrl.value.trim();
  clearCacheBtn.disabled = true; // avoid overlapping deletes from repeated clicks
  try {
    await idbDel(key);
    log('Cache cleared for', key);
  } catch (e) {
    // IndexedDB may be unavailable or blocked; report it instead of leaving an unhandled rejection.
    log('Clear cache error:', e);
  } finally {
    clearCacheBtn.disabled = false;
  }
};
|
|
|
|
|
/**
 * Download `url` fully into memory, reporting progress as bytes arrive.
 * @param {string} url resource to fetch.
 * @param {(got:number,total:number)=>void} [onProg] called with bytes received so far
 *   and the Content-Length header value (0 when the header is absent or unparsable).
 * @returns {Promise<ArrayBuffer>} the complete response body.
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
async function fetchWithProgress(url, onProg){
  const response = await fetch(url);
  if (!response.ok) throw new Error(`HTTP ${response.status} for ${url}`);
  const total = Number(response.headers.get('Content-Length')) || 0;

  // No streaming body available: read in one go and report once.
  const canStream = response.body && window.ReadableStream;
  if (!canStream) {
    const whole = await response.arrayBuffer();
    onProg?.(whole.byteLength, total);
    return whole;
  }

  const reader = response.body.getReader();
  const pieces = [];
  let received = 0;
  while (true) {
    const step = await reader.read();
    if (step.done) break;
    pieces.push(step.value);
    received += step.value.byteLength;
    onProg?.(received, total);
  }

  // Stitch the chunks into one contiguous buffer.
  const merged = new Uint8Array(received);
  pieces.reduce((offset, piece) => {
    merged.set(piece, offset);
    return offset + piece.byteLength;
  }, 0);
  return merged.buffer;
}
|
|
|
|
|
// ---- onnxruntime-web WASM settings ----
// `coi` records whether the page is cross-origin isolated; the thread count below depends on it.
const coi = (typeof crossOriginIsolated === 'undefined') ? false : crossOriginIsolated;

const wasmEnv = ort.env.wasm;
wasmEnv.simd = true;
if (coi) {
  // Use the reported core count, or 4 when the browser does not report one.
  wasmEnv.numThreads = navigator.hardwareConcurrency || 4;
} else {
  wasmEnv.numThreads = 1;
}
|
|
|
|
|
// Current inference session and the execution provider reported for it.
let session=null, sessionEP=null;
// Promise of the preload currently running, so concurrent callers share one download/compile.
let preloadInFlight=null;

/**
 * Ensure the model is downloaded (or read from the IndexedDB cache) and compiled
 * into `session`. Never rejects: failures are logged and shown in the status pill.
 * Concurrent calls share the same in-flight work.
 * @returns {Promise<boolean>} true when a session was created, false on failure.
 */
async function preloadModel(){
  if (preloadInFlight) return preloadInFlight;
  preloadInFlight = loadAndCompileModel();
  try { return await preloadInFlight; }
  finally { preloadInFlight = null; }
}

/** Worker behind preloadModel(): cache lookup, download, cache write, session creation. */
async function loadAndCompileModel(){
  try{
    const url = modelUrl.value.trim();
    preloadBtn.disabled = true; dlBar.style.width='0%'; dlText.textContent=''; preloadStatus.textContent='checking cache…';
    if (!url) throw new Error('Model URL is empty');

    // The cache is an optimisation only: a failing IndexedDB must not block loading.
    let buf = null;
    try { buf = await idbGet(url); }
    catch (e) { log('Cache read failed, downloading instead:', e); }

    if (buf) {
      preloadStatus.textContent='cache hit'; dlBar.style.width='100%'; dlText.textContent='Loaded from IndexedDB';
    } else {
      preloadStatus.textContent='downloading…';
      buf = await fetchWithProgress(url, (got,total)=>{
        // Clamp: the received byte count can exceed the Content-Length value.
        const pct = total ? Math.min(100, Math.round(100*got/total)) : 0;
        dlBar.style.width = `${pct}%`;
        dlText.textContent = total ? `Downloading: ${pct}% (${(got/1e6).toFixed(1)} / ${(total/1e6).toFixed(1)} MB)` :
          `Downloading: ${(got/1e6).toFixed(1)} MB`;
      });
      try { await idbSet(url, buf); log('Cached model to IndexedDB'); }
      catch (e) { log('Could not cache model (continuing without cache):', e); }
    }

    // Ask for WebGPU only when it is selected and the browser exposes navigator.gpu; wasm is always listed.
    const want = (providerSel.value==='webgpu' && 'gpu' in navigator) ? ['webgpu','wasm'] : ['wasm'];
    const t0=performance.now();
    preloadStatus.textContent='compiling…';
    session = await ort.InferenceSession.create(buf, { executionProviders: want, graphOptimizationLevel: 'all' });
    const t1=performance.now();
    // NOTE(review): confirm that onnxruntime-web exposes `executionProvider` on the session;
    // if it does not, this always reports the first requested provider.
    sessionEP = session.executionProvider ?? want[0];
    preloadStatus.textContent=`ready (${(t1-t0).toFixed(1)} ms) via ${sessionEP}`;
    log('Session ready. EP:', sessionEP, 'compile_ms:', (t1-t0).toFixed(1));
    return true;
  }catch(e){
    // Do not leave the pill on "downloading…" or "compiling…" after a failure.
    preloadStatus.textContent='error';
    log('Preload error:', e);
    return false;
  }
  finally{ preloadBtn.disabled=false; }
}

preloadBtn.onclick = preloadModel;
// Start loading as soon as the page has finished loading.
window.addEventListener('load', ()=>preloadModel().catch(e=>log('Preload error:',e)));
|
|
|
|
|
// "Load codes into textbox": fetch the codes URL and put the trimmed text into the JSON textarea.
loadCodesBtn.onclick = async () => {
  try {
    const response = await fetch(codesUrl.value.trim());
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const body = await response.text();
    snacIn.value = body.trim();
    log('Loaded codes text.');
  } catch (err) {
    log('Load codes error:', err);
  }
};
|
|
|
|
|
// SPF: samples per L2 frame (sample count = L2 frames * SPF); SR: sample rate in Hz.
const SPF = 512, SR = 24000;

/**
 * Split a flattened token stream into its three code levels.
 *
 * Each group of 7 consecutive tokens feeds one c0 entry, two c1 entries and
 * four c2 entries, in slot order [c0, c1a, c2a, c2b, c1b, c2c, c2d].
 * The token in slot k has (A + k*K) subtracted and is then reduced modulo K.
 *
 * @param {ArrayLike<number|bigint|string>} flat flattened tokens (at least 7*L0 of them).
 * @param {number} L0 number of 7-token groups to read.
 * @param {number} [A=0] offset subtracted from every token.
 * @param {number} [K=4096] per-slot stride and modulus.
 * @returns {{c0:BigInt64Array,c1:BigInt64Array,c2:BigInt64Array}} levels of length L0, 2*L0 and 4*L0.
 * @throws {Error} when L0 or K is invalid, `flat` is too short, or a token is not an integer.
 */
function unflatten(flat,L0,A=0,K=4096){
  if (!Number.isInteger(L0) || L0 < 0) {
    throw new Error(`unflatten: L0 must be a non-negative integer, got ${L0}`);
  }
  const have = flat?.length ?? 0;
  if (have < 7*L0) {
    throw new Error(`unflatten: flattened has ${have} tokens but L0=${L0} needs ${7*L0}`);
  }
  const bA = BigInt(A), bK = BigInt(K);
  if (bK <= 0n) throw new Error(`unflatten: codebook size K must be positive, got ${K}`);

  const L1 = 2*L0, L2 = 4*L0;
  const c0 = new BigInt64Array(L0), c1 = new BigInt64Array(L1), c2 = new BigInt64Array(L2);

  // Always-non-negative remainder, so tokens below their slot offset still land in [0, K).
  const mod = v => ((v % bK) + bK) % bK;

  // Decode the token in `slot` of group `i`, with a readable error for non-integer entries.
  const tok = (i, slot) => {
    const idx = 7*i + slot;
    let raw;
    try { raw = BigInt(flat[idx]); }
    catch { throw new Error(`unflatten: flattened[${idx}] is not an integer token (${String(flat[idx])})`); }
    return mod(raw - (bA + BigInt(slot)*bK));
  };

  for (let i = 0; i < L0; i++) {
    c0[i]     = tok(i, 0);
    c1[2*i]   = tok(i, 1);
    c2[4*i]   = tok(i, 2);
    c2[4*i+1] = tok(i, 3);
    c1[2*i+1] = tok(i, 4);
    c2[4*i+2] = tok(i, 5);
    c2[4*i+3] = tok(i, 6);
  }
  return {c0,c1,c2};
}
|
/**
 * Copy `len` entries of `src` starting at index `start`, repeating the first or
 * last entry for positions that fall outside the source.
 * @param {BigInt64Array} src source codes.
 * @param {number} start index of the first entry (may be negative).
 * @param {number} len number of entries to produce.
 * @returns {BigInt64Array} a new array of length `len`.
 */
function sliceEdgePad(src,start,len){
  const lastIndex = src.length - 1;
  const padded = new BigInt64Array(len);
  for (let k = 0; k < len; k += 1) {
    // Clamp the read position into [0, lastIndex].
    const pos = Math.min(Math.max(start + k, 0), lastIndex);
    padded[k] = src[pos];
  }
  return padded;
}
|
/**
 * Join two sample arrays end to end.
 * @param {Float32Array} a leading samples.
 * @param {Float32Array} b trailing samples.
 * @returns {Float32Array} a new array holding `a` followed by `b`.
 */
function concatFloat32(a,b){
  const joined = new Float32Array(a.length + b.length);
  joined.set(a);
  joined.set(b, a.length);
  return joined;
}
|
|
|
|
|
// AudioContext of the most recent streaming run. It is closed when a new run
// starts or when its last chunk has finished playing, so contexts do not pile up.
let activeCtx = null;
// Object URL currently assigned to the <audio> element (whole-file mode).
let playerUrl = null;

/** Close an AudioContext, ignoring the error raised when it is already closed. */
function closeAudioContext(c){
  try { const p = c.close(); if (p && p.catch) p.catch(() => {}); }
  catch { /* already closed */ }
}

/**
 * Apply equal-power fades to `pcm` in place: a sine fade-in over the first
 * `fadeIn` samples and a cosine fade-out over the last `fadeOut` samples.
 * When the tail of one chunk overlaps the head of the next by the same number
 * of samples, the two gains satisfy sin² + cos² = 1 at every overlapped sample.
 */
function applyEqualPowerFades(pcm, fadeIn, fadeOut){
  const n = pcm.length, q = Math.PI/2;
  for (let i = 0; i < fadeIn; i++)  pcm[i] *= Math.sin(q*(i+0.5)/fadeIn);
  for (let i = 0; i < fadeOut; i++) pcm[n-fadeOut+i] *= Math.cos(q*(i+0.5)/fadeOut);
}

/**
 * Encode mono float samples as a 16-bit PCM WAV file.
 * @param {Float32Array} float32 samples, clamped to [-1, 1] before scaling.
 * @param {number} sr sample rate in Hz.
 * @returns {Uint8Array} the complete file (44-byte header + little-endian samples).
 */
function pcm16Wav(float32, sr){
  const n = float32.length, dataBytes = n*2;
  const buf = new ArrayBuffer(44 + dataBytes), dv = new DataView(buf);
  const tag = (at, s) => { for (let i = 0; i < s.length; i++) dv.setUint8(at+i, s.charCodeAt(i)); };
  tag(0,'RIFF');  dv.setUint32(4, 36 + dataBytes, true);  tag(8,'WAVE');
  tag(12,'fmt '); dv.setUint32(16, 16, true);             // fmt chunk size
  dv.setUint16(20, 1, true);                              // format 1 = PCM
  dv.setUint16(22, 1, true);                              // channels
  dv.setUint32(24, sr, true);                             // sample rate
  dv.setUint32(28, sr*2, true);                           // byte rate (mono, 2 bytes/sample)
  dv.setUint16(32, 2, true);                              // block align
  dv.setUint16(34, 16, true);                             // bits per sample
  tag(36,'data'); dv.setUint32(40, dataBytes, true);
  // Explicit little-endian writes, independent of host byte order.
  for (let i = 0; i < n; i++) {
    const v = Math.max(-1, Math.min(1, float32[i]));
    dv.setInt16(44 + 2*i, Math.round(v*32767), true);
  }
  return new Uint8Array(buf);
}

/**
 * Decode the JSON in the textarea and play it.
 *
 * Modes (from the "Streaming mode" select):
 *  - stream: sliding 48-frame windows; each window starts `leftCtx` frames before
 *    its hop position so the kept centre lines up with the timeline. Chunks are
 *    scheduled on an AudioContext with an equal-power crossfade unless
 *    "Sequential" is checked.
 *  - whole:  same windows, assembled into one WAV and handed to the <audio> element.
 *  - fixed:  back-to-back 48-frame windows, butt-joined (no crossfade).
 *
 * In every mode output stops at the true length (L2 * SPF samples); edge-padded
 * frames past the end are not played. Errors are logged; the function never rejects.
 */
async function generate(){
  let ctx = null, lastSrc = null;
  try{
    genBtn.disabled=true; metrics.textContent=''; logbox.textContent='';
    if(!session) await preloadModel();
    if(!session) throw new Error('model is not loaded; fix the preload error and retry');

    // ---- Parse and validate the input before touching any field ----
    const text = snacIn.value.trim();
    if(!text) throw new Error('no SNAC JSON provided');
    const blob = JSON.parse(text);
    const flat = blob?.flattened;
    if(!Array.isArray(flat)) throw new Error("flattened missing");
    const A = blob.audio_tokens_start ?? 0;
    const K = blob.codebook_size ?? 4096;
    const L0 = blob.lengths?.L0 ?? Math.floor(flat.length/7);
    if(!Number.isInteger(L0) || L0 < 1 || 7*L0 > flat.length){
      throw new Error(`invalid L0=${L0} for ${flat.length} flattened tokens`);
    }
    const {c0,c1,c2} = unflatten(flat, L0, A, K);
    const L2=c2.length, T_true=L2*SPF;
    log(`Parsed L0/L1/L2 = ${L0}/${c1.length}/${L2} -> true samples ${T_true}`);

    // ---- Options: UI value first, then the blob's "streaming" hints, then defaults ----
    const s = blob.streaming || {};
    const mode = modeSel.value;
    const sequential = !!sequentialChk.checked;
    const K2 = Number(winFrames.value)||48;
    if(K2 % 4 !== 0) throw new Error(`window must be a multiple of 4 L2 frames, got ${K2}`);
    // Frame counts are snapped to multiples of 4 so c0/c1/c2 slices stay aligned
    // (one c0 entry spans two c1 entries and four c2 entries).
    const pickFrames = (uiText, hint) => {
      const v = Number(uiText) || Number(hint) || 40;
      return Math.min(K2, Math.max(4, Math.floor(v/4)*4));
    };
    const H2 = pickFrames(hopFrames.value, s.hop_frames);
    const keepF = pickFrames(keepFrames.value, s.center_keep_frames);
    const spare = K2 - keepF;
    const leftHint = s.left_ctx_frames == null ? NaN : Number(s.left_ctx_frames);
    const leftCtx = (Number.isInteger(leftHint) && leftHint >= 0 && leftHint <= spare && leftHint % 4 === 0)
      ? leftHint
      : Math.floor(spare/8)*4;
    // `.value` is always a string, so test for an empty field explicitly before falling back.
    const xfText = xfadeMs.value.trim();
    const xfUi = Number(xfText);
    const xfade = Math.max(0, (xfText !== '' && Number.isFinite(xfUi)) ? xfUi : (Number(s.xfade_ms_default ?? 12) || 0));
    // Only streaming mode crossfades; fixed mode is labelled butt-join and sequential forces none.
    const overlapN = (mode==='stream' && !sequential) ? Math.round(xfade/1000*SR) : 0;
    if(mode!=='fixed' && H2!==keepF){
      log(`Warning: hop (${H2}) differs from keep (${keepF}); audio will ${H2<keepF?'repeat':'skip'} ${Math.abs(H2-keepF)} frames per window.`);
    }

    // ---- Window plan: where each window starts (in L2 frames) and which frames to keep ----
    const plan = [];
    if(mode==='fixed'){
      for(let pos=0; pos<L2; pos+=K2) plan.push({ start:pos, keepFrom:0, keepCount:Math.min(K2, L2-pos) });
    } else {
      for(let pos=0; pos<L2; pos+=H2) plan.push({ start:pos-leftCtx, keepFrom:leftCtx, keepCount:Math.min(keepF, L2-pos) });
    }

    /** Run the model on one window and return the kept samples plus the inference time. */
    async function decodeWindow(w){
      const feeds = {
        codes0: new ort.Tensor('int64', sliceEdgePad(c0, w.start/4, K2/4), [1, K2/4]),
        codes1: new ort.Tensor('int64', sliceEdgePad(c1, w.start/2, K2/2), [1, K2/2]),
        codes2: new ort.Tensor('int64', sliceEdgePad(c2, w.start,   K2),   [1, K2]),
      };
      const tA = performance.now();
      const out = await session.run(feeds);
      const ms = performance.now() - tA;
      const from = w.keepFrom*SPF;
      // Copy, so the kept samples do not pin the whole output tensor.
      return { pcm: out.audio.data.slice(from, from + w.keepCount*SPF), ms };
    }

    let windows=0, samples=0; const t0=performance.now();

    if(mode==='whole'){
      // Collect chunks and join once (linear time instead of re-copying per window).
      const parts=[]; let total=0;
      for(const w of plan){
        const {pcm} = await decodeWindow(w);
        parts.push(pcm); total += pcm.length;
        windows++; samples += pcm.length;
      }
      let full = new Float32Array(total), off=0;
      for(const p of parts){ full.set(p, off); off += p.length; }
      if(full.length > T_true) full = full.subarray(0, T_true);
      const wav = pcm16Wav(full, SR);
      if(playerUrl) URL.revokeObjectURL(playerUrl); // release the previous run's blob
      playerUrl = URL.createObjectURL(new Blob([wav], {type:'audio/wav'}));
      player.src = playerUrl;
      await player.play().catch(e => log('Playback did not start automatically; press play:', e.message||e));
    } else {
      // Stop whatever the previous run is still playing before starting a new timeline.
      if(activeCtx){ closeAudioContext(activeCtx); activeCtx=null; }
      ctx = new (window.AudioContext||window.webkitAudioContext)({sampleRate: SR});
      activeCtx = ctx;

      const SAFETY = 0.040;                        // minimum lead time when scheduling a chunk
      const firstStart = ctx.currentTime + 0.10;   // initial start delay for the first chunk
      let scheduledEnd = null, prevTailN = 0;

      for(let i=0;i<plan.length;i++){
        const {pcm, ms} = await decodeWindow(plan[i]);
        const isFirst = scheduledEnd===null, isLast = i===plan.length-1;
        const half = Math.floor(pcm.length/2);
        // Fades are baked into the samples, so they stay click-free even when a
        // chunk is scheduled late and does not overlap its predecessor.
        const headN = isFirst ? 0 : Math.min(overlapN, half, prevTailN);
        const tailN = isLast ? 0 : Math.min(overlapN, half);
        applyEqualPowerFades(pcm, headN, tailN);

        const buf = ctx.createBuffer(1, pcm.length, SR);
        buf.copyToChannel(pcm, 0, 0);
        const src = ctx.createBufferSource(); src.buffer = buf;
        src.connect(ctx.destination);

        const segSec = pcm.length / SR;
        const desiredStart = isFirst ? firstStart : scheduledEnd - headN/SR;
        const now = ctx.currentTime;
        const startAt = Math.max(desiredStart, now + SAFETY); // never schedule in the past
        const endAt = startAt + segSec;
        src.start(startAt);

        windows += 1; samples += pcm.length;
        scheduledEnd = endAt; prevTailN = tailN; lastSrc = src;

        const late = Math.max(0, (now + SAFETY) - desiredStart);
        log(`win#${windows} infer ${ms.toFixed(2)} ms now ${now.toFixed(3)} desired ${desiredStart.toFixed(3)} start ${startAt.toFixed(3)} end ${endAt.toFixed(3)} overlap ${(headN/SR*1000).toFixed(1)} ms late ${(late*1000).toFixed(1)} ms`);
      }
    }

    const t1=performance.now();
    const r = {
      usedEP: sessionEP || providerSel.value,
      threads: ort.env.wasm.numThreads||1,
      simd: ort.env.wasm.simd===true,
      coi, windows, samples,
      audio_seconds: samples/SR,
      inference_ms: (t1-t0),
      rtf: ( (samples/SR) / ((t1-t0)/1000) ).toFixed(3)
    };
    metrics.textContent = fmt({env:{coi, hwc:navigator.hardwareConcurrency||1}, providers:['webgpu','wasm']}) + "\n" + fmt(r);
    log('Done.', JSON.stringify(r));
  }catch(e){ console.error(e); log('ERROR:', e.message||e); }
  finally{
    // Release the AudioContext once the last scheduled chunk has played
    // (or immediately when nothing was scheduled).
    if(ctx){
      const done = ctx;
      const release = () => { closeAudioContext(done); if(activeCtx===done) activeCtx=null; };
      if(lastSrc) lastSrc.onended = release; else release();
    }
    genBtn.disabled=false;
  }
}

genBtn.onclick = generate;
|
| })();
|
| </script>
|
| </body>
|
| </html>
|
|
|