Reza2kn's picture
Fix PhaseB live fbank normalization
d8f67ea verified
Raw
History Blame Contribute Delete
24.1 kB
// Hugging Face repository and file names used to build the model download URLs below.
const MODEL_REPO = "Reza2kn/visualears-fastconformer-fa-depoisoned-phaseB-onnx-fp16";
const MODEL_FILE = "fastconformer_phaseB_ctc_fixed2005_len_fp16_iofp32.onnx";
const MODEL_DATA_FILE = "fastconformer_phaseB_ctc_fixed2005_len_fp16_iofp32.onnx.data";
const MODEL_EMBEDDED_FILE = "fastconformer_phaseB_ctc_fixed2005_len_fp16_iofp32_embedded.onnx";
// MODEL_URL + MODEL_DATA_URL are loaded together (graph plus external data) on the non-"wasm" path;
// MODEL_EMBEDDED_URL is the single-file variant loaded on the "wasm" path (see createSession).
const MODEL_URL = `https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_FILE}`;
const MODEL_DATA_URL = `https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_DATA_FILE}`;
const MODEL_EMBEDDED_URL = `https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_EMBEDDED_FILE}`;
// onnxruntime-web versions pulled from the jsDelivr CDN: one for the ort.webgpu module build,
// one for the ort.wasm script build used as the CPU compatibility path.
const ORT_WEBGPU_VERSION = "1.26.0";
const ORT_COMPAT_VERSION = "1.18.0";
// Feature-extraction parameters consumed by pcmToLogMel.
const SAMPLE_RATE = 16000;
const N_FFT = 512;
const WIN_LENGTH = 400;
const HOP_LENGTH = 160;
const N_MELS = 80;
// The feature tensor always has this many frame slots; shorter inputs leave the tail at zero.
const FIXED_FRAMES = 2005;
// Used only to estimate the number of output steps when the model does not return encoded_lengths.
const OUTPUT_STRIDE = 8;
// Pre-emphasis coefficient; 0.0 makes the pre-emphasis step a plain copy.
const PREEMPHASIS = 0.0;
// Number of samples each frame reaches back before its nominal start (frames are centred).
const CENTER_PAD = N_FFT / 2;
// Offset of the WIN_LENGTH-sample window inside the N_FFT-sample frame (window is centred).
const WINDOW_OFFSET = (N_FFT - WIN_LENGTH) / 2;
// Added to mel energies before taking the log so that silence does not produce -Infinity.
const LOG_ZERO_GUARD = 2 ** -24;
// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);
// ONNX Runtime module imported in boot().
let ortRuntime = null;
// Cached window.ort from the compatibility script (see loadCompatOrtRuntime).
let ortCompatRuntime = null;
// Memoised promise returned by detectWasmFeatures.
let wasmFeaturePromise = null;
/**
 * Mutable application state shared by the loading, capture and decode code.
 * Every field that other functions read or write is declared here so the
 * object's shape is visible in one place.
 */
const state = {
  // Model / runtime
  session: null,
  tokens: null,
  blankId: 1024,
  ort: null,
  // Label of the execution provider actually in use; set by loadModel.
  provider: null,
  gpuName: "-",

  // Audio capture graph
  audioContext: null,
  source: null,
  processor: null,
  mediaStream: null,

  // Current utterance and transcripts
  utterancePcm: new Float32Array(0),
  finalTranscript: "",
  partialTranscript: "",

  // Voice-activity / decode scheduling flags
  speechActive: false,
  silenceMs: 0,
  finalizing: false,
  recording: false,
  decoding: false,
  decodeTimer: null,
  lastDecodeAt: 0,

  // Lazily created feature-extraction caches
  fftPlan: null,
  melFilters: null,
};
/** Writes `message` into the element whose id is "status". */
function setStatus(message) {
  const statusElement = $("status");
  statusElement.textContent = message;
}
/** Sets the text content of the element with the given id to `value`. */
function setText(id, value) {
  const target = $(id);
  target.textContent = value;
}
/**
 * Formats a duration for display.
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} "-" for non-finite input, whole milliseconds below one
 *   second, otherwise seconds with two decimals.
 */
function formatMs(ms) {
  if (!Number.isFinite(ms)) {
    return "-";
  }
  if (ms >= 1000) {
    const seconds = ms / 1000;
    return `${seconds.toFixed(2)} s`;
  }
  return `${ms.toFixed(0)} ms`;
}
/**
 * Refreshes the "stat-heap" readout from performance.memory, or shows
 * "unavailable" when that object is not provided.
 */
function updateHeap() {
  const BYTES_PER_MB = 1048576;
  const heap = performance.memory;
  if (heap) {
    const usedMb = (heap.usedJSHeapSize / BYTES_PER_MB).toFixed(0);
    const limitMb = (heap.jsHeapSizeLimit / BYTES_PER_MB).toFixed(0);
    setText("stat-heap", `${usedMb} / ${limitMb} MB`);
    return;
  }
  setText("stat-heap", "unavailable");
}
/**
 * Converts a number to the bit pattern of an IEEE 754 binary16 value.
 *
 * Rounding is round-to-nearest, ties-to-even, including in the subnormal
 * range, so values just below the smallest subnormal (2^-24) round up to it
 * instead of being flushed to zero.
 *
 * Finite values at or above 65504 saturate to the largest finite half
 * (0x7bff) rather than becoming infinity; only actual infinities map to the
 * infinity patterns.
 *
 * @param {number} value - Number to convert.
 * @returns {number} 16-bit pattern (0..0xffff).
 */
function float32ToFloat16Bits(value) {
  if (Number.isNaN(value)) return 0x7e00;
  if (value === Infinity) return 0x7c00;
  if (value === -Infinity) return 0xfc00;

  const sign = value < 0 || Object.is(value, -0) ? 0x8000 : 0;
  const abs = Math.abs(value);
  if (abs === 0) return sign;
  if (abs >= 65504) return sign | 0x7bff;

  // Round a non-negative number to the nearest integer, ties to even.
  const roundTiesToEven = (x) => {
    const lower = Math.floor(x);
    const diff = x - lower;
    if (diff > 0.5) return lower + 1;
    if (diff < 0.5) return lower;
    return lower % 2 === 0 ? lower : lower + 1;
  };

  if (abs < 2 ** -14) {
    // Subnormal range: the value is a multiple of 2^-24. A result of 1024
    // lands exactly on the smallest normal number (0x0400), and a result of 0
    // is a correctly rounded signed zero.
    return sign | roundTiesToEven(abs / 2 ** -24);
  }

  let exponent = Math.floor(Math.log2(abs));
  // Math.log2 may be off by a hair next to exact powers of two; renormalise
  // so the significand is guaranteed to lie in [1, 2).
  if (abs / 2 ** exponent >= 2) exponent += 1;
  else if (abs / 2 ** exponent < 1) exponent -= 1;

  const mantissa = abs / 2 ** exponent - 1;
  let halfExponent = exponent + 15;
  let halfMantissa = roundTiesToEven(mantissa * 1024);
  if (halfMantissa === 1024) {
    // Rounding carried into the next binade.
    halfMantissa = 0;
    halfExponent += 1;
  }
  if (halfExponent >= 31) return sign | 0x7bff;
  return sign | (halfExponent << 10) | (halfMantissa & 0x03ff);
}
/**
 * Converts an array of numbers to half precision.
 * @param {ArrayLike<number>} values - Source values.
 * @returns {Float16Array|Uint16Array} A native Float16Array when the runtime
 *   provides one, otherwise a Uint16Array of binary16 bit patterns.
 */
function float32ArrayToFloat16Bits(values) {
  const NativeFloat16Array = globalThis.Float16Array;
  if (NativeFloat16Array) {
    return new NativeFloat16Array(values);
  }
  return Uint16Array.from(values, (value) => float32ToFloat16Bits(value));
}
/**
 * Decodes an IEEE 754 binary16 bit pattern into a JavaScript number.
 * @param {number} bits - 16-bit pattern.
 * @returns {number} Decoded value (signed zero, subnormal, normal, infinity or NaN).
 */
function float16BitsToFloat32(bits) {
  const signFactor = (bits & 0x8000) ? -1 : 1;
  const exponentField = (bits >> 10) & 0x1f;
  const fraction = bits & 0x03ff;

  switch (exponentField) {
    case 0:
      // Zero or subnormal: no implicit leading one.
      if (fraction === 0) return signFactor * 0;
      return signFactor * (fraction / 1024) * 2 ** -14;
    case 31:
      // All-ones exponent: infinity when the fraction is clear, NaN otherwise.
      return fraction ? NaN : signFactor * Infinity;
    default:
      return signFactor * (1 + fraction / 1024) * 2 ** (exponentField - 15);
  }
}
/**
 * Reads one element of tensor data as a number.
 * @param {ArrayLike<number>} data - Tensor storage.
 * @param {number} index - Element index.
 * @param {string} type - Tensor element type; only "float16" needs decoding.
 * @returns {number} The element, decoded from binary16 bits when `type` is
 *   "float16" and `data` is not a native Float16Array.
 */
function tensorValue(data, index, type) {
  const raw = data[index];
  if (type !== "float16") {
    return raw;
  }
  const NativeFloat16Array = globalThis.Float16Array;
  if (NativeFloat16Array && data instanceof NativeFloat16Array) {
    return raw;
  }
  return float16BitsToFloat32(raw);
}
/**
 * Renders the final and partial transcripts into the "transcript" element,
 * showing "..." for whichever one is empty.
 */
function renderTranscript() {
  const finalText = state.finalTranscript || "...";
  const partialText = state.partialTranscript || "...";
  const view = $("transcript");
  view.textContent = `Final:\n${finalText}\n\nPartial:\n${partialText}`;
}
/**
 * Appends `chunk` to state.utterancePcm, keeping only the most recent
 * samples that fit in the window configured by the "window-seconds" input
 * (20 s when the input is missing or empty).
 * @param {Float32Array} chunk - Newly captured samples.
 */
function appendToUtterance(chunk) {
  const windowSeconds = Number($("window-seconds")?.value || 20);
  const capacity = Math.ceil(windowSeconds * SAMPLE_RATE);
  const previous = state.utterancePcm;

  const next = new Float32Array(Math.min(capacity, previous.length + chunk.length));
  const total = next.length;

  // Number of old samples that still fit in front of the new chunk.
  const retained = Math.max(0, total - chunk.length);
  if (retained > 0) {
    next.set(previous.subarray(previous.length - retained), 0);
  }

  // When the chunk alone exceeds the window, keep only its tail.
  const chunkSkip = Math.max(0, chunk.length - total);
  next.set(chunk.subarray(chunkSkip), retained);

  state.utterancePcm = next;
}
/**
 * Resamples audio by linear interpolation, mapping the first and last input
 * samples onto the first and last output samples.
 *
 * @param {ArrayLike<number>} input - Source samples.
 * @param {number} fromRate - Sample rate of `input`.
 * @param {number} toRate - Desired sample rate.
 * @returns {Float32Array} A new array; a plain copy when the rates match and
 *   an empty array when `input` is empty.
 */
function resampleLinear(input, fromRate, toRate) {
  if (fromRate === toRate) return new Float32Array(input);
  // Without this guard an empty input would yield a single NaN sample,
  // because the loop below would read input[0].
  if (input.length === 0) return new Float32Array(0);

  const outLen = Math.max(1, Math.round(input.length * toRate / fromRate));
  const output = new Float32Array(outLen);
  const lastIndex = input.length - 1;
  const ratio = lastIndex / Math.max(1, outLen - 1);
  for (let i = 0; i < outLen; i++) {
    const x = i * ratio;
    const j = Math.floor(x);
    const frac = x - j;
    output[i] = input[j] * (1 - frac) + input[Math.min(j + 1, lastIndex)] * frac;
  }
  return output;
}
/**
 * Converts a frequency in hertz to mels using 2595 * log10(1 + hz / 700).
 * @param {number} hz - Frequency in hertz.
 * @returns {number} Value on the mel scale.
 */
function hzToMel(hz) {
  const ratio = 1 + hz / 700;
  return 2595 * Math.log10(ratio);
}
/**
 * Converts a mel value back to hertz using 700 * (10^(mel / 2595) - 1).
 * @param {number} mel - Value on the mel scale.
 * @returns {number} Frequency in hertz.
 */
function melToHz(mel) {
  const exponent = mel / 2595;
  return 700 * (10 ** exponent - 1);
}
/**
 * Copies the filterbank embedded at window.VISUALEARS_MEL_FILTERS into one
 * Float32Array per mel row.
 * @returns {Float32Array[]} N_MELS filter rows.
 * @throws {Error} When the embedded table is not an array of N_MELS rows.
 */
function createMelFilters() {
  const embedded = window.VISUALEARS_MEL_FILTERS;
  const usable = Array.isArray(embedded) && embedded.length === N_MELS;
  if (!usable) {
    throw new Error("Missing embedded Slaney mel filterbank");
  }
  return embedded.map((row) => Float32Array.from(row));
}
/**
 * Precomputes the twiddle factors for an `n`-point forward FFT.
 * @param {number} n - Transform size.
 * @returns {{n: number, cos: Float32Array, sin: Float32Array}} Cosine and
 *   sine of -2*pi*k/n for k in [0, n/2).
 */
function createFftPlan(n) {
  const half = n / 2;
  const cos = new Float32Array(half);
  const sin = new Float32Array(half);
  cos.forEach((_, k) => {
    const angle = -2 * Math.PI * k / n;
    cos[k] = Math.cos(angle);
    sin[k] = Math.sin(angle);
  });
  return { n, cos, sin };
}
/**
 * Computes the power spectrum of a real frame with an in-place radix-2 FFT.
 * @param {ArrayLike<number>} frame - Real samples; zero-padded to plan.n when shorter.
 * @param {{n: number, cos: Float32Array, sin: Float32Array}} plan - Twiddle
 *   tables as produced by createFftPlan.
 * @returns {Float32Array} Squared magnitudes of bins 0..n/2 (n/2 + 1 values).
 */
function fftRealPower(frame, plan) {
  const size = plan.n;
  const real = new Float32Array(size);
  const imag = new Float32Array(size);
  real.set(frame);

  // Reorder the samples into bit-reversed order.
  let reversed = 0;
  for (let index = 1; index < size; index++) {
    let bit = size >> 1;
    while (reversed & bit) {
      reversed ^= bit;
      bit >>= 1;
    }
    reversed ^= bit;
    if (index < reversed) {
      const swapReal = real[index];
      real[index] = real[reversed];
      real[reversed] = swapReal;
      const swapImag = imag[index];
      imag[index] = imag[reversed];
      imag[reversed] = swapImag;
    }
  }

  // Butterfly passes, doubling the sub-transform length each time.
  for (let span = 2; span <= size; span <<= 1) {
    const halfSpan = span >> 1;
    const twiddleStride = size / span;
    for (let start = 0; start < size; start += span) {
      for (let k = 0; k < halfSpan; k++) {
        const twiddle = k * twiddleStride;
        const wr = plan.cos[twiddle];
        const wi = plan.sin[twiddle];
        const lo = start + k;
        const hi = lo + halfSpan;
        const ur = real[lo];
        const ui = imag[lo];
        const vr = real[hi] * wr - imag[hi] * wi;
        const vi = real[hi] * wi + imag[hi] * wr;
        real[lo] = ur + vr;
        imag[lo] = ui + vi;
        real[hi] = ur - vr;
        imag[hi] = ui - vi;
      }
    }
  }

  // Only the non-negative frequencies are needed for a real input.
  const binCount = size / 2 + 1;
  const power = new Float32Array(binCount);
  for (let bin = 0; bin < binCount; bin++) {
    power[bin] = real[bin] * real[bin] + imag[bin] * imag[bin];
  }
  return power;
}
/**
 * Maps an arbitrary index into [0, length) by mirroring it at both ends
 * without repeating the edge sample.
 * @param {number} index - Possibly out-of-range index.
 * @param {number} length - Length of the underlying array.
 * @returns {number} In-range index; always 0 when `length` is 1 or less.
 */
function reflectIndex(index, length) {
  if (length <= 1) return 0;
  let position = index;
  for (;;) {
    if (position < 0) {
      position = -position;
    } else if (position >= length) {
      position = 2 * length - position - 2;
    } else {
      return position;
    }
  }
}
/**
 * Converts PCM samples into a normalised log-mel feature matrix.
 *
 * The result is stored row-major as N_MELS rows of FIXED_FRAMES values
 * (index = mel * FIXED_FRAMES + frame). Only the first `frameCount` frames of
 * each row are filled; the rest keep the zero they were allocated with.
 *
 * @param {Float32Array} pcm - Input samples; only the last
 *   (FIXED_FRAMES - 1) * HOP_LENGTH samples are used when longer.
 * @returns {{features: Float32Array, frameCount: number}} Feature matrix and
 *   the number of valid frames.
 */
function pcmToLogMel(pcm) {
// The FFT plan and mel filters are built on first use and cached in state.
if (!state.fftPlan) state.fftPlan = createFftPlan(N_FFT);
if (!state.melFilters) state.melFilters = createMelFilters();
// Keep only the tail that fits in the fixed number of frames.
const maxSamples = (FIXED_FRAMES - 1) * HOP_LENGTH;
if (pcm.length > maxSamples) pcm = pcm.subarray(pcm.length - maxSamples);
// One frame per hop plus one, clamped to [1, FIXED_FRAMES].
const frameCount = Math.max(1, Math.min(FIXED_FRAMES, Math.floor(pcm.length / HOP_LENGTH) + 1));
const features = new Float32Array(N_MELS * FIXED_FRAMES);
// Symmetric Hann window (denominator WIN_LENGTH - 1), recomputed on every call.
const hann = new Float32Array(WIN_LENGTH);
for (let i = 0; i < WIN_LENGTH; i++) hann[i] = 0.5 - 0.5 * Math.cos(2 * Math.PI * i / (WIN_LENGTH - 1));
// Pre-emphasis filter; the first sample is copied unchanged. The buffer has at
// least one element so reflectIndex always has something to point at.
const emphasized = new Float32Array(Math.max(1, pcm.length));
if (pcm.length > 0) emphasized[0] = pcm[0];
for (let i = 1; i < pcm.length; i++) emphasized[i] = pcm[i] - PREEMPHASIS * pcm[i - 1];
for (let t = 0; t < frameCount; t++) {
const frame = new Float32Array(N_FFT);
// Frames are centred on t * HOP_LENGTH, so they start CENTER_PAD samples earlier.
const frameStart = t * HOP_LENGTH - CENTER_PAD;
for (let j = 0; j < N_FFT; j++) {
// The window covers only the middle WIN_LENGTH samples of the frame; the
// remaining frame positions stay zero.
const winIndex = j - WINDOW_OFFSET;
if (winIndex < 0 || winIndex >= WIN_LENGTH) continue;
// Positions outside the signal are filled by mirroring the signal at its ends.
const src = reflectIndex(frameStart + j, emphasized.length);
frame[j] = emphasized[src] * hann[winIndex];
}
const power = fftRealPower(frame, state.fftPlan);
for (let m = 0; m < N_MELS; m++) {
const filter = state.melFilters[m];
let energy = 0;
for (let k = 0; k < filter.length; k++) energy += power[k] * filter[k];
// Natural log of the mel energy, guarded against log(0).
features[m * FIXED_FRAMES + t] = Math.log(energy + LOG_ZERO_GUARD);
}
}
// Normalise each mel row to zero mean and unit variance over the valid frames only.
for (let m = 0; m < N_MELS; m++) {
const offset = m * FIXED_FRAMES;
let mean = 0;
for (let t = 0; t < frameCount; t++) mean += features[offset + t];
mean /= frameCount;
let variance = 0;
for (let t = 0; t < frameCount; t++) {
const d = features[offset + t] - mean;
variance += d * d;
}
// Population variance (divided by frameCount) with 1e-5 added inside the square root.
const scale = 1 / Math.sqrt(variance / frameCount + 1e-5);
for (let t = 0; t < frameCount; t++) features[offset + t] = (features[offset + t] - mean) * scale;
}
return { features, frameCount };
}
/**
 * Tells whether a token piece is wrapped in angle brackets, e.g. "<unk>".
 * @param {string} piece - Token text.
 * @returns {boolean} True when the piece starts with "<" and ends with ">".
 */
function isSpecialToken(piece) {
  const opensWithBracket = piece.charAt(0) === "<";
  const closesWithBracket = piece.charAt(piece.length - 1) === ">";
  return opensWithBracket && closesWithBracket;
}
/**
 * Greedy CTC decoding: takes the arg-max token at every time step, collapses
 * consecutive repeats, and drops blanks, unknown ids and special tokens.
 *
 * @param {ArrayLike<number>} logits - Scores laid out as timeSteps rows of vocabSize values.
 * @param {number} timeSteps - Number of rows to decode.
 * @param {number} vocabSize - Number of scores per row.
 * @param {string} [logitsType="float32"] - Element type passed to tensorValue.
 * @returns {string} Decoded text with "▁" turned into spaces and whitespace collapsed.
 */
function decodeCtc(logits, timeSteps, vocabSize, logitsType = "float32") {
  const vocabulary = state.tokens;
  const blankIndex = state.blankId;
  const emitted = [];
  let lastIndex = -1;

  for (let step = 0; step < timeSteps; step++) {
    const rowStart = step * vocabSize;
    let argmax = 0;
    let maxLogit = -Infinity;
    for (let column = 0; column < vocabSize; column++) {
      const logit = tensorValue(logits, rowStart + column, logitsType);
      if (logit > maxLogit) {
        maxLogit = logit;
        argmax = column;
      }
    }

    const repeated = argmax === lastIndex;
    // The previous index is updated on every step, including blanks, so that
    // a blank between two identical tokens lets both be emitted.
    lastIndex = argmax;
    if (repeated || argmax === blankIndex) continue;

    const piece = vocabulary[argmax] || "";
    if (piece && !isSpecialToken(piece)) {
      emitted.push(piece);
    }
  }

  const joined = emitted.join("");
  return joined.replaceAll("▁", " ").replace(/\s+/g, " ").trim();
}
/**
 * Queries navigator.gpu for an adapter, stores a short description in
 * state.gpuName and shows it in the "stat-gpu" readout.
 * @returns {Promise<void>}
 */
async function inspectGpu() {
  // Joins whichever of vendor / architecture / device are present.
  const describe = (info) =>
    [info.vendor, info.architecture, info.device].filter(Boolean).join(" ") || "available";

  if (!navigator.gpu) {
    state.gpuName = "not exposed";
  } else {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      if (!adapter) {
        state.gpuName = "unavailable";
      } else if (adapter.info) {
        state.gpuName = describe(adapter.info);
      } else if (adapter.requestAdapterInfo) {
        // Fallback for adapters that expose the info through a method instead.
        state.gpuName = describe(await adapter.requestAdapterInfo());
      } else {
        state.gpuName = "available";
      }
    } catch (err) {
      state.gpuName = `unavailable: ${err.message}`;
    }
  }
  setText("stat-gpu", state.gpuName);
}
/**
 * Loads the compatibility (WASM-only) ONNX Runtime build by injecting a
 * <script> tag, and caches the resulting window.ort.
 *
 * A script element that failed to load is removed from the document so that a
 * later call starts a fresh download instead of waiting on events that have
 * already fired. A script element that has already finished loading resolves
 * immediately for the same reason.
 *
 * @returns {Promise<object>} The global `ort` object defined by the script.
 * @throws {Error} When the script fails to load or does not define window.ort.
 */
async function loadCompatOrtRuntime() {
  if (ortCompatRuntime) return ortCompatRuntime;

  const scriptId = "ort-compat-wasm-js";
  await new Promise((resolve, reject) => {
    const fail = (element) => {
      // Drop the dead element; otherwise every retry would find it and hang.
      element.remove();
      reject(new Error("Compatibility ONNX Runtime script failed"));
    };

    const existing = document.getElementById(scriptId);
    if (existing) {
      // The load event is not replayed for listeners added after it fired.
      if (existing.dataset.loaded === "true" || window.ort) {
        resolve();
        return;
      }
      existing.addEventListener("load", () => resolve(), { once: true });
      existing.addEventListener("error", () => fail(existing), { once: true });
      return;
    }

    const script = document.createElement("script");
    script.id = scriptId;
    script.async = true;
    script.crossOrigin = "anonymous";
    script.src = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ORT_COMPAT_VERSION}/dist/ort.wasm.min.js`;
    script.onload = () => {
      script.dataset.loaded = "true";
      resolve();
    };
    script.onerror = () => fail(script);
    document.head.appendChild(script);
  });

  if (!window.ort) throw new Error("Compatibility ONNX Runtime did not initialize");
  ortCompatRuntime = window.ort;
  return ortCompatRuntime;
}
/**
 * Detects WebAssembly SIMD and thread support once and memoises the result.
 * @returns {Promise<{simd: boolean, threads: boolean}>} Both flags are false
 *   when the probe itself throws.
 */
async function detectWasmFeatures() {
  if (!wasmFeaturePromise) {
    const probe = () => {
      // Minimal module that is validated to test for SIMD support.
      const simdProbe = new Uint8Array([
        0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
        0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b,
        0x03, 0x02, 0x01, 0x00,
        0x0a, 0x0a, 0x01, 0x08, 0x00, 0x41, 0x00, 0xfd, 0x0f, 0xfd, 0x62, 0x0b,
      ]);
      const simd = typeof WebAssembly !== "undefined" && WebAssembly.validate(simdProbe);
      // Threads need Atomics, SharedArrayBuffer and a cross-origin-isolated page.
      const threads = typeof Atomics !== "undefined"
        && typeof SharedArrayBuffer !== "undefined"
        && !!window.crossOriginIsolated;
      return { simd, threads };
    };
    const noFeatures = () => ({ simd: false, threads: false });
    wasmFeaturePromise = Promise.resolve().then(probe).catch(noFeatures);
  }
  return wasmFeaturePromise;
}
/**
 * Applies WASM and WebGPU environment settings to an ONNX Runtime instance.
 *
 * @param {object} ort - ONNX Runtime module whose `env` is configured.
 * @param {string} provider - Requested execution provider; "wasm" selects the
 *   compatibility build's settings.
 * @param {string} wasmMode - "nosimd" disables SIMD on the compatibility path.
 * @param {{simd: boolean, threads: boolean}|null} features - Detected WASM features.
 * @returns {string} The `provider` argument, unchanged.
 */
function configureWasm(ort, provider, wasmMode, features) {
  const wasmEnv = ort.env.wasm;
  const usesCompatBuild = provider === "wasm";
  const pinnedVersion = usesCompatBuild ? ORT_COMPAT_VERSION : ORT_WEBGPU_VERSION;
  // Between one and four threads, bounded by the reported core count.
  const cappedThreads = () => Math.max(1, Math.min(4, navigator.hardwareConcurrency || 1));

  wasmEnv.wasmPaths = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${pinnedVersion}/dist/`;
  if (usesCompatBuild) {
    wasmEnv.simd = wasmMode !== "nosimd" && !!features?.simd;
    wasmEnv.numThreads = features?.threads ? cappedThreads() : 1;
  } else {
    wasmEnv.numThreads = cappedThreads();
  }

  if (ort.env.webgpu) {
    ort.env.webgpu.profiling = false;
  }
  return provider;
}
/**
 * Creates an inference session for the requested provider.
 *
 * "wasm" uses the compatibility runtime with the single-file model; any other
 * provider uses the module imported at boot with the model plus its external
 * data file.
 *
 * @param {string} provider - Execution provider name.
 * @param {string} [wasmMode="optimized"] - "nosimd" forces the non-SIMD WASM path.
 * @returns {Promise<{provider: string, session: object, ort: object}>} The
 *   session, the runtime that created it and a label for the path taken.
 * @throws {Error} When no ONNX Runtime instance is available.
 */
async function createSession(provider, wasmMode = "optimized") {
  const isCompat = provider === "wasm";
  const features = isCompat ? await detectWasmFeatures() : null;
  const forceNoSimd = isCompat && (!features.simd || wasmMode === "nosimd");
  const resolvedMode = forceNoSimd ? "nosimd" : "optimized";

  const ort = isCompat ? await loadCompatOrtRuntime() : ortRuntime;
  if (!ort) throw new Error("ONNX Runtime Web did not initialize");

  const executionProvider = configureWasm(ort, provider, resolvedMode, features);
  const baseOptions = {
    executionProviders: [executionProvider],
    graphOptimizationLevel: "all",
    enableMemPattern: false,
    enableCpuMemArena: true,
  };

  if (!isCompat) {
    const options = {
      ...baseOptions,
      externalData: [
        {
          path: MODEL_DATA_FILE,
          data: MODEL_DATA_URL,
        },
      ],
    };
    const session = await ort.InferenceSession.create(MODEL_URL, options);
    return { provider: executionProvider, session, ort };
  }

  let label = "wasm-simd";
  if (resolvedMode === "nosimd") {
    label = "wasm-nosimd";
  } else if (features.threads) {
    label = "wasm-simd-threaded";
  }
  const session = await ort.InferenceSession.create(MODEL_EMBEDDED_URL, baseOptions);
  return { provider: label, session, ort };
}
/**
 * Loads the tokenizer table and opens an inference session for the provider
 * selected in the UI. If the first attempt fails for any reason, retries once
 * on the WASM path with SIMD disabled.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the embedded token table is missing or the retry fails too.
 */
async function loadModel() {
  $("load-model").disabled = true;
  setStatus("Loading tokenizer and ONNX Runtime WebGPU...");
  updateHeap();
  await inspectGpu();

  const tokenJson = window.VISUALEARS_TOKENS;
  if (!tokenJson?.tokens?.length) throw new Error("Missing embedded CTC tokens");
  state.tokens = tokenJson.tokens;
  state.blankId = tokenJson.blank_id;

  // Stores a createSession result in state and returns its provider label.
  const adopt = (result) => {
    state.session = result.session;
    state.ort = result.ort;
    return result.provider;
  };

  const provider = $("provider").value;
  const startedAt = performance.now();
  let effectiveProvider = provider;
  try {
    const webgpuMissing = provider === "webgpu" && !navigator.gpu;
    if (webgpuMissing) {
      setStatus("WebGPU is unavailable here. Opening the best supported CPU WASM path...");
    } else if (provider === "wasm") {
      setStatus("Opening the best supported CPU WASM path. First load is about 232 MB; cached reloads should be much faster.");
    } else {
      setStatus(`Downloading/opening ${provider.toUpperCase()} PhaseB FP16 session. First load is about 232 MB; cached reloads should be much faster.`);
    }
    effectiveProvider = adopt(await createSession(webgpuMissing ? "wasm" : provider));
  } catch (err) {
    const message = err && err.message ? err.message : String(err || "unknown error");
    setStatus(`Primary load failed (${message}). Retrying CPU-only WASM without SIMD...`);
    effectiveProvider = adopt(await createSession("wasm", "nosimd"));
  }
  const elapsed = performance.now() - startedAt;

  state.provider = effectiveProvider;
  setText("stat-provider", effectiveProvider);
  for (const statId of ["stat-decode", "stat-rtf", "stat-speed"]) {
    setText(statId, "-");
  }
  updateHeap();
  setStatus(`Model ready in ${formatMs(elapsed)} (${effectiveProvider}). Start the microphone when you are ready.`);
  $("start-mic").disabled = false;
}
/**
 * Opens the microphone, wires it through a ScriptProcessor node and starts
 * the periodic partial-decode loop. Does nothing when no model is loaded.
 *
 * Each audio callback gates on RMS level: loud chunks start or extend the
 * current utterance, quiet chunks are appended only for the first 350 ms of
 * silence, and 900 ms of silence triggers finalisation.
 *
 * @returns {Promise<void>}
 */
async function startMic() {
  if (!state.session) return;

  const constraints = {
    audio: {
      channelCount: 1,
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: true,
    },
  };
  state.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
  state.audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
  state.source = state.audioContext.createMediaStreamSource(state.mediaStream);
  state.processor = state.audioContext.createScriptProcessor(4096, 1, 1);
  state.source.connect(state.processor);
  state.processor.connect(state.audioContext.destination);

  // Start from a clean transcript and voice-activity state.
  state.utterancePcm = new Float32Array(0);
  state.finalTranscript = "";
  state.partialTranscript = "";
  state.speechActive = false;
  state.silenceMs = 0;
  state.finalizing = false;
  renderTranscript();

  state.recording = true;
  $("start-mic").disabled = true;
  $("stop-mic").disabled = false;
  setStatus("Listening. Partial text can move while you speak; final text freezes after silence.");

  const handleAudio = (event) => {
    if (!state.recording) return;

    const input = event.inputBuffer.getChannelData(0);
    let sumOfSquares = 0;
    for (let i = 0; i < input.length; i++) sumOfSquares += input[i] * input[i];
    const rms = Math.sqrt(sumOfSquares / input.length);
    $("level-bar").style.width = `${Math.min(100, rms * 900).toFixed(1)}%`;

    const gate = Number($("noise-gate").value);
    const isSpeech = rms >= gate;
    const chunk = resampleLinear(input, state.audioContext.sampleRate, SAMPLE_RATE);
    const chunkMs = chunk.length / SAMPLE_RATE * 1000;

    if (!isSpeech) {
      // Silence only matters while an utterance is open.
      if (!state.speechActive) return;
      state.silenceMs += chunkMs;
      if (state.silenceMs <= 350) appendToUtterance(chunk);
      if (state.silenceMs >= 900 && !state.finalizing) {
        state.finalizing = true;
        finalizeUtterance();
      }
      return;
    }

    if (!state.speechActive) {
      // First loud chunk after silence: open a new utterance.
      state.utterancePcm = new Float32Array(0);
      state.partialTranscript = "";
      state.silenceMs = 0;
      state.speechActive = true;
      state.finalizing = false;
      renderTranscript();
    }
    state.silenceMs = 0;
    appendToUtterance(chunk);
  };
  state.processor.onaudioprocess = handleAudio;

  scheduleDecode();
}
/**
 * Stops recording: cancels the decode timer, tears down the audio graph,
 * stops the microphone tracks and resets the related controls.
 */
function stopMic() {
  state.recording = false;
  clearTimeout(state.decodeTimer);
  state.decodeTimer = null;

  const { processor, source, mediaStream, audioContext } = state;
  if (processor) processor.disconnect();
  if (source) source.disconnect();
  if (mediaStream) {
    for (const track of mediaStream.getTracks()) track.stop();
  }
  if (audioContext) audioContext.close();

  state.processor = null;
  state.source = null;
  state.mediaStream = null;
  state.audioContext = null;

  $("level-bar").style.width = "0%";
  // Starting again is only possible while a model session exists.
  $("start-mic").disabled = !state.session;
  $("stop-mic").disabled = true;
  setStatus("Stopped.");
}
/**
 * Arms a one-shot timer that runs a partial decode and then re-arms itself,
 * using the interval (in seconds) from the "decode-every" input. Any pending
 * timer is cancelled first; nothing is scheduled when not recording.
 */
function scheduleDecode() {
  clearTimeout(state.decodeTimer);
  if (!state.recording) return;

  const intervalMs = Number($("decode-every").value) * 1000;
  const tick = async () => {
    await runDecode();
    scheduleDecode();
  };
  state.decodeTimer = setTimeout(tick, intervalMs);
}
/**
 * Runs a partial decode when a model is loaded, speech is active, no other
 * decode or finalisation is in progress, and at least 0.8 s of audio exists.
 * @returns {Promise<void>}
 */
async function runDecode() {
  if (!state.session) return;
  if (state.decoding || state.finalizing) return;
  if (!state.speechActive) return;
  if (state.utterancePcm.length < SAMPLE_RATE * 0.8) return;
  await decodeUtterance(false);
}
/**
 * Produces the final transcript for the current utterance. Retries after
 * 150 ms while another decode is running, and simply closes the utterance
 * when no session exists or less than 0.25 s of audio was captured.
 * @returns {Promise<void>}
 */
async function finalizeUtterance() {
  if (state.decoding) {
    // Try again once the in-flight decode has had time to finish.
    setTimeout(finalizeUtterance, 150);
    return;
  }

  const tooShort = state.utterancePcm.length < SAMPLE_RATE * 0.25;
  if (state.session && !tooShort) {
    await decodeUtterance(true);
    return;
  }

  state.speechActive = false;
  state.finalizing = false;
}
/**
 * Runs the model on the current utterance and updates the transcript and the
 * statistics readouts.
 *
 * The most recent audio within the configured window is used; clips shorter
 * than 2.5 s are zero-padded at the end to that length before feature
 * extraction. Errors are logged and shown in the status line rather than
 * thrown.
 *
 * @param {boolean} final - When true the result is appended to the final
 *   transcript and the utterance state is reset; otherwise it replaces the
 *   partial transcript.
 * @returns {Promise<void>}
 */
async function decodeUtterance(final) {
  state.decoding = true;
  try {
    // Same fallback as appendToUtterance, so a missing or empty input cannot
    // turn the sample count into NaN.
    const maxSeconds = Number($("window-seconds")?.value || 20);
    const samples = Math.min(state.utterancePcm.length, Math.floor(maxSeconds * SAMPLE_RATE));
    const sourceAudio = state.utterancePcm.subarray(state.utterancePcm.length - samples);

    const minLiveSamples = Math.floor(2.5 * SAMPLE_RATE);
    let audio = sourceAudio;
    if (sourceAudio.length < minLiveSamples) {
      audio = new Float32Array(minLiveSamples);
      audio.set(sourceAudio, 0);
    }

    const preprocessStarted = performance.now();
    const { features, frameCount } = pcmToLogMel(audio);
    const ort = state.ort || ortRuntime;
    if (!ort) throw new Error("ONNX Runtime Web did not initialize");
    const tensor = new ort.Tensor("float32", features, [1, N_MELS, FIXED_FRAMES]);
    const lengthTensor = new ort.Tensor("int64", BigInt64Array.from([BigInt(frameCount)]), [1]);

    const inferStarted = performance.now();
    const output = await state.session.run({ processed_signal: tensor, processed_signal_length: lengthTensor });
    const inferElapsed = performance.now() - inferStarted;
    const elapsed = performance.now() - preprocessStarted;

    const logits = output.logits.data;
    const dims = output.logits.dims;
    const logitsType = output.logits.type;
    const vocabSize = dims[2] || state.tokens.length;
    // Prefer the model's own output length; otherwise estimate it from the stride.
    const encodedRaw = output.encoded_lengths?.data?.[0];
    const encodedLength = encodedRaw === undefined ? Math.ceil(frameCount / OUTPUT_STRIDE) : Number(encodedRaw);
    const usableSteps = Math.max(1, Math.min(dims[1], encodedLength));
    const transcript = decodeCtc(logits, usableSteps, vocabSize, logitsType);

    // Captured audio drives the speed figures; the model window also counts
    // the zero padding and the truncation applied by pcmToLogMel.
    const audioSeconds = samples / SAMPLE_RATE;
    const modelSamples = Math.min(audio.length, (FIXED_FRAMES - 1) * HOP_LENGTH);
    const modelSeconds = modelSamples / SAMPLE_RATE;
    const rtf = (elapsed / 1000) / Math.max(0.001, audioSeconds);
    const speed = Math.max(0.001, audioSeconds) / Math.max(0.001, elapsed / 1000);

    if (final) {
      if (transcript) {
        state.finalTranscript = state.finalTranscript ? `${state.finalTranscript}\n${transcript}` : transcript;
      }
      state.partialTranscript = "";
      state.speechActive = false;
      state.finalizing = false;
      state.silenceMs = 0;
      state.utterancePcm = new Float32Array(0);
    } else {
      state.partialTranscript = transcript;
    }

    renderTranscript();
    setText("stat-decode", `${formatMs(elapsed)} total / ${formatMs(inferElapsed)} infer`);
    setText("stat-rtf", `${rtf.toFixed(3)}`);
    setText("stat-speed", `${speed.toFixed(1)}x`);
    setText("stat-audio", `${audioSeconds.toFixed(1)} s`);
    setText("stat-frames", `${frameCount} / ${FIXED_FRAMES}`);
    setText("stat-provider", state.provider || $("provider").value);
    setText("stat-last", new Date().toLocaleTimeString());
    updateHeap();
    setStatus(`${final ? "Finalized" : "Decoded partial"} ${modelSeconds.toFixed(1)} s model window (${audioSeconds.toFixed(1)} s captured) with ${usableSteps} CTC steps.`);
  } catch (err) {
    console.error(err);
    setStatus(`Decode failed: ${err.message}`);
    if (final) {
      // Close the utterance so the next speech segment can start cleanly.
      state.speechActive = false;
      state.finalizing = false;
    }
  } finally {
    state.decoding = false;
  }
}
/**
 * Starts the app: waits for the controls to exist, imports the ONNX Runtime
 * WebGPU module and wires up the buttons and the provider selector.
 *
 * The wait happens inside this call, so the promise returned to the caller
 * also covers failures that occur after the controls finally appear.
 *
 * @returns {Promise<void>}
 */
async function boot() {
  // Poll every 100 ms until the page has rendered the controls.
  while (!$("load-model")) {
    await new Promise((resolve) => setTimeout(resolve, 100));
  }

  ortRuntime = await import(`https://cdn.jsdelivr.net/npm/onnxruntime-web@${ORT_WEBGPU_VERSION}/dist/ort.webgpu.min.mjs`);

  $("load-model").addEventListener("click", () => loadModel().catch((err) => {
    console.error(err);
    setStatus(`Load failed: ${err.message}`);
    $("load-model").disabled = false;
  }));
  $("start-mic").addEventListener("click", () => startMic().catch((err) => {
    console.error(err);
    setStatus(`Mic failed: ${err.message}`);
    stopMic();
  }));
  $("stop-mic").addEventListener("click", stopMic);
  $("provider").addEventListener("change", () => {
    if (!state.session) return;
    state.session = null;
    // Release the microphone too; otherwise the capture graph keeps running
    // with no model and a later Start would open a second one on top of it.
    if (state.recording) stopMic();
    $("start-mic").disabled = true;
    $("load-model").disabled = false;
    setStatus("Provider changed. Load the model again.");
  });

  setText("stat-provider", "-");
  setText("stat-gpu", "-");
  updateHeap();
}
// Entry point: start the app and, if the promise returned by boot() rejects,
// log the error and show it in the status line.
boot().catch((err) => {
console.error(err);
setStatus(`Startup failed: ${err.message}`);
});