import './style.css';
import { createTokenizer } from './tokenizer';
import type { Tokenizer } from './tokenizer';
import { loadSessions } from './runtime';
import type { NeedleSessions } from './runtime';
import { generate } from './generate';
import { mountUI, setStatus, renderResult, renderError, readTools, setInteractiveEnabled } from './ui';
import type { UI } from './ui';
import { TOKENIZER_URL, SPECIALS_URL } from './config';

/** Special token ids loaded from `SPECIALS_URL` and forwarded to `generate`. */
interface Specials {
  pad: number;
  eos: number;
  bos: number;
  tool_call: number;
  tools: number;
}

/**
 * Extracts a printable message from a caught value.
 *
 * Anything can be thrown in JavaScript, so the value is narrowed instead of
 * being asserted to be an `Error`.
 */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Downloads `url` and returns the response body as raw bytes.
 *
 * @throws Error when the response status is not OK.
 */
async function fetchBytes(url: string): Promise<Uint8Array> {
  const resp = await fetch(url);
  if (!resp.ok) throw new Error(`fetch ${url}: ${resp.status}`);
  return new Uint8Array(await resp.arrayBuffer());
}

/**
 * Downloads `url` and parses the response body as JSON.
 *
 * The parsed payload is not validated; it is trusted to match `Specials`.
 *
 * @throws Error when the response status is not OK.
 */
async function fetchJson(url: string): Promise<Specials> {
  const resp = await fetch(url);
  if (!resp.ok) throw new Error(`fetch ${url}: ${resp.status}`);
  return resp.json();
}

/**
 * Application entry point: mounts the UI, loads the model sessions, tokenizer
 * data and special-token table in parallel, then enables the UI and attaches
 * the run handler. Any load failure is reported through the UI.
 */
async function boot(): Promise<void> {
  const ui = mountUI();
  try {
    setStatus(ui, 'loading model…', true);
    const t0 = performance.now();

    // The three loads are independent, so they run concurrently.
    const [sessions, tokenizerBytes, specials] = await Promise.all([
      loadSessions(m => setStatus(ui, m, true)),
      fetchBytes(TOKENIZER_URL),
      fetchJson(SPECIALS_URL),
    ]);
    const tokenizer = await createTokenizer(tokenizerBytes);

    const loadSecs = ((performance.now() - t0) / 1000).toFixed(1);
    setStatus(ui, `ready · loaded in ${loadSecs}s`);
    setInteractiveEnabled(ui, true);
    wireRun(ui, sessions, tokenizer, specials);
  } catch (e: unknown) {
    setStatus(ui, 'failed');
    renderError(ui, `Failed to load model: ${errorMessage(e)}`);
  }
}

/**
 * Attaches the generation handler to the query element's `change` event.
 *
 * Each run reads the tool definitions and the trimmed query from the UI,
 * streams partial output into the result view, and finishes by showing the
 * final text with timing statistics. A run that starts while another is still
 * in flight is ignored.
 */
function wireRun(ui: UI, sessions: NeedleSessions, tokenizer: Tokenizer, specials: Specials): void {
  let running = false;

  ui.queryEl.addEventListener('change', async () => {
    if (running) return;

    const tools = readTools(ui);
    if (!tools.ok) {
      renderError(ui, tools.error);
      return;
    }

    const query = ui.queryEl.value.trim();
    if (!query) return;

    running = true;
    let tokensSoFar = 0;
    const t0 = performance.now();

    // Refresh the status line every 100 ms, independently of token arrival.
    const tick = setInterval(() => {
      const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
      setStatus(ui, `generating… ${elapsed}s · ${tokensSoFar} tok`, true);
    }, 100);

    try {
      const result = await generate(
        sessions,
        tokenizer,
        query,
        tools.tools,
        {
          eosTokenId: specials.eos,
          // Cactus seeds decoder with EOS, not BOS
          bosOrPrefixTokenId: specials.eos,
          toolsTokenId: specials.tools,
          maxNewTokens: 256,
        },
        (_id, decodedSoFar) => {
          tokensSoFar += 1;
          let display = decodedSoFar;
          // NOTE(review): the prefix literal is empty in this copy of the file,
          // so this strip is a no-op. Presumably a leading marker string was
          // meant to be removed here — confirm and restore the literal.
          if (display.startsWith('')) display = display.slice(''.length);
          renderResult(ui, display);
        },
      );

      const elapsedMs = performance.now() - t0;
      const elapsed = (elapsedMs / 1000).toFixed(2);
      const tps = (result.ids.length / (elapsedMs / 1000)).toFixed(1);
      renderResult(ui, result.text);
      setStatus(ui, `ready · ${elapsed}s · ${result.ids.length} tok · ${tps} tok/s`);
    } catch (e: unknown) {
      renderError(ui, `Generation failed: ${errorMessage(e)}`);
      setStatus(ui, 'ready');
    } finally {
      // Stop the status ticker and release the re-entrancy guard on every path.
      clearInterval(tick);
      running = false;
    }
  });
}

// boot() reports its own load failures through the UI; `void` marks the
// promise as intentionally not awaited.
void boot();