Spaces:
Running
Running
| import './style.css'; | |
| import { createTokenizer } from './tokenizer'; | |
| import type { Tokenizer } from './tokenizer'; | |
| import { loadSessions } from './runtime'; | |
| import type { NeedleSessions } from './runtime'; | |
| import { generate } from './generate'; | |
| import { mountUI, setStatus, renderResult, renderError, readTools, setInteractiveEnabled } from './ui'; | |
| import type { UI } from './ui'; | |
| import { TOKENIZER_URL, SPECIALS_URL } from './config'; | |
/**
 * Special-token ids, fetched as JSON from `SPECIALS_URL` during boot.
 *
 * In this file `eos` is used both as the stop token and as the decoder seed,
 * and `tools` is passed to generation as the tools marker id. The remaining
 * fields are not read in this module; their meaning is presumably implied by
 * their names — confirm against the tokenizer export.
 */
interface Specials {
  pad: number;
  eos: number;
  bos: number;
  tool_call: number;
  tools: number;
}
/**
 * Downloads `url` and returns the response body as raw bytes.
 *
 * @param url - Location requested with the global `fetch`.
 * @returns The complete response body wrapped in a `Uint8Array`.
 * @throws Error with message `fetch <url>: <status>` when the response is not OK.
 */
async function fetchBytes(url: string): Promise<Uint8Array> {
  const response = await fetch(url);
  if (response.ok) {
    const body = await response.arrayBuffer();
    return new Uint8Array(body);
  }
  throw new Error(`fetch ${url}: ${response.status}`);
}
/**
 * Downloads `url` and parses the response body as JSON.
 *
 * The parsed value is returned as `T` without any runtime validation, so the
 * caller is responsible for `T` matching what the server actually sends.
 *
 * @param url - Location requested with the global `fetch`.
 * @returns The parsed JSON body, typed as `T`.
 * @throws Error with message `fetch <url>: <status>` when the response is not OK.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  if (response.ok) {
    const parsed = (await response.json()) as T;
    return parsed;
  }
  throw new Error(`fetch ${url}: ${response.status}`);
}
/**
 * Application entry point: mounts the UI, loads everything generation needs,
 * then enables interaction and wires the run handler.
 *
 * Any failure while loading is caught and shown in the UI (status `failed`
 * plus an error message); the returned promise does not reject for those
 * failures. An exception thrown by `mountUI` itself is not caught here.
 */
async function boot(): Promise<void> {
  const ui = mountUI();
  try {
    // The third argument to setStatus is presumably a "busy" flag — confirm in ./ui.
    setStatus(ui, 'loading model…', true);
    const t0 = performance.now();

    // The three downloads are independent, so they run concurrently.
    const [sessions, tokenizerBytes, specials] = await Promise.all([
      loadSessions((m) => setStatus(ui, m, true)),
      fetchBytes(TOKENIZER_URL),
      fetchJson<Specials>(SPECIALS_URL),
    ]);
    const tokenizer = await createTokenizer(tokenizerBytes);

    const loadSecs = ((performance.now() - t0) / 1000).toFixed(1);
    setStatus(ui, `ready · loaded in ${loadSecs}s`);
    setInteractiveEnabled(ui, true);
    wireRun(ui, sessions, tokenizer, specials);
  } catch (e: unknown) {
    // A rejection is not guaranteed to be an Error instance; narrow instead of
    // casting so the user never sees "Failed to load model: undefined".
    const reason = e instanceof Error ? e.message : String(e);
    setStatus(ui, 'failed');
    renderError(ui, `Failed to load model: ${reason}`);
  }
}
/**
 * Attaches the generation handler to the query element's `change` event.
 *
 * Each run reads the tool definitions and the trimmed query from the UI,
 * streams partial output into the result view while a 100 ms timer refreshes
 * the status line, and finally reports elapsed time and throughput. Runs do
 * not overlap: events that arrive while a generation is in flight are ignored.
 *
 * @param ui - Mounted UI handle used for reading input and rendering output.
 * @param sessions - Loaded model sessions passed through to `generate`.
 * @param tokenizer - Tokenizer passed through to `generate`.
 * @param specials - Special-token ids; `eos` and `tools` are used here.
 */
function wireRun(ui: UI, sessions: NeedleSessions, tokenizer: Tokenizer, specials: Specials) {
  let running = false;

  ui.queryEl.addEventListener('change', async () => {
    if (running) return;

    const tools = readTools(ui);
    if (!tools.ok) {
      renderError(ui, tools.error);
      return;
    }

    const query = ui.queryEl.value.trim();
    if (!query) return;

    running = true;
    let tokensSoFar = 0;
    const t0 = performance.now();

    // Live progress readout; cleared in `finally` so it can never outlive the run.
    const tick = setInterval(() => {
      const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
      setStatus(ui, `generating… ${elapsed}s · ${tokensSoFar} tok`, true);
    }, 100);

    try {
      const result = await generate(
        sessions, tokenizer, query, tools.tools,
        {
          eosTokenId: specials.eos,
          bosOrPrefixTokenId: specials.eos, // Cactus seeds decoder with EOS, not BOS
          toolsTokenId: specials.tools,
          maxNewTokens: 256,
        },
        (_id, decodedSoFar) => {
          tokensSoFar += 1;
          // Hide the leading marker from the streamed preview.
          let display = decodedSoFar;
          if (display.startsWith('<tool_call>')) display = display.slice('<tool_call>'.length);
          renderResult(ui, display);
        },
      );

      const elapsedMs = performance.now() - t0;
      const elapsed = (elapsedMs / 1000).toFixed(2);
      const tps = (result.ids.length / (elapsedMs / 1000)).toFixed(1);
      // NOTE(review): the final text is rendered as returned by generate();
      // presumably it already excludes the '<tool_call>' prefix — confirm.
      renderResult(ui, result.text);
      setStatus(ui, `ready · ${elapsed}s · ${result.ids.length} tok · ${tps} tok/s`);
    } catch (e: unknown) {
      // Thrown values are not guaranteed to be Error instances; narrow rather
      // than cast so the message is never rendered as "undefined".
      const reason = e instanceof Error ? e.message : String(e);
      renderError(ui, `Generation failed: ${reason}`);
      setStatus(ui, 'ready');
    } finally {
      clearInterval(tick);
      running = false;
    }
  });
}
// Start the app. boot() reports load failures through the UI, so the returned
// promise is intentionally not awaited.
void boot();