Spaces:
Running
Running
In-browser models: catalog + picker + cache delete, live tok/s stats, raw 'thinking' stream
Browse files
- modelCatalog.js: 8 verified ungated GGUFs ≤~2GB (SmolLM2 360M/1.7B, Qwen2.5
0.5B/1.5B/3B, Qwen3 1.7B, Llama 3.2 1B/3B) with real sizes; bigger models (Gemma 4 E2B
3GB, Qwen3-4B, MiniMax) are excluded — they exceed the browser's 2GB single-file limit.
- wllamaLlm.js: setModel/ensureModel per-model, cache list/delete via wllama
ModelManager (exit() + remove()), tok/s + first-token stats during streaming.
- modelBar.js: model dropdown (label · size · ✓downloaded) + 🗑 delete-from-cache.
- persona/diary panels: model picker, live tok/s, and a raw 'thinking' stream view
so you watch tokens arrive.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- web/diaryPanel.js +18 -11
- web/modelBar.js +51 -0
- web/modelCatalog.js +42 -0
- web/personaPanel.js +29 -16
- web/shell/persona.css +38 -0
- web/wllamaLlm.js +71 -21
web/diaryPanel.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
-
// War-diary panel — vanilla DOM, mounted
|
| 2 |
-
//
|
| 3 |
-
//
|
| 4 |
-
import { streamChat, ensureModel,
|
|
|
|
| 5 |
import { DIARY_SYSTEM, diaryUserPrompt } from '/web/personaPrompts.js'
|
| 6 |
|
| 7 |
function el(tag, props = {}, kids = []) {
|
|
@@ -16,37 +17,43 @@ function el(tag, props = {}, kids = []) {
|
|
| 16 |
}
|
| 17 |
|
| 18 |
export function mountDiaryPanel(host) {
|
|
|
|
| 19 |
const unit = el('input', { class: 'persona-input', type: 'text', value: 'Bram the Warrior' })
|
| 20 |
const traits = el('input', { class: 'persona-input', type: 'text', value: 'Cautious, Veteran, Vengeful' })
|
|
|
|
| 21 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
| 22 |
const btn = el('button', { class: 'persona-go', type: 'button' }, '✒ Write war diary')
|
| 23 |
const out = el('div', { class: 'persona-about' }, 'A first-person diary entry, written by a small llama.cpp model in your browser.')
|
| 24 |
|
| 25 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 26 |
-
|
| 27 |
el('label', { class: 'persona-label' }, 'Unit'), unit,
|
| 28 |
el('label', { class: 'persona-label' }, 'Traits'), traits,
|
| 29 |
-
btn, status,
|
| 30 |
])
|
| 31 |
const result = el('div', { class: 'persona-result' }, [out])
|
| 32 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 33 |
|
|
|
|
|
|
|
| 34 |
let busy = false
|
| 35 |
async function write() {
|
| 36 |
if (busy) return
|
| 37 |
-
busy = true; btn.disabled = true
|
| 38 |
const header = `— Diary of ${(unit.value || 'a nameless soldier').trim()} —\n\n`
|
| 39 |
out.textContent = header
|
| 40 |
try {
|
| 41 |
-
status.textContent =
|
| 42 |
-
await ensureModel((frac) => { status.textContent = `downloading
|
| 43 |
-
status.textContent = `writing with ${
|
| 44 |
let first = true
|
| 45 |
await streamChat(DIARY_SYSTEM, diaryUserPrompt(unit.value, traits.value), {
|
| 46 |
-
maxTokens:
|
| 47 |
onToken: (piece) => { if (first) { out.textContent = header; first = false } out.textContent += piece },
|
|
|
|
| 48 |
})
|
| 49 |
status.textContent = 'written ✓ (generated locally)'
|
|
|
|
| 50 |
} catch (e) {
|
| 51 |
status.textContent = `couldn't run the local model: ${e.message || e}`
|
| 52 |
} finally {
|
|
|
|
| 1 |
+
// War-diary panel — vanilla DOM, mounted into #diary-stage. Streams a first-person
|
| 2 |
+
// diary entry generated ON THE USER'S DEVICE via wllama (llama.cpp WASM). Shares the
|
| 3 |
+
// persona styling (.persona-*), the model picker (modelBar), and tok/s stats.
|
| 4 |
+
import { streamChat, ensureModel, getCurrentModel } from '/web/wllamaLlm.js'
|
| 5 |
+
import { mountModelBar } from '/web/modelBar.js'
|
| 6 |
import { DIARY_SYSTEM, diaryUserPrompt } from '/web/personaPrompts.js'
|
| 7 |
|
| 8 |
function el(tag, props = {}, kids = []) {
|
|
|
|
| 17 |
}
|
| 18 |
|
| 19 |
export function mountDiaryPanel(host) {
|
| 20 |
+
const modelHost = el('div')
|
| 21 |
const unit = el('input', { class: 'persona-input', type: 'text', value: 'Bram the Warrior' })
|
| 22 |
const traits = el('input', { class: 'persona-input', type: 'text', value: 'Cautious, Veteran, Vengeful' })
|
| 23 |
+
const stats = el('div', { class: 'persona-stats' })
|
| 24 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
| 25 |
const btn = el('button', { class: 'persona-go', type: 'button' }, '✒ Write war diary')
|
| 26 |
const out = el('div', { class: 'persona-about' }, 'A first-person diary entry, written by a small llama.cpp model in your browser.')
|
| 27 |
|
| 28 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 29 |
+
modelHost,
|
| 30 |
el('label', { class: 'persona-label' }, 'Unit'), unit,
|
| 31 |
el('label', { class: 'persona-label' }, 'Traits'), traits,
|
| 32 |
+
btn, stats, status,
|
| 33 |
])
|
| 34 |
const result = el('div', { class: 'persona-result' }, [out])
|
| 35 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 36 |
|
| 37 |
+
const bar = mountModelBar(modelHost)
|
| 38 |
+
|
| 39 |
let busy = false
|
| 40 |
async function write() {
|
| 41 |
if (busy) return
|
| 42 |
+
busy = true; btn.disabled = true; stats.textContent = ''
|
| 43 |
const header = `— Diary of ${(unit.value || 'a nameless soldier').trim()} —\n\n`
|
| 44 |
out.textContent = header
|
| 45 |
try {
|
| 46 |
+
status.textContent = `loading ${getCurrentModel().label} into your browser…`
|
| 47 |
+
await ensureModel((frac) => { status.textContent = `downloading ${getCurrentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
|
| 48 |
+
status.textContent = `writing on your device with ${getCurrentModel().label}…`
|
| 49 |
let first = true
|
| 50 |
await streamChat(DIARY_SYSTEM, diaryUserPrompt(unit.value, traits.value), {
|
| 51 |
+
maxTokens: 220, temperature: 0.9,
|
| 52 |
onToken: (piece) => { if (first) { out.textContent = header; first = false } out.textContent += piece },
|
| 53 |
+
onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
|
| 54 |
})
|
| 55 |
status.textContent = 'written ✓ (generated locally)'
|
| 56 |
+
bar.refresh()
|
| 57 |
} catch (e) {
|
| 58 |
status.textContent = `couldn't run the local model: ${e.message || e}`
|
| 59 |
} finally {
|
web/modelBar.js
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Shared model picker + cache controls for the in-browser panels. Lets you choose a
|
| 2 |
+
// model from the catalog (showing size + whether it's already downloaded) and delete
|
| 3 |
+
// a downloaded model from the browser cache — like the wllama demo space.
|
| 4 |
+
import { listModels, getCurrentModel, setModel, cachedSet, deleteCached } from '/web/wllamaLlm.js'
|
| 5 |
+
import { fmtBytes } from '/web/modelCatalog.js'
|
| 6 |
+
|
| 7 |
+
function el(tag, props = {}, kids = []) {
|
| 8 |
+
const n = document.createElement(tag)
|
| 9 |
+
for (const [k, v] of Object.entries(props)) {
|
| 10 |
+
if (k === 'class') n.className = v
|
| 11 |
+
else if (k.startsWith('on') && typeof v === 'function') n.addEventListener(k.slice(2), v)
|
| 12 |
+
else if (v != null) n.setAttribute(k, v)
|
| 13 |
+
}
|
| 14 |
+
for (const kid of [].concat(kids)) if (kid != null) n.append(kid)
|
| 15 |
+
return n
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
/**
 * Mount the shared model picker + cache controls into `host`.
 *
 * Renders a <select> of catalog models (label · size · "✓ downloaded" when cached), an
 * info line for the current model, and a delete button that is only visible while the
 * current model is in the browser cache.
 *
 * @param {Element} host - container the bar is appended to.
 * @param {{ onChange?: (id: string) => void }} [options] - onChange fires after the
 *   active model has been switched via the dropdown.
 * @returns {{ refresh: () => Promise<void> }} refresh() re-reads the cache and re-renders.
 */
export function mountModelBar(host, { onChange } = {}) {
  const models = listModels()
  const sel = el('select', { class: 'model-select' })
  const del = el('button', { class: 'model-del', type: 'button', title: 'Delete this model from your browser cache' }, '🗑 delete')
  const info = el('div', { class: 'model-info' })
  host.append(el('div', { class: 'model-bar' }, [
    el('label', { class: 'persona-label' }, 'Model (runs in your browser)'),
    sel, el('div', { class: 'model-row' }, [info, del]),
  ]))

  // Ids of catalog models currently cached; filled in asynchronously by refresh().
  let cached = new Set()

  function render() {
    const cur = getCurrentModel().id
    sel.replaceChildren(...models.map((m) =>
      el('option', { value: m.id }, `${m.label} · ${fmtBytes(m.bytes)}${cached.has(m.id) ? ' · ✓ downloaded' : ''}`)))
    sel.value = cur
    const m = getCurrentModel()
    info.textContent = `${m.params} · ${fmtBytes(m.bytes)} · ${cached.has(m.id) ? 'cached on device' : 'downloads on first use'}`
    del.style.display = cached.has(m.id) ? '' : 'none'
  }

  async function refresh() { cached = await cachedSet(); render() }

  sel.addEventListener('change', async () => { await setModel(sel.value); render(); onChange && onChange(sel.value) })

  del.addEventListener('click', async () => {
    del.disabled = true
    info.textContent = 'deleting from cache…'
    let failure = null
    try {
      await deleteCached(sel.value)
    } catch (e) {
      failure = `delete failed: ${e.message || e}`
    }
    try {
      await refresh()
    } finally {
      // Re-enable even if the cache listing itself fails, so the button never sticks.
      del.disabled = false
    }
    if (failure) {
      // refresh() re-renders the info line, so the error has to be written AFTER it;
      // writing it before (as the catch block used to) meant it was overwritten at once.
      info.textContent = failure
      // Return to the normal info line after a moment, unless something else (for
      // example another delete) has replaced the message in the meantime.
      setTimeout(() => { if (info.textContent === failure) render() }, 2500)
    }
  })

  render()
  refresh()
  return { refresh }
}
|
web/modelCatalog.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Curated small instruct models for the IN-BROWSER (wllama / llama.cpp WASM) path.
|
| 2 |
+
// Constraints that shaped this list (verified June 2026 via the HF API):
|
| 3 |
+
// • Ungated — wllama fetches the GGUF anonymously; gated repos (official Llama/
|
| 4 |
+
// Gemma) won't load, so we use ungated mirrors (bartowski, unsloth, Qwen, SmolLM).
|
| 5 |
+
// • ≤ ~2 GB single file — the browser's ArrayBuffer cap is 2 GB (bigger needs split
|
| 6 |
+
// GGUFs). That's why Gemma 4 E2B (3.1 GB) and Qwen3-4B (2.5 GB) are server-only,
|
| 7 |
+
// and big ones like Qwen3.5-9B / MiniMax don't fit in-browser at all.
|
| 8 |
+
// • Q4_K_M quant (good size/quality) where used — the SmolLM2 360M entry ships Q8_0 — CPU-WASM friendly.
|
| 9 |
+
// Sizes are the real download bytes. The hackathon's "≤32B" is the *runtime* cap; the
|
| 10 |
+
// browser is far smaller, so this list tops out at 3B.
|
| 11 |
+
export const MODELS = [
|
| 12 |
+
{ id: 'smollm2-360m', label: 'SmolLM2 360M', params: '360M', bytes: 386e6,
|
| 13 |
+
repo: 'HuggingFaceTB/SmolLM2-360M-Instruct-GGUF', file: 'smollm2-360m-instruct-q8_0.gguf',
|
| 14 |
+
note: 'tiniest — fastest, roughest' },
|
| 15 |
+
{ id: 'qwen2.5-0.5b', label: 'Qwen2.5 0.5B', params: '0.5B', bytes: 491e6,
|
| 16 |
+
repo: 'Qwen/Qwen2.5-0.5B-Instruct-GGUF', file: 'qwen2.5-0.5b-instruct-q4_k_m.gguf',
|
| 17 |
+
note: 'default — good speed/quality balance' },
|
| 18 |
+
{ id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', bytes: 808e6,
|
| 19 |
+
repo: 'bartowski/Llama-3.2-1B-Instruct-GGUF', file: 'Llama-3.2-1B-Instruct-Q4_K_M.gguf',
|
| 20 |
+
note: 'solid 1B all-rounder' },
|
| 21 |
+
{ id: 'smollm2-1.7b', label: 'SmolLM2 1.7B', params: '1.7B', bytes: 1056e6,
|
| 22 |
+
repo: 'HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF', file: 'smollm2-1.7b-instruct-q4_k_m.gguf',
|
| 23 |
+
note: 'strong tiny model' },
|
| 24 |
+
{ id: 'qwen3-1.7b', label: 'Qwen3 1.7B', params: '1.7B', bytes: 1107e6,
|
| 25 |
+
repo: 'unsloth/Qwen3-1.7B-GGUF', file: 'Qwen3-1.7B-Q4_K_M.gguf',
|
| 26 |
+
note: 'newer Qwen3 — has a thinking mode' },
|
| 27 |
+
{ id: 'qwen2.5-1.5b', label: 'Qwen2.5 1.5B', params: '1.5B', bytes: 1117e6,
|
| 28 |
+
repo: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF', file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf',
|
| 29 |
+
note: 'reliable, clean JSON' },
|
| 30 |
+
{ id: 'llama3.2-3b', label: 'Llama 3.2 3B', params: '3B', bytes: 2019e6,
|
| 31 |
+
repo: 'bartowski/Llama-3.2-3B-Instruct-GGUF', file: 'Llama-3.2-3B-Instruct-Q4_K_M.gguf',
|
| 32 |
+
note: 'bigger/better, slower in-browser' },
|
| 33 |
+
{ id: 'qwen2.5-3b', label: 'Qwen2.5 3B', params: '3B', bytes: 2105e6,
|
| 34 |
+
repo: 'Qwen/Qwen2.5-3B-Instruct-GGUF', file: 'qwen2.5-3b-instruct-q4_k_m.gguf',
|
| 35 |
+
note: 'best quality here; near the 2 GB browser limit' },
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
export const DEFAULT_MODEL = 'qwen2.5-0.5b'
|
| 39 |
+
|
| 40 |
+
// Look up a catalog entry by id. Unknown ids resolve to the DEFAULT_MODEL entry.
export const getModel = (id) => {
  const byId = (wanted) => MODELS.find((entry) => entry.id === wanted)
  return byId(id) || byId(DEFAULT_MODEL)
}
|
| 41 |
+
|
| 42 |
+
// Human-readable decimal size: whole MB below ~1 GB, otherwise GB with one decimal.
// The cut-over sits at 999.5 MB so a value that would round up to "1000 MB" is shown
// as "1.0 GB" instead.
export const fmtBytes = (b) => (b >= 999.5e6 ? `${(b / 1e9).toFixed(1)} GB` : `${Math.round(b / 1e6)} MB`)
|
web/personaPanel.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
// Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
|
| 2 |
-
// Generation runs ON THE USER'S DEVICE via wllama (llama.cpp WASM)
|
| 3 |
-
//
|
| 4 |
-
//
|
| 5 |
-
import { streamChat, ensureModel,
|
|
|
|
| 6 |
import { extractLivePersona } from '/web/personaStream.js'
|
| 7 |
import { parsePersonaJson } from '/web/personaParse.js'
|
| 8 |
import { PERSONA_SYSTEM, personaUserPrompt } from '/web/personaPrompts.js'
|
|
@@ -13,7 +14,6 @@ function el(tag, props = {}, kids = []) {
|
|
| 13 |
const n = document.createElement(tag)
|
| 14 |
for (const [k, v] of Object.entries(props)) {
|
| 15 |
if (k === 'class') n.className = v
|
| 16 |
-
else if (k === 'html') n.innerHTML = v
|
| 17 |
else if (k.startsWith('on') && typeof v === 'function') n.addEventListener(k.slice(2), v)
|
| 18 |
else if (v != null) n.setAttribute(k, v)
|
| 19 |
}
|
|
@@ -21,50 +21,61 @@ function el(tag, props = {}, kids = []) {
|
|
| 21 |
return n
|
| 22 |
}
|
| 23 |
|
| 24 |
-
export function mountPersonaPanel(host
|
| 25 |
-
const
|
| 26 |
-
|
| 27 |
-
const sel = el('select', { class: 'persona-input' }, classes.map((c) => el('option', { value: c }, c)))
|
| 28 |
const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
|
|
|
|
| 29 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
| 30 |
const btn = el('button', { class: 'persona-go', type: 'button' }, '⚔ Recruit a soldier')
|
| 31 |
|
| 32 |
const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
|
| 33 |
const tagsEl = el('div', { class: 'persona-tags' })
|
| 34 |
const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — a small llama.cpp model in your browser writes their legend.')
|
|
|
|
|
|
|
| 35 |
|
| 36 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 37 |
-
|
| 38 |
el('label', { class: 'persona-label' }, 'Class'), sel,
|
| 39 |
el('label', { class: 'persona-label' }, 'Seed'), seed,
|
| 40 |
-
btn, status,
|
| 41 |
])
|
| 42 |
-
const result = el('div', { class: 'persona-result' }, [nameEl, tagsEl, aboutEl])
|
| 43 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 44 |
|
|
|
|
|
|
|
| 45 |
function setTags(p) {
|
| 46 |
tagsEl.replaceChildren(...[p.specialty, p.personality, p.vibe].filter(Boolean)
|
| 47 |
.map((t) => el('span', { class: 'persona-tag' }, t)))
|
| 48 |
}
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
let busy = false
|
| 51 |
async function generate() {
|
| 52 |
if (busy) return
|
| 53 |
busy = true; btn.disabled = true
|
| 54 |
nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
|
|
|
|
| 55 |
try {
|
| 56 |
-
status.textContent =
|
| 57 |
-
await ensureModel((frac) => { status.textContent = `downloading
|
| 58 |
-
status.textContent = `writing with ${
|
| 59 |
let acc = ''
|
| 60 |
await streamChat(PERSONA_SYSTEM, personaUserPrompt(sel.value, seed.value), {
|
| 61 |
-
maxTokens:
|
| 62 |
onToken: (piece) => {
|
| 63 |
acc += piece
|
|
|
|
|
|
|
| 64 |
const live = extractLivePersona(acc)
|
| 65 |
if (live.name) nameEl.textContent = live.name
|
| 66 |
if (live.about) aboutEl.textContent = live.about
|
| 67 |
},
|
|
|
|
| 68 |
})
|
| 69 |
try {
|
| 70 |
const p = parsePersonaJson(acc)
|
|
@@ -72,9 +83,11 @@ export function mountPersonaPanel(host, opts = {}) {
|
|
| 72 |
aboutEl.textContent = p.about
|
| 73 |
setTags(p)
|
| 74 |
status.textContent = 'enlisted ✓ (generated locally)'
|
|
|
|
| 75 |
} catch (e) {
|
| 76 |
status.textContent = `the model rambled — couldn't parse a clean persona (${e.message || e})`
|
| 77 |
}
|
|
|
|
| 78 |
} catch (e) {
|
| 79 |
status.textContent = `couldn't run the local model: ${e.message || e}`
|
| 80 |
} finally {
|
|
|
|
| 1 |
// Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
|
| 2 |
+
// Generation runs ON THE USER'S DEVICE via wllama (llama.cpp WASM). Model is pickable
|
| 3 |
+
// (modelBar), generation streams into a live "thinking" view + parsed result, and we
|
| 4 |
+
// show tok/s. Reuses woid's persona parser + extractLivePersona verbatim.
|
| 5 |
+
import { streamChat, ensureModel, getCurrentModel } from '/web/wllamaLlm.js'
|
| 6 |
+
import { mountModelBar } from '/web/modelBar.js'
|
| 7 |
import { extractLivePersona } from '/web/personaStream.js'
|
| 8 |
import { parsePersonaJson } from '/web/personaParse.js'
|
| 9 |
import { PERSONA_SYSTEM, personaUserPrompt } from '/web/personaPrompts.js'
|
|
|
|
| 14 |
const n = document.createElement(tag)
|
| 15 |
for (const [k, v] of Object.entries(props)) {
|
| 16 |
if (k === 'class') n.className = v
|
|
|
|
| 17 |
else if (k.startsWith('on') && typeof v === 'function') n.addEventListener(k.slice(2), v)
|
| 18 |
else if (v != null) n.setAttribute(k, v)
|
| 19 |
}
|
|
|
|
| 21 |
return n
|
| 22 |
}
|
| 23 |
|
| 24 |
+
export function mountPersonaPanel(host) {
|
| 25 |
+
const modelHost = el('div')
|
| 26 |
+
const sel = el('select', { class: 'persona-input' }, CLASSES.map((c) => el('option', { value: c }, c)))
|
|
|
|
| 27 |
const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
|
| 28 |
+
const stats = el('div', { class: 'persona-stats' })
|
| 29 |
const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
|
| 30 |
const btn = el('button', { class: 'persona-go', type: 'button' }, '⚔ Recruit a soldier')
|
| 31 |
|
| 32 |
const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
|
| 33 |
const tagsEl = el('div', { class: 'persona-tags' })
|
| 34 |
const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — a small llama.cpp model in your browser writes their legend.')
|
| 35 |
+
const thinkEl = el('pre', { class: 'persona-think' })
|
| 36 |
+
const thinkWrap = el('details', { class: 'persona-think-wrap' }, [el('summary', {}, 'model output (raw)'), thinkEl])
|
| 37 |
|
| 38 |
const controls = el('aside', { class: 'persona-controls' }, [
|
| 39 |
+
modelHost,
|
| 40 |
el('label', { class: 'persona-label' }, 'Class'), sel,
|
| 41 |
el('label', { class: 'persona-label' }, 'Seed'), seed,
|
| 42 |
+
btn, stats, status,
|
| 43 |
])
|
| 44 |
+
const result = el('div', { class: 'persona-result' }, [nameEl, tagsEl, aboutEl, thinkWrap])
|
| 45 |
host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
|
| 46 |
|
| 47 |
+
const bar = mountModelBar(modelHost)
|
| 48 |
+
|
| 49 |
function setTags(p) {
|
| 50 |
tagsEl.replaceChildren(...[p.specialty, p.personality, p.vibe].filter(Boolean)
|
| 51 |
.map((t) => el('span', { class: 'persona-tag' }, t)))
|
| 52 |
}
|
| 53 |
+
function showStats(s) {
|
| 54 |
+
stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}`
|
| 55 |
+
}
|
| 56 |
|
| 57 |
let busy = false
|
| 58 |
async function generate() {
|
| 59 |
if (busy) return
|
| 60 |
busy = true; btn.disabled = true
|
| 61 |
nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
|
| 62 |
+
thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
|
| 63 |
try {
|
| 64 |
+
status.textContent = `loading ${getCurrentModel().label} into your browser…`
|
| 65 |
+
await ensureModel((frac) => { status.textContent = `downloading ${getCurrentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
|
| 66 |
+
status.textContent = `writing on your device with ${getCurrentModel().label}…`
|
| 67 |
let acc = ''
|
| 68 |
await streamChat(PERSONA_SYSTEM, personaUserPrompt(sel.value, seed.value), {
|
| 69 |
+
maxTokens: 220,
|
| 70 |
onToken: (piece) => {
|
| 71 |
acc += piece
|
| 72 |
+
thinkEl.textContent = acc
|
| 73 |
+
thinkEl.scrollTop = thinkEl.scrollHeight
|
| 74 |
const live = extractLivePersona(acc)
|
| 75 |
if (live.name) nameEl.textContent = live.name
|
| 76 |
if (live.about) aboutEl.textContent = live.about
|
| 77 |
},
|
| 78 |
+
onStats: showStats,
|
| 79 |
})
|
| 80 |
try {
|
| 81 |
const p = parsePersonaJson(acc)
|
|
|
|
| 83 |
aboutEl.textContent = p.about
|
| 84 |
setTags(p)
|
| 85 |
status.textContent = 'enlisted ✓ (generated locally)'
|
| 86 |
+
thinkWrap.open = false
|
| 87 |
} catch (e) {
|
| 88 |
status.textContent = `the model rambled — couldn't parse a clean persona (${e.message || e})`
|
| 89 |
}
|
| 90 |
+
bar.refresh() // it's now cached
|
| 91 |
} catch (e) {
|
| 92 |
status.textContent = `couldn't run the local model: ${e.message || e}`
|
| 93 |
} finally {
|
web/shell/persona.css
CHANGED
|
@@ -63,6 +63,44 @@
|
|
| 63 |
white-space: pre-wrap;
|
| 64 |
}
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
@media (max-width: 768px) {
|
| 67 |
.persona-view { flex-direction: column; }
|
| 68 |
.persona-controls { width: 100%; border-right: 0; border-bottom: 2px solid var(--p-ink); }
|
|
|
|
| 63 |
white-space: pre-wrap;
|
| 64 |
}
|
| 65 |
|
| 66 |
+
/* ── Model picker + cache controls ─────────────────────────────────────────── */
|
| 67 |
+
.model-bar { display: flex; flex-direction: column; gap: 4px; padding-bottom: 10px; margin-bottom: 6px; border-bottom: 1px dashed var(--p-ink); }
|
| 68 |
+
.model-select {
|
| 69 |
+
font-family: var(--p-sans) !important; font-size: 13px !important; color: var(--p-ink) !important;
|
| 70 |
+
background: var(--p-card) !important; border: 1.5px solid var(--p-ink) !important;
|
| 71 |
+
border-radius: 0 !important; padding: 6px 8px !important; width: 100%;
|
| 72 |
+
}
|
| 73 |
+
.model-row { display: flex; align-items: center; justify-content: space-between; gap: 8px; }
|
| 74 |
+
.model-info { font-family: var(--p-mono); font-size: 9px; letter-spacing: .04em; color: var(--p-muted); line-height: 1.4; flex: 1; }
|
| 75 |
+
.model-del {
|
| 76 |
+
font-family: var(--p-mono) !important; font-size: 9px !important; letter-spacing: .04em; text-transform: uppercase;
|
| 77 |
+
color: var(--p-transmit) !important; background: var(--p-card) !important; border: 1.5px solid var(--p-transmit) !important;
|
| 78 |
+
border-radius: 0 !important; padding: 3px 6px !important; cursor: pointer; flex-shrink: 0;
|
| 79 |
+
}
|
| 80 |
+
.model-del:hover { background: var(--p-transmit) !important; color: var(--p-card) !important; }
|
| 81 |
+
.model-del:disabled { opacity: .5; cursor: default; }
|
| 82 |
+
|
| 83 |
+
/* ── Live stats (tok/s) ────────────────────────────────────────────────────── */
|
| 84 |
+
.persona-stats {
|
| 85 |
+
font-family: var(--p-mono); font-size: 11px; letter-spacing: .04em; color: var(--p-transmit);
|
| 86 |
+
min-height: 14px; margin-top: 6px;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
/* ── "Thinking" raw stream (see progress as tokens arrive) ──────────────────── */
|
| 90 |
+
.persona-think-wrap { margin-top: 22px; }
|
| 91 |
+
.persona-think-wrap > summary {
|
| 92 |
+
cursor: pointer; font-family: var(--p-mono); font-size: 10px; letter-spacing: .12em; text-transform: uppercase;
|
| 93 |
+
color: var(--p-muted); list-style: none;
|
| 94 |
+
}
|
| 95 |
+
.persona-think-wrap > summary::-webkit-details-marker { display: none; }
|
| 96 |
+
.persona-think-wrap > summary::before { content: '▸ '; }
|
| 97 |
+
.persona-think-wrap[open] > summary::before { content: '▾ '; }
|
| 98 |
+
.persona-think {
|
| 99 |
+
margin: 8px 0 0; max-height: 240px; overflow-y: auto; white-space: pre-wrap; word-break: break-word;
|
| 100 |
+
font-family: var(--p-mono); font-size: 11px; line-height: 1.5; color: var(--p-muted);
|
| 101 |
+
background: var(--p-paper-2); border: 1px solid var(--p-ink); padding: 8px 10px;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
@media (max-width: 768px) {
|
| 105 |
.persona-view { flex-direction: column; }
|
| 106 |
.persona-controls { width: 100%; border-right: 0; border-bottom: 2px solid var(--p-ink); }
|
web/wllamaLlm.js
CHANGED
|
@@ -1,55 +1,105 @@
|
|
| 1 |
-
// In-browser llama.cpp via wllama (WASM)
|
| 2 |
-
//
|
| 3 |
-
//
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
// path did, so the persona/diary panels barely change.
|
| 7 |
-
import { Wllama } from 'https://cdn.jsdelivr.net/npm/@wllama/wllama@3.4.1/esm/index.js'
|
| 8 |
|
| 9 |
const WLLAMA_VER = '3.4.1'
|
| 10 |
const WASM = { default: `https://cdn.jsdelivr.net/npm/@wllama/wllama@${WLLAMA_VER}/esm/wasm/wllama.wasm` }
|
| 11 |
-
// Small instruct GGUF: ~380 MB, downloaded once then cached by the browser.
|
| 12 |
-
const MODEL = { repo: 'Qwen/Qwen2.5-0.5B-Instruct-GGUF', file: 'qwen2.5-0.5b-instruct-q4_k_m.gguf' }
|
| 13 |
|
|
|
|
| 14 |
let _wllama = null
|
|
|
|
| 15 |
let _loadPromise = null
|
| 16 |
let _chain = Promise.resolve() // serialize completions (one model, no parallel decode)
|
|
|
|
| 17 |
|
| 18 |
-
export function
|
|
|
|
| 19 |
|
| 20 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
export function ensureModel(onProgress) {
|
| 22 |
-
|
|
|
|
| 23 |
if (_loadPromise) return _loadPromise
|
| 24 |
_loadPromise = (async () => {
|
| 25 |
const w = new Wllama(WASM)
|
| 26 |
-
await w.loadModelFromHF(
|
| 27 |
n_ctx: 2048,
|
| 28 |
progressCallback: ({ loaded, total }) => onProgress && onProgress(total ? loaded / total : 0),
|
| 29 |
})
|
| 30 |
-
_wllama = w
|
| 31 |
return w
|
| 32 |
})().catch((e) => { _loadPromise = null; throw e })
|
| 33 |
return _loadPromise
|
| 34 |
}
|
| 35 |
|
| 36 |
-
// Stream a chat completion in-browser.
|
| 37 |
-
// Serialized so two panels can't decode at once.
|
| 38 |
-
export function streamChat(system, user, { maxTokens = 200, temperature = 0.8, onToken } = {}) {
|
| 39 |
const run = async () => {
|
| 40 |
const w = await ensureModel()
|
| 41 |
-
let full = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
const stream = await w.createChatCompletion({
|
| 43 |
messages: [{ role: 'system', content: system }, { role: 'user', content: user }],
|
| 44 |
max_tokens: maxTokens, temperature, top_k: 40, top_p: 0.9, stream: true,
|
| 45 |
})
|
| 46 |
for await (const chunk of stream) {
|
| 47 |
const piece = chunk?.choices?.[0]?.delta?.content || ''
|
| 48 |
-
if (piece)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
}
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
}
|
| 52 |
const p = _chain.then(run, run)
|
| 53 |
-
_chain = p.catch(() => {})
|
| 54 |
return p
|
| 55 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// In-browser llama.cpp via wllama (WASM). Local-first (🔌 Off the Grid) + llama.cpp
|
| 2 |
+
// (🦙 Llama Champion). Adds model selection from a catalog, cache management
|
| 3 |
+
// (download/delete via wllama's ModelManager), and live generation stats (tok/s).
|
| 4 |
+
import { Wllama, ModelManager } from 'https://cdn.jsdelivr.net/npm/@wllama/wllama@3.4.1/esm/index.js'
|
| 5 |
+
import { MODELS, getModel, DEFAULT_MODEL } from '/web/modelCatalog.js'
|
|
|
|
|
|
|
| 6 |
|
| 7 |
const WLLAMA_VER = '3.4.1'
|
| 8 |
const WASM = { default: `https://cdn.jsdelivr.net/npm/@wllama/wllama@${WLLAMA_VER}/esm/wasm/wllama.wasm` }
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
let currentId = DEFAULT_MODEL
|
| 11 |
let _wllama = null
|
| 12 |
+
let _loadedId = null
|
| 13 |
let _loadPromise = null
|
| 14 |
let _chain = Promise.resolve() // serialize completions (one model, no parallel decode)
|
| 15 |
+
const mm = new ModelManager()
|
| 16 |
|
| 17 |
+
export function listModels() { return MODELS }
|
| 18 |
+
export function getCurrentModel() { return getModel(currentId) }
|
| 19 |
|
| 20 |
+
// Switch the active model. Unloads the loaded one so the next ensureModel loads fresh.
|
| 21 |
+
export async function setModel(id) {
|
| 22 |
+
if (id === currentId) return
|
| 23 |
+
currentId = id
|
| 24 |
+
if (_wllama) { try { await _wllama.exit() } catch { /* ignore */ } }
|
| 25 |
+
_wllama = null; _loadedId = null; _loadPromise = null
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
// Load (download + init) the current model. onProgress(fraction 0..1) during download.
|
| 29 |
export function ensureModel(onProgress) {
|
| 30 |
+
const m = getModel(currentId)
|
| 31 |
+
if (_wllama && _loadedId === m.id) return Promise.resolve(_wllama)
|
| 32 |
if (_loadPromise) return _loadPromise
|
| 33 |
_loadPromise = (async () => {
|
| 34 |
const w = new Wllama(WASM)
|
| 35 |
+
await w.loadModelFromHF({ repo: m.repo, file: m.file }, {
|
| 36 |
n_ctx: 2048,
|
| 37 |
progressCallback: ({ loaded, total }) => onProgress && onProgress(total ? loaded / total : 0),
|
| 38 |
})
|
| 39 |
+
_wllama = w; _loadedId = m.id
|
| 40 |
return w
|
| 41 |
})().catch((e) => { _loadPromise = null; throw e })
|
| 42 |
return _loadPromise
|
| 43 |
}
|
| 44 |
|
| 45 |
+
// Stream a chat completion in-browser. Calls are serialized through _chain so two
// panels can't decode at once.
//   onToken(piece) — called for every non-empty content delta.
//   onStats(stats) — called after every piece and once more when the stream ends, with
//                    { tokens, seconds, tokPerSec, ttftSeconds, final }.
//                    `tokens` counts streamed pieces (presumably one token each —
//                    TODO confirm against wllama's chunking).
// Resolves to { text, stats: { tokens, tokPerSec } }.
export function streamChat(system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats } = {}) {
  const run = async () => {
    const w = await ensureModel()
    let full = ''; let n = 0
    const t0 = performance.now(); let tFirst = null
    // Decode rate since the first piece arrived. n pieces span only n-1 intervals, so
    // the first piece is not counted; dividing n by the elapsed time made the very
    // first update report an absurd rate and inflated the early readings.
    const rate = () => {
      if (tFirst === null || n < 2) return 0
      const gen = (performance.now() - tFirst) / 1000
      return gen > 0 ? +((n - 1) / gen).toFixed(1) : 0
    }
    const emitStats = (final) => {
      if (!onStats) return
      const secs = (performance.now() - t0) / 1000
      onStats({
        tokens: n, seconds: +secs.toFixed(1),
        tokPerSec: rate(),
        // Time to first token: null until the first piece has arrived.
        ttftSeconds: tFirst === null ? null : +((tFirst - t0) / 1000).toFixed(1),
        final: !!final,
      })
    }
    const stream = await w.createChatCompletion({
      messages: [{ role: 'system', content: system }, { role: 'user', content: user }],
      max_tokens: maxTokens, temperature, top_k: 40, top_p: 0.9, stream: true,
    })
    for await (const chunk of stream) {
      const piece = chunk?.choices?.[0]?.delta?.content || ''
      if (!piece) continue
      if (tFirst === null) tFirst = performance.now()
      full += piece; n++
      if (onToken) onToken(piece)
      emitStats(false)
    }
    emitStats(true)
    return { text: full, stats: { tokens: n, tokPerSec: rate() } }
  }
  // Run after the previous completion settles, whether it succeeded or failed.
  const p = _chain.then(run, run)
  // Keep the chain alive after a failure; the caller still sees the rejection via p.
  _chain = p.catch(() => {})
  return p
}
|
| 80 |
+
|
| 81 |
+
// ── Cache management (wllama ModelManager) ────────────────────────────────────
|
| 82 |
+
// Match cached files to catalog entries by GGUF filename (cache names embed it).
|
| 83 |
+
async function _cachedModels() {
|
| 84 |
+
try { return await mm.getModels() } catch { return [] }
|
| 85 |
+
}
|
| 86 |
+
function _matches(model, entry) {
|
| 87 |
+
const names = (model.files || []).map((f) => f.name || '').join('|')
|
| 88 |
+
return names.includes(entry.file)
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
// Set of catalog model ids currently downloaded in the browser.
|
| 92 |
+
export async function cachedSet() {
|
| 93 |
+
const models = await _cachedModels()
|
| 94 |
+
const ids = new Set()
|
| 95 |
+
for (const m of models) for (const c of MODELS) if (_matches(m, c)) ids.add(c.id)
|
| 96 |
+
return ids
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
// Delete a catalog model's files from the browser cache, unloading the model first if
// it is the one currently loaded. Ids that are not in the catalog are ignored:
// getModel() falls back to the default model for unknown ids, so resolving through it
// would make a bad id silently delete the default model's cache.
export async function deleteCached(id) {
  const entry = MODELS.find((m) => m.id === id)
  if (!entry) return
  if (_loadedId === id && _wllama) {
    try { await _wllama.exit() } catch { /* best-effort unload; cache files are still removed below */ }
    _wllama = null; _loadedId = null; _loadPromise = null
  }
  const cachedModels = await _cachedModels()
  for (const cachedModel of cachedModels) {
    if (_matches(cachedModel, entry) && cachedModel.remove) await cachedModel.remove()
  }
}
|