polats Claude Opus 4.8 (1M context) committed on
Commit
03708ca
·
1 Parent(s): bd4a81a

Personas + War Diary now run llama.cpp IN THE BROWSER via wllama (local-first)

Browse files

Generation moved off the slow Space CPU (0.58 tok/s) onto the visitor's device:
wllama (llama.cpp compiled to WASM) loads a small GGUF from HF and streams tokens
client-side. Earns 🔌 Off the Grid (no cloud/server inference) + 🦙 Llama Champion
(it IS llama.cpp). Reuses woid's persona parser (vendored personaParse.js) +
extractLivePersona. New web/wllamaLlm.js + personaPrompts.js + diaryPanel.js; the
Barracks + Personas tabs are now gr.HTML divs filled by the in-browser panels. The
Python llama-cpp-python path stays as a lazy fallback (no startup prewarm).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (7) hide show
  1. app.py +12 -12
  2. web/diaryPanel.js +57 -0
  3. web/personaPanel.js +30 -28
  4. web/personaParse.js +100 -0
  5. web/personaPrompts.js +30 -0
  6. web/tiny.js +3 -1
  7. web/wllamaLlm.js +55 -0
app.py CHANGED
@@ -91,10 +91,11 @@ THEME = ('<style>'
91
  # Gradio still hides it (display:none on the inactive tab's ancestor).
92
  '.gradio-container .tabitem{padding:0 !important;}'
93
  '.gradio-container .tabs{border:0 !important;}'
94
- '#sprite-stage,#persona-stage{position:fixed !important;top:0;bottom:0;right:0;'
95
- 'left:var(--tac-w,240px);height:auto !important;z-index:1;}'
96
- 'body.tac-collapsed #sprite-stage,body.tac-collapsed #persona-stage{left:0;}'
97
- '@media (max-width:768px){#sprite-stage,#persona-stage{left:0;}}'
 
98
  '</style>')
99
  HEAD = ('<meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests">'
100
  + HIDE_TABS + FONTS + THEME +
@@ -187,14 +188,11 @@ with gr.Blocks(title="Tiny Army") as demo:
187
  battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
188
  sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
189
  with gr.Tab("Barracks"):
190
- with gr.Row():
191
- unit = gr.Textbox("Bram the Warrior", label="Unit")
192
- traits = gr.Textbox("Cautious, Veteran, Vengeful", label="Traits")
193
- out = gr.Textbox(label="War diary", lines=6)
194
- gr.Button("Write diary", variant="primary").click(diary, [unit, traits], out)
195
  with gr.Tab("Personas"):
196
- # The vanilla persona panel (web/personaPanel.js) builds the whole page
197
- # into this div and streams from /persona/generate/stream.
198
  gr.HTML('<div id="persona-stage" style="overflow:hidden"></div>')
199
 
200
  # Mount Gradio on FastAPI so we can also serve the JS module + the sprite assets.
@@ -312,7 +310,9 @@ app = gr.mount_gradio_app(fastapi_app, demo, path="/", head=HEAD, theme=gr.theme
312
 
313
 
314
  if __name__ == "__main__":
315
- llm.prewarm() # load the GGUF in the background so the first request is warm
 
 
316
  # proxy_headers + trusting forwarded IPs lets Gradio honour X-Forwarded-Proto
317
  # from HF's edge, so it generates https (not http) asset URLs behind the proxy.
318
  uvicorn.run(app, host="0.0.0.0", port=7860,
 
91
  # Gradio still hides it (display:none on the inactive tab's ancestor).
92
  '.gradio-container .tabitem{padding:0 !important;}'
93
  '.gradio-container .tabs{border:0 !important;}'
94
+ '#sprite-stage,#persona-stage,#diary-stage{position:fixed !important;top:0;bottom:0;'
95
+ 'right:0;left:var(--tac-w,240px);height:auto !important;z-index:1;}'
96
+ 'body.tac-collapsed #sprite-stage,body.tac-collapsed #persona-stage,'
97
+ 'body.tac-collapsed #diary-stage{left:0;}'
98
+ '@media (max-width:768px){#sprite-stage,#persona-stage,#diary-stage{left:0;}}'
99
  '</style>')
100
  HEAD = ('<meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests">'
101
  + HIDE_TABS + FONTS + THEME +
 
188
  battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
189
  sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
190
  with gr.Tab("Barracks"):
191
+ # In-browser war-diary (web/diaryPanel.js → wllama, llama.cpp WASM). Runs
192
+ # entirely on the visitor's device — no server inference.
193
+ gr.HTML('<div id="diary-stage" style="overflow:hidden"></div>')
 
 
194
  with gr.Tab("Personas"):
195
+ # In-browser persona generator (web/personaPanel.js → wllama).
 
196
  gr.HTML('<div id="persona-stage" style="overflow:hidden"></div>')
197
 
198
  # Mount Gradio on FastAPI so we can also serve the JS module + the sprite assets.
 
310
 
311
 
312
  if __name__ == "__main__":
313
+ # The default UI runs the model IN THE BROWSER (wllama). The Python llama.cpp path
314
+ # stays as a lazy fallback (only loads if /persona/generate/stream is hit), so we
315
+ # don't pre-download it here.
316
  # proxy_headers + trusting forwarded IPs lets Gradio honour X-Forwarded-Proto
317
  # from HF's edge, so it generates https (not http) asset URLs behind the proxy.
318
  uvicorn.run(app, host="0.0.0.0", port=7860,
web/diaryPanel.js ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // War-diary panel — vanilla DOM, mounted by tiny.js into #diary-stage. Streams a
2
+ // first-person diary entry generated ON THE USER'S DEVICE via wllama (llama.cpp
3
+ // WASM). Shares the persona panel's styling (.persona-*) and the same local model.
4
+ import { streamChat, ensureModel, modelLabel } from '/web/wllamaLlm.js'
5
+ import { DIARY_SYSTEM, diaryUserPrompt } from '/web/personaPrompts.js'
6
+
7
/**
 * Minimal DOM element builder used by the diary panel.
 *
 * @param {string} tag - Element tag name passed to document.createElement.
 * @param {Object} [props] - `class` sets className; `on*` keys with function
 *   values are registered as event listeners (the `on` prefix is stripped);
 *   any other non-null value is set as an attribute.
 * @param {*} [kids] - A single child or an array of children; null/undefined
 *   entries are skipped, everything else is appended.
 * @returns {Element} The newly created element.
 */
function el(tag, props = {}, kids = []) {
  const node = document.createElement(tag)

  Object.entries(props).forEach(([key, val]) => {
    if (key === 'class') {
      node.className = val
      return
    }
    const isHandler = key.startsWith('on') && typeof val === 'function'
    if (isHandler) {
      node.addEventListener(key.slice(2), val)
      return
    }
    if (val != null) node.setAttribute(key, val)
  })

  // [].concat() accepts both a lone child and an array of children.
  const children = [].concat(kids)
  children
    .filter((child) => child != null)
    .forEach((child) => { node.append(child) })

  return node
}
17
+
18
/**
 * Build the War Diary panel inside `host` and wire up the "write" button.
 *
 * The panel has two text inputs (unit name, traits), a status line, a button
 * and an output area. Clicking the button loads the in-browser model via
 * ensureModel() and streams the generated entry into the output area.
 *
 * @param {Element} host - Container the panel is appended to.
 */
export function mountDiaryPanel(host) {
  const unit = el('input', { class: 'persona-input', type: 'text', value: 'Bram the Warrior' })
  const traits = el('input', { class: 'persona-input', type: 'text', value: 'Cautious, Veteran, Vengeful' })
  const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
  const btn = el('button', { class: 'persona-go', type: 'button' }, '✒ Write war diary')
  const out = el('div', { class: 'persona-about' }, 'A first-person diary entry, written by a small llama.cpp model in your browser.')

  const controls = el('aside', { class: 'persona-controls' }, [
    el('h2', { class: 'persona-title' }, 'War Diary'),
    el('label', { class: 'persona-label' }, 'Unit'), unit,
    el('label', { class: 'persona-label' }, 'Traits'), traits,
    btn, status,
  ])
  const result = el('div', { class: 'persona-result' }, [out])
  host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))

  // Guards against overlapping runs if the click handler fires twice.
  let busy = false
  async function write() {
    if (busy) return
    busy = true; btn.disabled = true
    // Trim BEFORE applying the fallback: a whitespace-only name is truthy and
    // would otherwise render an empty "— Diary of  —" header.
    const who = unit.value.trim() || 'a nameless soldier'
    const header = `— Diary of ${who} —\n\n`
    out.textContent = header
    try {
      status.textContent = 'loading the model into your browser…'
      await ensureModel((frac) => { status.textContent = `downloading model… ${Math.round(frac * 100)}% (one-time, then cached)` })
      status.textContent = `writing with ${modelLabel()} — on your device…`
      // The header is already in place, so tokens are simply appended after it.
      await streamChat(DIARY_SYSTEM, diaryUserPrompt(who, traits.value), {
        maxTokens: 200, temperature: 0.9,
        onToken: (piece) => { out.textContent += piece },
      })
      status.textContent = 'written ✓ (generated locally)'
    } catch (e) {
      status.textContent = `couldn't run the local model: ${e.message || e}`
    } finally {
      busy = false; btn.disabled = false
    }
  }
  btn.addEventListener('click', write)
}
web/personaPanel.js CHANGED
@@ -1,8 +1,11 @@
1
  // Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
2
- // Reuses woid's persona SSE client (/web/personaStream.js) VERBATIM against the
3
- // Space's own /persona/generate/stream endpoint, live-updating name/about as tokens
4
- // stream (the same extractLivePersona trick woid uses). No Pixi, no framework.
5
- import { streamGenerateProfile, extractLivePersona } from '/web/personaStream.js'
 
 
 
6
 
7
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
8
 
@@ -19,17 +22,16 @@ function el(tag, props = {}, kids = []) {
19
  }
20
 
21
  export function mountPersonaPanel(host, opts = {}) {
22
- const path = opts.path || '/persona/generate/stream'
23
  const classes = opts.classes || CLASSES
24
 
25
  const sel = el('select', { class: 'persona-input' }, classes.map((c) => el('option', { value: c }, c)))
26
  const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
27
- const status = el('div', { class: 'persona-status' })
28
  const btn = el('button', { class: 'persona-go', type: 'button' }, '⚔ Recruit a soldier')
29
 
30
  const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
31
  const tagsEl = el('div', { class: 'persona-tags' })
32
- const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — the model writes their legend.')
33
 
34
  const controls = el('aside', { class: 'persona-controls' }, [
35
  el('h2', { class: 'persona-title' }, 'Recruit'),
@@ -50,31 +52,31 @@ export function mountPersonaPanel(host, opts = {}) {
50
  if (busy) return
51
  busy = true; btn.disabled = true
52
  nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
53
- status.textContent = 'summoning the model… (first run downloads it)'
54
- let acc = ''
55
  try {
56
- await streamGenerateProfile({
57
- bridgeUrl: '', path, body: { class: sel.value, seed: seed.value },
58
- onEvent: (evt, parsed) => {
59
- if (evt === 'model') status.textContent = `writing with ${parsed?.model || 'the model'}…`
60
- else if (evt === 'delta') {
61
- acc += (parsed?.content || '')
62
- const live = extractLivePersona(acc)
63
- if (live.name) nameEl.textContent = live.name
64
- if (live.about) aboutEl.textContent = live.about
65
- } else if (evt === 'persona-done') {
66
- if (parsed?.name) nameEl.textContent = parsed.name
67
- if (parsed?.about) aboutEl.textContent = parsed.about
68
- setTags(parsed || {})
69
- } else if (evt === 'done') {
70
- status.textContent = 'enlisted ✓'
71
- } else if (evt === 'error') {
72
- status.textContent = `couldn't recruit: ${parsed?.error || 'unknown error'}`
73
- }
74
  },
75
  })
 
 
 
 
 
 
 
 
 
76
  } catch (e) {
77
- status.textContent = `couldn't recruit: ${e.message || e}`
78
  } finally {
79
  busy = false; btn.disabled = false
80
  }
 
1
  // Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
2
+ // Generation runs ON THE USER'S DEVICE via wllama (llama.cpp WASM) — no server, no
3
+ // cloud (🔌 Off the Grid + 🦙 Llama Champion). Reuses woid's persona JSON parser
4
+ // (personaParse.js) + live-extraction (extractLivePersona) verbatim.
5
+ import { streamChat, ensureModel, modelLabel } from '/web/wllamaLlm.js'
6
+ import { extractLivePersona } from '/web/personaStream.js'
7
+ import { parsePersonaJson } from '/web/personaParse.js'
8
+ import { PERSONA_SYSTEM, personaUserPrompt } from '/web/personaPrompts.js'
9
 
10
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
11
 
 
22
  }
23
 
24
  export function mountPersonaPanel(host, opts = {}) {
 
25
  const classes = opts.classes || CLASSES
26
 
27
  const sel = el('select', { class: 'persona-input' }, classes.map((c) => el('option', { value: c }, c)))
28
  const seed = el('input', { class: 'persona-input', type: 'text', placeholder: 'a word, a vibe… (optional)' })
29
+ const status = el('div', { class: 'persona-status' }, 'Runs on your device — no cloud.')
30
  const btn = el('button', { class: 'persona-go', type: 'button' }, '⚔ Recruit a soldier')
31
 
32
  const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
33
  const tagsEl = el('div', { class: 'persona-tags' })
34
+ const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — a small llama.cpp model in your browser writes their legend.')
35
 
36
  const controls = el('aside', { class: 'persona-controls' }, [
37
  el('h2', { class: 'persona-title' }, 'Recruit'),
 
52
  if (busy) return
53
  busy = true; btn.disabled = true
54
  nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
 
 
55
  try {
56
+ status.textContent = 'loading the model into your browser…'
57
+ await ensureModel((frac) => { status.textContent = `downloading model… ${Math.round(frac * 100)}% (one-time, then cached)` })
58
+ status.textContent = `writing with ${modelLabel()} — on your device…`
59
+ let acc = ''
60
+ await streamChat(PERSONA_SYSTEM, personaUserPrompt(sel.value, seed.value), {
61
+ maxTokens: 200,
62
+ onToken: (piece) => {
63
+ acc += piece
64
+ const live = extractLivePersona(acc)
65
+ if (live.name) nameEl.textContent = live.name
66
+ if (live.about) aboutEl.textContent = live.about
 
 
 
 
 
 
 
67
  },
68
  })
69
+ try {
70
+ const p = parsePersonaJson(acc)
71
+ if (p.name) nameEl.textContent = p.name
72
+ aboutEl.textContent = p.about
73
+ setTags(p)
74
+ status.textContent = 'enlisted ✓ (generated locally)'
75
+ } catch (e) {
76
+ status.textContent = `the model rambled — couldn't parse a clean persona (${e.message || e})`
77
+ }
78
  } catch (e) {
79
+ status.textContent = `couldn't run the local model: ${e.message || e}`
80
  } finally {
81
  busy = false; btn.disabled = false
82
  }
web/personaParse.js ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Persona JSON parsing helpers.
3
+ *
4
+ * Canonical home: agent-sandbox/woid-core/persona/parse.js.
5
+ *
6
+ * LLMs return persona JSON wrapped in noise: code fences, preambles,
7
+ * trailing commentary, occasionally multi-object emissions. These helpers
8
+ * defensively extract the first valid JSON object and sanitize the
9
+ * standard fields (name, about, specialty, personality).
10
+ *
11
+ * Pi-bridge has its own generatePersona() that uses these as building
12
+ * blocks. Brain-server's lib/persona.js uses these via a thin wrapper.
13
+ */
14
+
15
/**
 * Tighten a model-returned name: strip wrapping quotes/whitespace, collapse
 * internal whitespace, and reject obvious "name: foo" LLM leakage.
 *
 * Only strings and numbers are considered. Any other JSON value (object,
 * array, boolean, null) is rejected up front, because String() would turn it
 * into junk such as "[object Object]" that still passes the length check.
 *
 * @param {*} raw - Value taken from the model's JSON.
 * @returns {string} The cleaned name, or "" if it fails sanity (wrong type,
 *   shorter than 2 or longer than 40 chars, or shaped like a key-value pair).
 */
export function sanitizeName(raw) {
  if (typeof raw !== "string" && typeof raw !== "number") return "";
  const s = String(raw)
    .replace(/^[\s"'“”‘’`]+|[\s"'“”‘’`]+$/gu, "")
    .replace(/\s+/g, " ")
    .trim();
  if (s.length < 2 || s.length > 40) return "";
  if (/^(name|character|persona)\s*[:=]/i.test(s)) return "";
  return s;
}
29
+
30
/**
 * Trim a short tag (specialty / personality).
 *
 * Strips surrounding whitespace and one trailing period, then ellipsizes
 * anything over 48 UTF-16 units to at most 46 units plus "…".
 *
 * @param {*} raw - Candidate tag; anything that is not a string yields null.
 * @returns {string|null} The cleaned tag, or null for empty/non-string input.
 */
export function trimTag(raw) {
  if (typeof raw !== "string") return null;
  // Re-trim after dropping the period so "stoic ." does not become "stoic ".
  const s = raw.trim().replace(/\.\s*$/, "").trim();
  if (!s) return null;
  if (s.length <= 48) return s;

  let head = s.slice(0, 46);
  // slice() counts UTF-16 units; if the cut fell inside a surrogate pair,
  // drop the dangling high surrogate rather than emit a broken character.
  if (/[\uD800-\uDBFF]$/.test(head)) head = head.slice(0, -1);
  return head.trim() + "…";
}
40
+
41
/**
 * Find the first parseable JSON object embedded in `raw`.
 *
 * Walks forward from each `{` to its bracket-balanced, string-aware matching
 * `}` and tries JSON.parse on that slice; the first successful parse wins.
 * This copes with leading/trailing prose, multi-object emissions, and `}`
 * characters inside string literals.
 *
 * @param {*} raw - Text to scan. null/undefined yield null; other non-string
 *   values are coerced with String().
 * @returns {Object|null} The parsed object, or null if none could be parsed.
 */
export function extractFirstJsonObject(raw) {
  // Honour the documented "or null" contract instead of throwing on raw.length.
  if (raw == null) return null;
  const text = String(raw);

  for (let i = 0; i < text.length; i++) {
    if (text[i] !== "{") continue;
    let depth = 0, inStr = false, esc = false;
    for (let j = i; j < text.length; j++) {
      const ch = text[j];
      if (inStr) {
        // Inside a string only an unescaped quote is significant.
        if (esc) esc = false;
        else if (ch === "\\") esc = true;
        else if (ch === '"') inStr = false;
        continue;
      }
      if (ch === '"') inStr = true;
      else if (ch === "{") depth++;
      else if (ch === "}") {
        depth--;
        if (depth === 0) {
          const slice = text.slice(i, j + 1);
          // Balanced but invalid JSON: abandon this start and try the next `{`.
          try { return JSON.parse(slice); } catch { break; }
        }
      }
    }
  }
  return null;
}
72
+
73
/**
 * Parse a persona JSON response from an LLM.
 *
 * Looks inside a ``` / ```json fence first. If there is no fence, or the
 * fenced body holds no parseable object, it falls back to scanning the whole
 * response, so an unrelated fenced snippet cannot hide valid JSON elsewhere
 * in the text. The name is sanitized and the tag fields are trimmed.
 *
 * Optional fields: avatar_hint, vibe, specialty, personality. avatar_hint /
 * vibe are brain-server-style; specialty / personality are pi-bridge-style.
 * Both are surfaced if present.
 *
 * @param {*} raw - Raw model output.
 * @returns {{name:string|null, about:string, avatar_hint:string, vibe:string, specialty:string|null, personality:string|null}}
 * @throws {Error} If no JSON object can be parsed, or `about` is missing/empty.
 */
export function parsePersonaJson(raw) {
  const text = String(raw ?? "");
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
  const parsed =
    (fenced ? extractFirstJsonObject(fenced[1].trim()) : null) ??
    extractFirstJsonObject(text.trim());
  if (!parsed) throw new Error("model did not return a parseable JSON object");

  const name = sanitizeName(parsed.name ?? parsed.callSign ?? "");
  const about = (typeof parsed.about === "string" ? parsed.about.trim() : "").slice(0, 1000);
  if (!about) throw new Error("model did not return an about");

  const avatar_hint = String(parsed.avatar_hint ?? parsed.avatarHint ?? "").slice(0, 200);
  const vibe = String(parsed.vibe ?? "").slice(0, 40);
  const specialty = trimTag(parsed.specialty ?? parsed.role ?? parsed.job ?? null);
  const personality = trimTag(parsed.personality ?? parsed.personalityTag ?? null);

  return { name: name || null, about, avatar_hint, vibe, specialty, personality };
}
web/personaPrompts.js ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Tiny-Army persona + war-diary prompts (mirrors the Python prompts.py for the
2
+ // in-browser path). War-legend tone, not woid's.
3
+
4
+ export const PERSONA_SYSTEM =
5
+ 'You invent tiny soldiers for a fantasy auto-battler called Tiny Army, where every ' +
6
+ 'fighter writes its own legend. Given a class and an optional seed, return ONE JSON ' +
7
+ 'object and NOTHING else, with exactly these keys:\n' +
8
+ ' "name": a short evocative soldier name (2-4 words),\n' +
9
+ ' "about": 1-3 sentences of backstory in a heroic, slightly wry war-legend tone,\n' +
10
+ ' "specialty": a 1-3 word combat specialty,\n' +
11
+ ' "personality": a 1-3 word personality tag,\n' +
12
+ ' "vibe": a 1-3 word vibe.\n' +
13
+ 'Output strictly valid JSON. No preamble, no code fences, no commentary.'
14
+
15
+ export const DIARY_SYSTEM =
16
+ 'You are a tiny soldier in the auto-battler Tiny Army, writing a short first-person ' +
17
+ 'war-diary entry. Given your name and traits, write 2-4 vivid sentences in first ' +
18
+ 'person about a day on the battlefield — heroic, grounded, a touch of dark humor. ' +
19
+ 'Prose only: no headings, no lists, no preamble.'
20
+
21
/**
 * Build the user message for persona generation.
 *
 * @param {string} [unitClass] - Soldier class; blank or whitespace-only falls
 *   back to "soldier".
 * @param {string} [seed] - Optional inspiration; omitted from the prompt when
 *   blank or whitespace-only.
 * @returns {string} The prompt text.
 */
export function personaUserPrompt(unitClass = '', seed = '') {
  // Trim BEFORE applying the default: '   ' is truthy and used to produce "Class: .".
  const cls = String(unitClass ?? '').trim() || 'soldier'
  const hint = String(seed ?? '').trim()
  const s = hint ? ` Seed inspiration: "${hint}".` : ''
  return `Class: ${cls}.${s} Return the JSON object now.`
}
25
+
26
/**
 * Build the user message for a war-diary entry.
 *
 * @param {string} [unit] - Soldier name; blank or whitespace-only falls back
 *   to "a nameless soldier".
 * @param {string} [traits] - Trait list; blank or whitespace-only falls back
 *   to "untested".
 * @returns {string} The prompt text.
 */
export function diaryUserPrompt(unit = '', traits = '') {
  // Trim BEFORE applying the default so whitespace-only input does not yield "Name: .".
  const u = String(unit ?? '').trim() || 'a nameless soldier'
  const t = String(traits ?? '').trim() || 'untested'
  return `Name: ${u}. Traits: ${t}. Write the diary entry.`
}
web/tiny.js CHANGED
@@ -8,6 +8,7 @@ import { makeTeamBattle, step, FIELD } from '/web/engine.js'
8
  import { sliceGridWith, cellOf, rowFor, facingFor, ANIM } from '/web/sheet.js'
9
  import { mountSpritePlayground } from '/web/playground.js'
10
  import { mountPersonaPanel } from '/web/personaPanel.js'
 
11
 
12
  function whenEl(id, cb) {
13
  const found = document.getElementById(id)
@@ -53,8 +54,9 @@ whenEl('sprite-stage', async (el) => {
53
  playground = mountSpritePlayground(PIXI, el, { packs: man.packs || [], urlFor: spriteUrl })
54
  })
55
 
56
- // ── Personas tab — vanilla persona panel streaming from /persona/generate/stream ──
57
  whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
 
58
 
59
  // ── Battle tab (real sprites, reusing the engine + shared renderer) ──────────
60
  const PLAYERS = [
 
8
  import { sliceGridWith, cellOf, rowFor, facingFor, ANIM } from '/web/sheet.js'
9
  import { mountSpritePlayground } from '/web/playground.js'
10
  import { mountPersonaPanel } from '/web/personaPanel.js'
11
+ import { mountDiaryPanel } from '/web/diaryPanel.js'
12
 
13
  function whenEl(id, cb) {
14
  const found = document.getElementById(id)
 
54
  playground = mountSpritePlayground(PIXI, el, { packs: man.packs || [], urlFor: spriteUrl })
55
  })
56
 
57
+ // ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
58
  whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
59
+ whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
60
 
61
  // ── Battle tab (real sprites, reusing the engine + shared renderer) ──────────
62
  const PLAYERS = [
web/wllamaLlm.js ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // In-browser llama.cpp via wllama (WASM) — runs a GGUF from Hugging Face on the
2
+ // USER's device. This is the local-first path: zero cloud/server inference, so it
3
+ // earns 🔌 Off the Grid AND 🦙 Llama Champion (wllama IS llama.cpp, compiled to
4
+ // WebAssembly). It also dodges the free Space's ~0.6 tok/s CPU — generation runs on
5
+ // the visitor's hardware (typically 10–50× faster). Streams tokens like the server
6
+ // path did, so the persona/diary panels barely change.
7
+ import { Wllama } from 'https://cdn.jsdelivr.net/npm/@wllama/wllama@3.4.1/esm/index.js'
8
+
9
+ const WLLAMA_VER = '3.4.1'
10
+ const WASM = { default: `https://cdn.jsdelivr.net/npm/@wllama/wllama@${WLLAMA_VER}/esm/wasm/wllama.wasm` }
11
+ // Small instruct GGUF: ~380 MB, downloaded once then cached by the browser.
12
+ const MODEL = { repo: 'Qwen/Qwen2.5-0.5B-Instruct-GGUF', file: 'qwen2.5-0.5b-instruct-q4_k_m.gguf' }
13
+
14
+ let _wllama = null
15
+ let _loadPromise = null
16
+ let _chain = Promise.resolve() // serialize completions (one model, no parallel decode)
17
+
18
/**
 * Short display name for the configured model: the last path segment of
 * MODEL.repo.
 *
 * @returns {string}
 */
export function modelLabel() {
  const segments = MODEL.repo.split('/')
  return segments[segments.length - 1]
}
19
+
20
// Progress callbacks from every caller that is waiting on the in-flight load.
const _progressListeners = new Set()

/**
 * Lazy-load wllama and the GGUF model, sharing one load between all callers.
 *
 * Every caller that arrives while the load is in flight has its `onProgress`
 * registered. Previously only the first caller's callback was wired up, so a
 * second panel that started waiting mid-download never saw progress.
 *
 * @param {(fraction: number) => void} [onProgress] - Called with loaded/total
 *   (or 0 when the total is not reported) as the loader reports progress.
 * @returns {Promise<Wllama>} Resolves with the loaded instance. On failure the
 *   promise rejects and the cached promise is cleared so a later call retries.
 */
export function ensureModel(onProgress) {
  if (_wllama) return Promise.resolve(_wllama)
  if (typeof onProgress === 'function') _progressListeners.add(onProgress)
  if (_loadPromise) return _loadPromise
  _loadPromise = (async () => {
    const w = new Wllama(WASM)
    await w.loadModelFromHF(MODEL, {
      n_ctx: 2048,
      progressCallback: ({ loaded, total }) => {
        const frac = total ? loaded / total : 0
        for (const notify of _progressListeners) notify(frac)
      },
    })
    _wllama = w
    _progressListeners.clear()
    return w
  })().catch((e) => {
    // Runs after the assignment above, so resetting here really clears the
    // cached promise and the next call starts a fresh load.
    _progressListeners.clear()
    _loadPromise = null
    throw e
  })
  return _loadPromise
}
35
+
36
/**
 * Stream a chat completion from the in-browser model.
 *
 * Requests are queued on the module-level `_chain` promise so each one starts
 * only after the previous one has settled, whether it succeeded or failed.
 *
 * @param {string} system - System message content.
 * @param {string} user - User message content.
 * @param {Object} [options]
 * @param {number} [options.maxTokens=200] - Sent as `max_tokens`.
 * @param {number} [options.temperature=0.8] - Sampling temperature.
 * @param {(piece: string) => void} [options.onToken] - Called for each
 *   non-empty content chunk.
 * @returns {Promise<string>} The concatenation of all streamed chunks.
 */
export function streamChat(system, user, { maxTokens = 200, temperature = 0.8, onToken } = {}) {
  async function generate() {
    const engine = await ensureModel()
    const chunks = await engine.createChatCompletion({
      messages: [
        { role: 'system', content: system },
        { role: 'user', content: user },
      ],
      max_tokens: maxTokens,
      temperature,
      top_k: 40,
      top_p: 0.9,
      stream: true,
    })

    let text = ''
    for await (const chunk of chunks) {
      const piece = chunk?.choices?.[0]?.delta?.content || ''
      if (!piece) continue
      text += piece
      if (onToken) onToken(piece)
    }
    return text
  }

  // Start after whatever is queued, regardless of how it settled.
  const pending = _chain.then(generate, generate)
  // The queue itself must never reject, or later requests would be skipped.
  _chain = pending.catch(() => {})
  return pending
}