Spaces:

build-small-hackathon
/

tiny-army

Running

polats Claude Opus 4.8 (1M context) committed on Jun 4

Commit

898540a

1 Parent(s): cd43499

Settings: model section to top; default WebLLM + Qwen3 0.6B; copyable debug

- Inject the "Local AI Model" section at the TOP of Gradio's settings page.
- Default engine = WebLLM (best on mobile WebGPU; falls back to wllama when
WebGPU is unavailable) and default model = Qwen3 0.6B.
- Persona + diary panels gain a "📋 Copy debug" button that copies a paste-ready
report (engine · backend · model · input · outcome · raw output · stripped text,
plus exception/stack), so failures like the empty-JSON persona can be sent back
verbatim. Clipboard-blocked contexts fall back to selecting the text.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (6) hide show

web/diaryPanel.js +38 -6
web/engineWebllm.js +1 -1
web/personaPanel.js +49 -11
web/runtime.js +3 -1
web/settingsPanel.js +1 -1
web/shell/persona.css +6 -0

web/diaryPanel.js CHANGED Viewed

@@ -2,7 +2,7 @@
 // diary entry generated ON THE USER'S DEVICE via the LLM facade, and can READ IT ALOUD
 // on the user's device too (Kokoro / Kitten / Web Speech via the TTS facade). Shares
 // the persona styling (.persona-*), the model picker, and tok/s stats.
-import { streamChat, ensureModel, currentModel } from '/web/runtime.js'
 import { mountTtsBar } from '/web/ttsBar.js'
 import { makeNarrator, ensureTts } from '/web/tts.js'
 import { DIARY_SYSTEM, diaryUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
@@ -28,6 +28,9 @@ export function mountDiaryPanel(host) {
   const narrateBtn = el('button', { class: 'persona-go persona-go-alt', type: 'button' }, '🔊 Read aloud')
   const ttsStatus = el('div', { class: 'persona-status tts-status' })
   const out = el('div', { class: 'persona-about' }, 'A first-person diary entry, written by a small model in your browser — and read aloud on your device.')
   // On phones the voice bar collapses behind a tap-to-expand summary so the story
   // isn't pushed off-screen; on desktop it stays open (summary hidden via CSS).
@@ -40,11 +43,36 @@ export function mountDiaryPanel(host) {
     btn, stats, status,
     ttsWrap, narrateBtn, ttsStatus,
   ])
-  const result = el('div', { class: 'persona-result' }, [out])
   host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
   const ttsBar = mountTtsBar(ttsHost)
   let busy = false
   let lastBody = '' // diary text (no header), what gets narrated
   let narrator = null
@@ -100,6 +128,7 @@ export function mountDiaryPanel(host) {
     // If auto-narrate is on, prepare a live narrator before generation starts.
     let live = null
     let spokenLen = 0
     if (ttsBar.autoNarrate()) {
       try { setSpeaking(true); narrator = live = await makeReadyNarrator() }
       catch (e) { setSpeaking(false); ttsStatus.textContent = `voice unavailable: ${e.message || e}` }
@@ -109,9 +138,8 @@ export function mountDiaryPanel(host) {
       status.textContent = `loading ${currentModel().label} into your browser…`
       await ensureModel((frac, label) => { status.textContent = label || `downloading ${currentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
       status.textContent = `writing on your device with ${currentModel().label}…`
-      let raw = ''
-      await streamChat(DIARY_SYSTEM, diaryUserPrompt(unit.value, traits.value) + noThink(currentModel().id), {
-        maxTokens: thinkMaxTokens(currentModel().id, 220), temperature: 0.9,
         onToken: (piece) => {
           raw += piece
           lastBody = stripThink(raw)
@@ -121,9 +149,13 @@ export function mountDiaryPanel(host) {
         onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
       })
       status.textContent = 'written ✓ (generated locally)'
       if (live) live.end() // flush the tail sentence; onState resets the button
     } catch (e) {
-      status.textContent = `couldn't run the local model: ${e.message || e}`
       if (live) live.stop()
     } finally {
       busy = false; btn.disabled = false

 // diary entry generated ON THE USER'S DEVICE via the LLM facade, and can READ IT ALOUD
 // on the user's device too (Kokoro / Kitten / Web Speech via the TTS facade). Shares
 // the persona styling (.persona-*), the model picker, and tok/s stats.
+import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { mountTtsBar } from '/web/ttsBar.js'
 import { makeNarrator, ensureTts } from '/web/tts.js'
 import { DIARY_SYSTEM, diaryUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
   const narrateBtn = el('button', { class: 'persona-go persona-go-alt', type: 'button' }, '🔊 Read aloud')
   const ttsStatus = el('div', { class: 'persona-status tts-status' })
   const out = el('div', { class: 'persona-about' }, 'A first-person diary entry, written by a small model in your browser — and read aloud on your device.')
+  const dbgEl = el('pre', { class: 'persona-think' })
+  const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
+  const dbgWrap = el('details', { class: 'persona-think-wrap' }, [el('summary', {}, 'model output / debug (raw)'), copyBtn, dbgEl])
   // On phones the voice bar collapses behind a tap-to-expand summary so the story
   // isn't pushed off-screen; on desktop it stays open (summary hidden via CSS).
     btn, stats, status,
     ttsWrap, narrateBtn, ttsStatus,
   ])
+  const result = el('div', { class: 'persona-result' }, [out, dbgWrap])
   host.appendChild(el('div', { class: 'persona-view' }, [controls, result]))
   const ttsBar = mountTtsBar(ttsHost)
+  let lastDebug = ''
+  function buildDebug(outcome, raw) {
+    return [
+      '=== TINY ARMY · DIARY DEBUG ===',
+      `engine:   ${getEngineId()} · ${backendLabel()}`,
+      `model:    ${currentModelId()} (${currentModel().label})`,
+      `input:    unit=${unit.value} traits=${traits.value} maxTokens=${thinkMaxTokens(currentModelId(), 220)}`,
+      `outcome:  ${outcome}`,
+      `--- raw output (${(raw || '').length} chars) ---`,
+      raw || '(empty)',
+    ].join('\n')
+  }
+  copyBtn.addEventListener('click', async () => {
+    const text = lastDebug || buildDebug('(no generation yet)', '')
+    try {
+      await navigator.clipboard.writeText(text)
+      copyBtn.textContent = '✓ copied'; setTimeout(() => { copyBtn.textContent = '📋 Copy debug' }, 1600)
+    } catch {
+      dbgEl.textContent = text; dbgWrap.open = true
+      const r = document.createRange(); r.selectNodeContents(dbgEl)
+      const s = getSelection(); s.removeAllRanges(); s.addRange(r)
+      copyBtn.textContent = 'selected ↓ — ⌘/Ctrl+C'
+    }
+  })
   let busy = false
   let lastBody = '' // diary text (no header), what gets narrated
   let narrator = null
     // If auto-narrate is on, prepare a live narrator before generation starts.
     let live = null
     let spokenLen = 0
+    let raw = ''
     if (ttsBar.autoNarrate()) {
       try { setSpeaking(true); narrator = live = await makeReadyNarrator() }
       catch (e) { setSpeaking(false); ttsStatus.textContent = `voice unavailable: ${e.message || e}` }
       status.textContent = `loading ${currentModel().label} into your browser…`
       await ensureModel((frac, label) => { status.textContent = label || `downloading ${currentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
       status.textContent = `writing on your device with ${currentModel().label}…`
+      await streamChat(DIARY_SYSTEM, diaryUserPrompt(unit.value, traits.value) + noThink(currentModelId()), {
+        maxTokens: thinkMaxTokens(currentModelId(), 220), temperature: 0.9,
         onToken: (piece) => {
           raw += piece
           lastBody = stripThink(raw)
         onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
       })
       status.textContent = 'written ✓ (generated locally)'
+      lastDebug = buildDebug(lastBody.trim() ? 'written OK' : 'EMPTY OUTPUT', raw)
+      dbgEl.textContent = raw
       if (live) live.end() // flush the tail sentence; onState resets the button
     } catch (e) {
+      status.textContent = `couldn't run the local model: ${e.message || e} · 📋 Copy debug`
+      lastDebug = buildDebug('EXCEPTION: ' + (e.message || e) + (e.stack ? '\n' + e.stack : ''), raw)
+      dbgEl.textContent = raw; dbgWrap.open = true
       if (live) live.stop()
     } finally {
       busy = false; btn.disabled = false

web/engineWebllm.js CHANGED Viewed

@@ -82,7 +82,7 @@ export const engine = {
   requiresWebGPU: true,
   available: () => hasGPU(),
   models: MODELS,
-  defaultModel: 'qwen2.5-0.5b',
   ensure, stream,
   backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
   // Cache list/delete via MLC's own helpers (Cache API or IndexedDB, per appConfig).

   requiresWebGPU: true,
   available: () => hasGPU(),
   models: MODELS,
+  defaultModel: 'qwen3-0.6b',
   ensure, stream,
   backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
   // Cache list/delete via MLC's own helpers (Cache API or IndexedDB, per appConfig).

web/personaPanel.js CHANGED Viewed

@@ -1,8 +1,8 @@
 // Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
-// Generation runs ON THE USER'S DEVICE via wllama (llama.cpp WASM). Model is pickable
-// (modelBar), generation streams into a live "thinking" view + parsed result, and we
-// show tok/s. Reuses woid's persona parser + extractLivePersona verbatim.
-import { streamChat, ensureModel, currentModel } from '/web/runtime.js'
 import { extractLivePersona } from '/web/personaStream.js'
 import { parsePersonaJson } from '/web/personaParse.js'
 import { PERSONA_SYSTEM, personaUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
@@ -29,9 +29,11 @@ export function mountPersonaPanel(host) {
   const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
   const tagsEl = el('div', { class: 'persona-tags' })
-  const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — a small llama.cpp model in your browser writes their legend.')
   const thinkEl = el('pre', { class: 'persona-think' })
-  const thinkWrap = el('details', { class: 'persona-think-wrap' }, [el('summary', {}, 'model output (raw)'), thinkEl])
   const controls = el('aside', { class: 'persona-controls' }, [
     el('label', { class: 'persona-label' }, 'Class'), sel,
@@ -49,6 +51,37 @@ export function mountPersonaPanel(host) {
     stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}`
   }
   let busy = false
   async function generate() {
     if (busy) return
@@ -56,13 +89,13 @@ export function mountPersonaPanel(host) {
     if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
     nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
     thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
     try {
       status.textContent = `loading ${currentModel().label} into your browser…`
       await ensureModel((frac, label) => { status.textContent = label || `downloading ${currentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
       status.textContent = `writing on your device with ${currentModel().label}…`
-      let acc = ''
-      await streamChat(PERSONA_SYSTEM, personaUserPrompt(sel.value, seed.value) + noThink(currentModel().id), {
-        maxTokens: thinkMaxTokens(currentModel().id, 220),
         onToken: (piece) => {
           acc += piece
           thinkEl.textContent = acc  // raw view shows the model's <think> reasoning too
@@ -79,12 +112,17 @@ export function mountPersonaPanel(host) {
         aboutEl.textContent = p.about
         setTags(p)
         status.textContent = 'enlisted ✓ (generated locally)'
         thinkWrap.open = false
       } catch (e) {
-        status.textContent = `the model rambled — couldn't parse a clean persona (${e.message || e})`
       }
     } catch (e) {
-      status.textContent = `couldn't run the local model: ${e.message || e}`
     } finally {
       busy = false; btn.disabled = false
     }

 // Tiny Army persona panel — vanilla DOM, mounted by tiny.js into #persona-stage.
+// Generation runs ON THE USER'S DEVICE via the chosen engine (Settings). Streams into a
+// live "thinking" view + parsed result, shows tok/s, and exposes a one-tap "Copy debug"
+// report (engine/model/raw output/error) so failures can be pasted back for triage.
+import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { extractLivePersona } from '/web/personaStream.js'
 import { parsePersonaJson } from '/web/personaParse.js'
 import { PERSONA_SYSTEM, personaUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
   const nameEl = el('div', { class: 'persona-name' }, 'Your soldier')
   const tagsEl = el('div', { class: 'persona-tags' })
+  const aboutEl = el('div', { class: 'persona-about' }, 'Pick a class and recruit — a small model in your browser writes their legend.')
   const thinkEl = el('pre', { class: 'persona-think' })
+  const copyBtn = el('button', { class: 'persona-copy', type: 'button' }, '📋 Copy debug')
+  const thinkWrap = el('details', { class: 'persona-think-wrap' },
+    [el('summary', {}, 'model output / debug (raw)'), copyBtn, thinkEl])
   const controls = el('aside', { class: 'persona-controls' }, [
     el('label', { class: 'persona-label' }, 'Class'), sel,
     stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}`
   }
+  // A self-contained, paste-ready report of the last run.
+  let lastDebug = ''
+  function buildDebug(outcome, acc) {
+    const stripped = stripThink(acc || '')
+    return [
+      '=== TINY ARMY · PERSONA DEBUG ===',
+      `engine:   ${getEngineId()} · ${backendLabel()}`,
+      `model:    ${currentModelId()} (${currentModel().label})`,
+      `input:    class=${sel.value} seed=${seed.value || '(none)'} maxTokens=${thinkMaxTokens(currentModelId(), 220)}`,
+      `outcome:  ${outcome}`,
+      `--- raw output (${(acc || '').length} chars) ---`,
+      acc || '(empty)',
+      `--- after stripThink → parser (${stripped.length} chars) ---`,
+      stripped || '(empty)',
+    ].join('\n')
+  }
+  copyBtn.addEventListener('click', async () => {
+    const text = lastDebug || buildDebug('(no generation yet)', '')
+    try {
+      await navigator.clipboard.writeText(text)
+      copyBtn.textContent = '✓ copied'
+      setTimeout(() => { copyBtn.textContent = '📋 Copy debug' }, 1600)
+    } catch {
+      // Clipboard blocked (insecure context / permissions) — show it selected to copy by hand.
+      thinkEl.textContent = text; thinkWrap.open = true
+      const r = document.createRange(); r.selectNodeContents(thinkEl)
+      const s = getSelection(); s.removeAllRanges(); s.addRange(r)
+      copyBtn.textContent = 'selected ↓ — ⌘/Ctrl+C'
+    }
+  })
   let busy = false
   async function generate() {
     if (busy) return
     if (window.innerWidth <= 768) result.scrollIntoView({ behavior: 'smooth', block: 'start' })
     nameEl.textContent = '…'; aboutEl.textContent = ''; tagsEl.replaceChildren()
     thinkEl.textContent = ''; thinkWrap.open = true; stats.textContent = ''
+    let acc = ''
     try {
       status.textContent = `loading ${currentModel().label} into your browser…`
       await ensureModel((frac, label) => { status.textContent = label || `downloading ${currentModel().label}… ${Math.round(frac * 100)}% (one-time)` })
       status.textContent = `writing on your device with ${currentModel().label}…`
+      await streamChat(PERSONA_SYSTEM, personaUserPrompt(sel.value, seed.value) + noThink(currentModelId()), {
+        maxTokens: thinkMaxTokens(currentModelId(), 220),
         onToken: (piece) => {
           acc += piece
           thinkEl.textContent = acc  // raw view shows the model's <think> reasoning too
         aboutEl.textContent = p.about
         setTags(p)
         status.textContent = 'enlisted ✓ (generated locally)'
+        lastDebug = buildDebug('parsed OK', acc)
         thinkWrap.open = false
       } catch (e) {
+        status.textContent = `the model rambled — couldn't parse a clean persona (${e.message || e}) · 📋 Copy debug`
+        lastDebug = buildDebug('PARSE ERROR: ' + (e.message || e), acc)
+        thinkWrap.open = true
       }
     } catch (e) {
+      status.textContent = `couldn't run the local model: ${e.message || e} · 📋 Copy debug`
+      lastDebug = buildDebug('EXCEPTION: ' + (e.message || e) + (e.stack ? '\n' + e.stack : ''), acc)
+      thinkWrap.open = true
     } finally {
       busy = false; btn.disabled = false
     }

web/runtime.js CHANGED Viewed

@@ -8,7 +8,9 @@ import { engine as webllm } from '/web/engineWebllm.js'
 import { ensurePersistentStorage } from '/web/storage.js'
 const ENGINES = [wllama, transformers, webllm]
-let activeId = 'wllama'
 const modelSel = {} // engineId -> chosen model id (remembered per engine)
 const eng = () => ENGINES.find((e) => e.id === activeId) || ENGINES[0]

 import { ensurePersistentStorage } from '/web/storage.js'
 const ENGINES = [wllama, transformers, webllm]
+// Default to WebLLM (fastest on mobile WebGPU); fall back to wllama where there's no
+// WebGPU so the app still works. Both default to Qwen3 0.6B (see each engine).
+let activeId = webllm.available() ? 'webllm' : 'wllama'
 const modelSel = {} // engineId -> chosen model id (remembered per engine)
 const eng = () => ENGINES.find((e) => e.id === activeId) || ENGINES[0]

web/settingsPanel.js CHANGED Viewed

@@ -32,7 +32,7 @@ function injectInto(sampleSection) {
     'Personas and War Diaries. Runs on your device; models cache in your browser.')
   const modelHost = el('div')
   section.append(h, intro, modelHost)
-  list.appendChild(section)
   mountModelBar(modelHost)
 }

     'Personas and War Diaries. Runs on your device; models cache in your browser.')
   const modelHost = el('div')
   section.append(h, intro, modelHost)
+  list.insertBefore(section, list.firstChild) // top of the settings page, above Display Theme
   mountModelBar(modelHost)
 }

web/shell/persona.css CHANGED Viewed

@@ -102,6 +102,12 @@
   font-family: var(--p-mono); font-size: 11px; line-height: 1.5; color: var(--p-muted);
   background: var(--p-paper-2); border: 1px solid var(--p-ink); padding: 8px 10px;
 }
 /* ── TTS / voice controls (war-diary read-aloud) ───────────────────────────── */
 .tts-bar { margin-top: 16px; }

   font-family: var(--p-mono); font-size: 11px; line-height: 1.5; color: var(--p-muted);
   background: var(--p-paper-2); border: 1px solid var(--p-ink); padding: 8px 10px;
 }
+.persona-copy {
+  margin-top: 8px; font-family: var(--p-mono) !important; font-size: 10px !important; letter-spacing: .04em; text-transform: uppercase;
+  color: var(--p-transmit) !important; background: var(--p-card) !important; border: 1.5px solid var(--p-transmit) !important;
+  border-radius: 0 !important; padding: 5px 9px !important; cursor: pointer;
+}
+.persona-copy:hover { background: var(--p-transmit) !important; color: var(--p-card) !important; }
 /* ── TTS / voice controls (war-diary read-aloud) ───────────────────────────── */
 .tts-bar { margin-top: 16px; }