Spaces:

build-small-hackathon
/

tiny-army

Running

polats Claude Opus 4.8 (1M context) committed 5 days ago

Commit

62070d0

1 Parent(s): 5264fdb

Fix empty-JSON: keep the answer when a model leaves <think> unclosed

Root cause (from a real debug report): Qwen3 on WebLLM opens a <think> tag and jumps
straight to the answer WITHOUT closing it. Our stripThink had a rule that deletes an
unterminated <think> all the way to end-of-string — so it nuked the entire JSON answer
and the parser saw 0 chars ("model returned empty / unparseable JSON"). The model was
actually producing a perfect, fully-filled persona.

Add stripThinkFinal() (drop closed <think>…</think> blocks + stray unclosed tags, KEEP
the answer) and use it for the final persona parse + diary display; keep the aggressive
stripThink only for the live streaming preview. Verified against the exact failing
output: parses name/about/specialty/personality/vibe correctly.

Also: settings section now inserts directly above "Display Theme" (below the Settings
title), not at the very top of the page.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (4) hide show

web/diaryPanel.js +5 -1
web/personaPanel.js +3 -3
web/personaPrompts.js +14 -2
web/settingsPanel.js +1 -1

web/diaryPanel.js CHANGED Viewed

@@ -5,7 +5,7 @@
 import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { mountTtsBar } from '/web/ttsBar.js'
 import { makeNarrator, ensureTts } from '/web/tts.js'
-import { DIARY_SYSTEM, diaryUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
 function el(tag, props = {}, kids = []) {
   const n = document.createElement(tag)
@@ -148,6 +148,10 @@ export function mountDiaryPanel(host) {
         },
         onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
       })
       status.textContent = 'written ✓ (generated locally)'
       lastDebug = buildDebug(lastBody.trim() ? 'written OK' : 'EMPTY OUTPUT', raw)
       dbgEl.textContent = raw

 import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { mountTtsBar } from '/web/ttsBar.js'
 import { makeNarrator, ensureTts } from '/web/tts.js'
+import { DIARY_SYSTEM, diaryUserPrompt, stripThink, stripThinkFinal, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
 function el(tag, props = {}, kids = []) {
   const n = document.createElement(tag)
         },
         onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
       })
+      // Final pass: keep the answer even if the model left a <think> unclosed (which the
+      // live streaming strip would have emptied).
+      const finalBody = stripThinkFinal(raw)
+      if (finalBody && finalBody !== lastBody) { lastBody = finalBody; out.textContent = header + lastBody }
       status.textContent = 'written ✓ (generated locally)'
       lastDebug = buildDebug(lastBody.trim() ? 'written OK' : 'EMPTY OUTPUT', raw)
       dbgEl.textContent = raw

web/personaPanel.js CHANGED Viewed

@@ -5,7 +5,7 @@
 import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { extractLivePersona } from '/web/personaStream.js'
 import { parsePersonaJson } from '/web/personaParse.js'
-import { PERSONA_SYSTEM, personaUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
 const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
@@ -54,7 +54,7 @@ export function mountPersonaPanel(host) {
   // A self-contained, paste-ready report of the last run.
   let lastDebug = ''
   function buildDebug(outcome, acc) {
-    const stripped = stripThink(acc || '')
     return [
       '=== TINY ARMY · PERSONA DEBUG ===',
       `engine:   ${getEngineId()} · ${backendLabel()}`,
@@ -107,7 +107,7 @@ export function mountPersonaPanel(host) {
         onStats: showStats,
       })
       try {
-        const p = parsePersonaJson(stripThink(acc))
         if (p.name) nameEl.textContent = p.name
         aboutEl.textContent = p.about
         setTags(p)

 import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
 import { extractLivePersona } from '/web/personaStream.js'
 import { parsePersonaJson } from '/web/personaParse.js'
+import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
 const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
   // A self-contained, paste-ready report of the last run.
   let lastDebug = ''
   function buildDebug(outcome, acc) {
+    const stripped = stripThinkFinal(acc || '')
     return [
       '=== TINY ARMY · PERSONA DEBUG ===',
       `engine:   ${getEngineId()} · ${backendLabel()}`,
         onStats: showStats,
       })
       try {
+        const p = parsePersonaJson(stripThinkFinal(acc))
         if (p.name) nameEl.textContent = p.name
         aboutEl.textContent = p.about
         setTags(p)

web/personaPrompts.js CHANGED Viewed

@@ -29,8 +29,8 @@ export function diaryUserPrompt(unit = '', traits = '') {
   return `Name: ${u}. Traits: ${t}. Write the diary entry.`
 }
-// Remove a model's <think>…</think> reasoning (Qwen3 etc.) from the visible answer —
-// including a still-open, unterminated block while it's mid-thought.
 export function stripThink(text) {
   return String(text || '')
     .replace(/<think>[\s\S]*?<\/think>/gi, '')
@@ -38,6 +38,18 @@ export function stripThink(text) {
     .replace(/^\s+/, '')
 }
 // Qwen3 is a thinking model: left alone it burns the whole token budget on a
 // <think> block and never reaches the JSON/answer.
 export const isThinking = (modelId) => /qwen3/i.test(String(modelId || ''))

   return `Name: ${u}. Traits: ${t}. Write the diary entry.`
 }
+// LIVE/streaming strip: hide the model's reasoning as it streams, including a still-open
+// <think> block (so the reasoning doesn't flash in the preview mid-thought).
 export function stripThink(text) {
   return String(text || '')
     .replace(/<think>[\s\S]*?<\/think>/gi, '')
     .replace(/^\s+/, '')
 }
+// FINAL strip (after generation is done): drop completed <think>…</think> blocks and any
+// stray, never-closed <think>/</think> tags, but KEEP the answer that follows. Some models
+// (Qwen3 on WebLLM/MLC) open <think> and jump straight to the answer WITHOUT ever closing
+// it — the aggressive streaming strip above would delete the whole answer, so the parser
+// saw 0 chars. This keeps it. Use this for parsing/final display, not for the live preview.
+export function stripThinkFinal(text) { // returns a string: `text` minus think markup, trimmed at both ends
+  return String(text || '') // any falsy input (null, undefined, '') becomes '' before the replaces run
+    .replace(/<think>[\s\S]*?<\/think>/gi, '') // 1) remove every closed block, contents included (lazy, case-insensitive)
+    .replace(/<\/?think>/gi, '') // 2) remove only the leftover lone tags themselves; the text after them is kept
+    .trim() // unlike a leading-only strip, this also drops trailing whitespace
+}
 // Qwen3 is a thinking model: left alone it burns the whole token budget on a
 // <think> block and never reaches the JSON/answer.
 export const isThinking = (modelId) => /qwen3/i.test(String(modelId || ''))

web/settingsPanel.js CHANGED Viewed

@@ -32,7 +32,7 @@ function injectInto(sampleSection) {
     'Personas and War Diaries. Runs on your device; models cache in your browser.')
   const modelHost = el('div')
   section.append(h, intro, modelHost)
-  list.insertBefore(section, list.firstChild) // top of the settings page, above Display Theme
   mountModelBar(modelHost)
 }

     'Personas and War Diaries. Runs on your device; models cache in your browser.')
   const modelHost = el('div')
   section.append(h, intro, modelHost)
+  list.insertBefore(section, sampleSection) // directly above Display Theme (below the title)
   mountModelBar(modelHost)
 }