polats Claude Opus 4.8 (1M context) committed on
Commit
62070d0
·
1 Parent(s): 5264fdb

Fix empty-JSON: keep the answer when a model leaves <think> unclosed

Browse files

Root cause (from a real debug report): Qwen3 on WebLLM opens a <think> tag and jumps
straight to the answer WITHOUT closing it. Our stripThink had a rule that deletes an
unterminated <think> all the way to end-of-string — so it nuked the entire JSON answer
and the parser saw 0 chars ("model returned empty / unparseable JSON"). The model was
actually producing a perfect, fully-filled persona.

Add stripThinkFinal() (drop closed <think>…</think> blocks + stray unclosed tags, KEEP
the answer) and use it for the final persona parse + diary display; keep the aggressive
stripThink only for the live streaming preview. Verified against the exact failing
output: parses name/about/specialty/personality/vibe correctly.

Also: settings section now inserts directly above "Display Theme" (below the Settings
title), not at the very top of the page.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

web/diaryPanel.js CHANGED
@@ -5,7 +5,7 @@
5
  import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
6
  import { mountTtsBar } from '/web/ttsBar.js'
7
  import { makeNarrator, ensureTts } from '/web/tts.js'
8
- import { DIARY_SYSTEM, diaryUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
9
 
10
  function el(tag, props = {}, kids = []) {
11
  const n = document.createElement(tag)
@@ -148,6 +148,10 @@ export function mountDiaryPanel(host) {
148
  },
149
  onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
150
  })
 
 
 
 
151
  status.textContent = 'written ✓ (generated locally)'
152
  lastDebug = buildDebug(lastBody.trim() ? 'written OK' : 'EMPTY OUTPUT', raw)
153
  dbgEl.textContent = raw
 
5
  import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
6
  import { mountTtsBar } from '/web/ttsBar.js'
7
  import { makeNarrator, ensureTts } from '/web/tts.js'
8
+ import { DIARY_SYSTEM, diaryUserPrompt, stripThink, stripThinkFinal, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
9
 
10
  function el(tag, props = {}, kids = []) {
11
  const n = document.createElement(tag)
 
148
  },
149
  onStats: (s) => { stats.textContent = `● ${s.tokPerSec} tok/s · ${s.tokens} tok${s.ttftSeconds != null ? ` · first ${s.ttftSeconds}s` : ''}` },
150
  })
151
+ // Final pass: keep the answer even if the model left a <think> unclosed (which the
152
+ // live streaming strip would have emptied).
153
+ const finalBody = stripThinkFinal(raw)
154
+ if (finalBody && finalBody !== lastBody) { lastBody = finalBody; out.textContent = header + lastBody }
155
  status.textContent = 'written ✓ (generated locally)'
156
  lastDebug = buildDebug(lastBody.trim() ? 'written OK' : 'EMPTY OUTPUT', raw)
157
  dbgEl.textContent = raw
web/personaPanel.js CHANGED
@@ -5,7 +5,7 @@
5
  import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
6
  import { extractLivePersona } from '/web/personaStream.js'
7
  import { parsePersonaJson } from '/web/personaParse.js'
8
- import { PERSONA_SYSTEM, personaUserPrompt, stripThink, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
9
 
10
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
11
 
@@ -54,7 +54,7 @@ export function mountPersonaPanel(host) {
54
  // A self-contained, paste-ready report of the last run.
55
  let lastDebug = ''
56
  function buildDebug(outcome, acc) {
57
- const stripped = stripThink(acc || '')
58
  return [
59
  '=== TINY ARMY · PERSONA DEBUG ===',
60
  `engine: ${getEngineId()} · ${backendLabel()}`,
@@ -107,7 +107,7 @@ export function mountPersonaPanel(host) {
107
  onStats: showStats,
108
  })
109
  try {
110
- const p = parsePersonaJson(stripThink(acc))
111
  if (p.name) nameEl.textContent = p.name
112
  aboutEl.textContent = p.about
113
  setTags(p)
 
5
  import { streamChat, ensureModel, currentModel, currentModelId, getEngineId, backendLabel } from '/web/runtime.js'
6
  import { extractLivePersona } from '/web/personaStream.js'
7
  import { parsePersonaJson } from '/web/personaParse.js'
8
+ import { PERSONA_SYSTEM, personaUserPrompt, stripThink, stripThinkFinal, noThink, thinkMaxTokens } from '/web/personaPrompts.js'
9
 
10
  const CLASSES = ['Warrior', 'Ranger', 'Monk', 'Assassin', 'Mage', 'Paladin', 'Cleric', 'Knight']
11
 
 
54
  // A self-contained, paste-ready report of the last run.
55
  let lastDebug = ''
56
  function buildDebug(outcome, acc) {
57
+ const stripped = stripThinkFinal(acc || '')
58
  return [
59
  '=== TINY ARMY · PERSONA DEBUG ===',
60
  `engine: ${getEngineId()} · ${backendLabel()}`,
 
107
  onStats: showStats,
108
  })
109
  try {
110
+ const p = parsePersonaJson(stripThinkFinal(acc))
111
  if (p.name) nameEl.textContent = p.name
112
  aboutEl.textContent = p.about
113
  setTags(p)
web/personaPrompts.js CHANGED
@@ -29,8 +29,8 @@ export function diaryUserPrompt(unit = '', traits = '') {
29
  return `Name: ${u}. Traits: ${t}. Write the diary entry.`
30
  }
31
 
32
- // Remove a model's <think>…</think> reasoning (Qwen3 etc.) from the visible answer —
33
- // including a still-open, unterminated block while it's mid-thought.
34
  export function stripThink(text) {
35
  return String(text || '')
36
  .replace(/<think>[\s\S]*?<\/think>/gi, '')
@@ -38,6 +38,18 @@ export function stripThink(text) {
38
  .replace(/^\s+/, '')
39
  }
40
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  // Qwen3 is a thinking model: left alone it burns the whole token budget on a
42
  // <think> block and never reaches the JSON/answer.
43
  export const isThinking = (modelId) => /qwen3/i.test(String(modelId || ''))
 
29
  return `Name: ${u}. Traits: ${t}. Write the diary entry.`
30
  }
31
 
32
+ // LIVE/streaming strip: hide the model's reasoning as it streams, including a still-open
33
+ // <think> block (so the reasoning doesn't flash in the preview mid-thought).
34
  export function stripThink(text) {
35
  return String(text || '')
36
  .replace(/<think>[\s\S]*?<\/think>/gi, '')
 
38
  .replace(/^\s+/, '')
39
  }
40
 
41
+ // FINAL strip (after generation is done): drop completed <think>…</think> blocks and any
42
+ // stray, never-closed <think>/</think> tags, but KEEP the answer that follows. Some models
43
+ // (Qwen3 on WebLLM/MLC) open <think> and jump straight to the answer WITHOUT ever closing
44
+ // it — the aggressive streaming strip above would delete the whole answer, so the parser
45
+ // saw 0 chars. This keeps it. Use this for parsing/final display, not for the live preview.
46
+ export function stripThinkFinal(text) {
47
+ return String(text || '')
48
+ .replace(/<think>[\s\S]*?<\/think>/gi, '')
49
+ .replace(/<\/?think>/gi, '')
50
+ .trim()
51
+ }
52
+
53
  // Qwen3 is a thinking model: left alone it burns the whole token budget on a
54
  // <think> block and never reaches the JSON/answer.
55
  export const isThinking = (modelId) => /qwen3/i.test(String(modelId || ''))
web/settingsPanel.js CHANGED
@@ -32,7 +32,7 @@ function injectInto(sampleSection) {
32
  'Personas and War Diaries. Runs on your device; models cache in your browser.')
33
  const modelHost = el('div')
34
  section.append(h, intro, modelHost)
35
- list.insertBefore(section, list.firstChild) // top of the settings page, above Display Theme
36
  mountModelBar(modelHost)
37
  }
38
 
 
32
  'Personas and War Diaries. Runs on your device; models cache in your browser.')
33
  const modelHost = el('div')
34
  section.append(h, intro, modelHost)
35
+ list.insertBefore(section, sampleSection) // directly above Display Theme (below the title)
36
  mountModelBar(modelHost)
37
  }
38