Spaces:

build-small-hackathon
/

tiny-army

Running

polats Claude Opus 4.8 (1M context) committed on Jun 6

Commit

1f1908e

1 Parent(s): de78f87

Add Coding Model setting + Skill Forge sandbox

New "Coding Model" settings section (below Persona Prompt) selecting between
two backends:
• Nemotron 3 Nano 30B-A3B via NVIDIA NIM (integrate.api.nvidia.com,
reasoning off) — too large to self-host on ZeroGPU.
• Mellum2 12B-A2.5B via a ZeroGPU sidecar (polats/tiny-army-mellum-zerogpu),
same /generate(+_stream) contract as the Aya/MiniCPM5 sidecars.

New "Skill Forge" tab (Sandbox group, after Sprite Animations): pick a
recruited hero, describe a skill, and the chosen coding model authors a
tailored skill definition. Reuses personaStore (roster) + streamCoding.

Backend: app.py routes mellum2-zerogpu and nemotron-3-nano-30b-nim model ids
in /text/generate/stream; _nim_text_stream() streams NIM's OpenAI-compatible
SSE. Frontend: codingModel.js store (separate from the persona/diary model),
codingModelBar.js picker, shared sseText.js SSE reader (extracted from
engineServer.js).

Needs Space config: TINY_MELLUM_SPACE + NVIDIA_NIM_API_KEY.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (10) hide show

.gitignore +1 -0
app.py +66 -0
web/codingModel.js +59 -0
web/codingModelBar.js +40 -0
web/engineServer.js +2 -36
web/settingsPanel.js +5 -0
web/shell/nav.json +1 -0
web/skillForgePanel.js +106 -0
web/sseText.js +38 -0
web/tiny.js +2 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 __pycache__/
 *.pyc
 .venv/

+.env
 __pycache__/
 *.pyc
 .venv/

app.py CHANGED Viewed

@@ -203,6 +203,10 @@ with gr.Blocks(title="Tiny Army") as ui:
             # div. No dark box here: the picker is the sidebar, the canvas is the
             # stage (framed by CSS), so it mirrors auto-battler's layout.
             gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
         # Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
         battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
         sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
@@ -265,6 +269,9 @@ TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
 VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
 TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
 MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
 _local_tts = None       # VoiceDesign model
 _local_clone = None     # Base model (voice clone) — lazy, only if a clone is requested
 _local_tts_lock = threading.Lock()
@@ -513,6 +520,47 @@ def _minicpm5_stream(system, user, max_tokens, temperature):
     yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
 @fastapi_app.post("/voxcpm-tts")
 async def voxcpm_tts(request: Request):
     body = await request.json()
@@ -557,6 +605,10 @@ IMAGE_MODE = os.environ.get("TINY_IMAGE_MODE", "").strip().lower()
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
 _NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
 # id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
 _NIM_PROVIDERS = {
     "flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
@@ -819,6 +871,20 @@ async def text_generate_stream(request: Request):
                         if stop.is_set():
                             break
                         loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
                 else:
                     for chunk in llm.stream_chat(
                         system,

             # div. No dark box here: the picker is the sidebar, the canvas is the
             # stage (framed by CSS), so it mirrors auto-battler's layout.
             gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
+        with gr.Tab("Skill Forge"):
+            # Sandbox: the Coding Model (Settings → Coding Model) authors a combat skill
+            # for a chosen hero. Filled by web/skillForgePanel.js.
+            gr.HTML('<div id="skillforge-stage" style="overflow:hidden"></div>')
         # Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
         battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
         sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
 VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
 TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
 MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
+# Coding model (Skill Forge): Mellum2 is a ZeroGPU sidecar (same /generate contract as
+# Aya); Nemotron-30B is too big to self-host, so it runs via hosted NVIDIA NIM (below).
+MELLUM_SPACE = os.environ.get("TINY_MELLUM_SPACE", "").strip()
 _local_tts = None       # VoiceDesign model
 _local_clone = None     # Base model (voice clone) — lazy, only if a clone is requested
 _local_tts_lock = threading.Lock()
     yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
+def _mellum_stream(system, user, max_tokens, temperature):
+    yield from _space_text_stream(MELLUM_SPACE, system, user, max_tokens, temperature)
+def _nim_text_stream(system, user, max_tokens, temperature, model=None):
+    """Stream from NVIDIA NIM's OpenAI-compatible chat endpoint (hosted Nemotron). Same
+    nvapi-… key as the portrait NIM. reasoning_budget=0 keeps the coding output clean
+    (Nemotron defaults thinking ON, which would otherwise emit a <think> trace)."""
+    model = model or _NIM_NEMOTRON_MODEL  # defined later in the file; resolve at call time
+    messages = []
+    if system and system.strip():
+        messages.append({"role": "system", "content": system.strip()})
+    messages.append({"role": "user", "content": (user or "").strip()})
+    body = _json.dumps({
+        "model": model,
+        "messages": messages,
+        "max_tokens": int(max_tokens or 512),
+        "temperature": float(temperature if temperature is not None else 0.6),
+        "top_p": 0.95,
+        "stream": True,
+        "reasoning_budget": 0,
+    }).encode()
+    req = urllib.request.Request(_NIM_TEXT_URL, data=body, method="POST", headers={
+        "Authorization": f"Bearer {NIM_KEY}", "Content-Type": "application/json", "Accept": "text/event-stream",
+    })
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        for raw in resp:
+            line = raw.decode("utf-8").strip()
+            if not line.startswith("data:"):
+                continue
+            data = line[5:].strip()
+            if data == "[DONE]":
+                break
+            try:
+                delta = _json.loads(data)["choices"][0]["delta"].get("content")
+            except Exception:  # noqa: BLE001
+                continue
+            if delta:
+                yield delta
 @fastapi_app.post("/voxcpm-tts")
 async def voxcpm_tts(request: Request):
     body = await request.json()
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
 _NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
+# NIM text/LLM lives on a DIFFERENT host (OpenAI-compatible chat completions) than the
+# image/genai host above, but uses the same nvapi-… key. Powers the Nemotron coding model.
+_NIM_TEXT_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
+_NIM_NEMOTRON_MODEL = os.environ.get("TINY_NEMOTRON_NIM_MODEL", "nvidia/nemotron-3-nano-30b-a3b")
 # id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
 _NIM_PROVIDERS = {
     "flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
                         if stop.is_set():
                             break
                         loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
+                elif model == "mellum2-zerogpu":
+                    if not MELLUM_SPACE:
+                        raise llm.LlmUnavailable("TINY_MELLUM_SPACE not set")
+                    for chunk in _mellum_stream(system, user, max_tokens, temperature):
+                        if stop.is_set():
+                            break
+                        loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
+                elif model == "nemotron-3-nano-30b-nim":
+                    if not NIM_KEY:
+                        raise llm.LlmUnavailable("NVIDIA_NIM_API_KEY not set")
+                    for chunk in _nim_text_stream(system, user, max_tokens, temperature):
+                        if stop.is_set():
+                            break
+                        loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
                 else:
                     for chunk in llm.stream_chat(
                         system,

web/codingModel.js ADDED Viewed

	@@ -0,0 +1,59 @@

+// Coding-model store for the Skill Forge. SEPARATE from runtime.js (the persona/diary
+// "Text Generation Model") so picking a coding model never clobbers the writer model.
+// Both candidates are large (Mellum2 ~8GB, Nemotron-30B ~24GB) with no browser-viable
+// build, so this is server-only: every choice routes through the same server endpoint
+// (/text/generate/stream) the `server` engine uses, by model id. Mellum2 is a ZeroGPU
+// sidecar (TINY_MELLUM_SPACE); Nemotron-30B routes through hosted NVIDIA NIM
+// (NVIDIA_NIM_API_KEY) since it's too big to self-host.
+import { statsTracker } from '/web/genStats.js'
+import { streamSse } from '/web/sseText.js'
+const MODELS = [
+  { id: 'nemotron-3-nano-30b-nim', label: 'Nemotron 3 Nano 30B-A3B', params: '30B (3B active)', backend: 'NVIDIA NIM', note: 'reasoning + agentic code (NVIDIA)' },
+  { id: 'mellum2-zerogpu', label: 'Mellum2 12B-A2.5B', params: '12B (2.5B active)', backend: 'ZeroGPU sidecar', note: 'code model (JetBrains)' },
+]
+const DEFAULT = 'nemotron-3-nano-30b-nim'
+const KEY = 'tinyarmy.codingModel'
+const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
+const loadStr = (k) => { try { return localStorage.getItem(k) || '' } catch { return '' } }
+let _sel = (() => { const s = loadStr(KEY); return MODELS.some((m) => m.id === s) ? s : DEFAULT })()
+const _listeners = new Set()
+export function onCodingModelChange(fn) { _listeners.add(fn); return () => _listeners.delete(fn) }
+const _notify = () => { for (const fn of _listeners) { try { fn() } catch { /* ignore */ } } }
+export const listCodingModels = () => MODELS
+export const getCodingModelId = () => _sel
+export const currentCodingModel = () => get(_sel)
+export function setCodingModel(id) {
+  if (!MODELS.some((m) => m.id === id) || id === _sel) return
+  _sel = id
+  try { localStorage.setItem(KEY, id) } catch { /* ignore */ }
+  _notify()
+}
+// Stream a coding-model completion. Same delta protocol as engineServer.stream.
+export async function streamCoding(system, user, { maxTokens = 512, temperature = 0.6, onToken, onStats, signal } = {}) {
+  const st = statsTracker(onStats)
+  let full = ''
+  await streamSse('/text/generate/stream', {
+    model: _sel,
+    system,
+    user,
+    max_tokens: maxTokens,
+    temperature,
+  }, {
+    signal,
+    onEvent(evt, parsed) {
+      if (evt !== 'delta') return
+      const piece = parsed?.content || ''
+      if (!piece) return
+      full += piece
+      onToken?.(piece)
+      st.tick()
+    },
+  })
+  return { text: full, stats: st.finish() }
+}

web/codingModelBar.js ADDED Viewed

	@@ -0,0 +1,40 @@

+// Coding-model picker for the Settings page. A trimmed modelBar: no engine selector and
+// no cache/delete controls (both models run server-side, as a ZeroGPU sidecar or via
+// hosted NVIDIA NIM, so there is no browser download). Drives the shared codingModel.js
+// store, so the Skill Forge uses whatever is chosen here.
+import {
+  listCodingModels, getCodingModelId, setCodingModel, currentCodingModel, onCodingModelChange,
+} from '/web/codingModel.js'
+function el(tag, props = {}, kids = []) {
+  const n = document.createElement(tag)
+  for (const [k, v] of Object.entries(props)) {
+    if (k === 'class') n.className = v
+    else if (k.startsWith('on') && typeof v === 'function') n.addEventListener(k.slice(2), v)
+    else if (v != null) n.setAttribute(k, v)
+  }
+  for (const kid of [].concat(kids)) if (kid != null) n.append(kid)
+  return n
+}
+export function mountCodingModelBar(host, { onChange } = {}) {
+  const sel = el('select', { class: 'model-select' })
+  const info = el('div', { class: 'model-info' })
+  host.append(el('div', { class: 'model-bar' }, [
+    el('label', { class: 'persona-label' }, 'Model'),
+    sel,
+    el('div', { class: 'model-row' }, [info]),
+  ]))
+  function render() {
+    sel.replaceChildren(...listCodingModels().map((m) =>
+      el('option', { value: m.id }, `${m.label}${m.params ? ` · ${m.params}` : ''}`)))
+    sel.value = getCodingModelId()
+    const m = currentCodingModel()
+    info.textContent = [m.params, m.backend, m.note].filter(Boolean).join(' · ')
+  }
+  sel.addEventListener('change', () => { setCodingModel(sel.value); render(); onChange && onChange(sel.value) })
+  onCodingModelChange(render)
+  render()
+  return { refresh: render }
+}

web/engineServer.js CHANGED Viewed

@@ -2,6 +2,7 @@
 // lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
 // hosted model such as Tiny Aya Global.
 import { statsTracker } from '/web/genStats.js'
 const MODELS = [
   { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
@@ -10,46 +11,11 @@ const MODELS = [
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
-async function streamSse(body, { onEvent, signal } = {}) {
-  const res = await fetch('/text/generate/stream', {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify(body),
-    signal,
-  })
-  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
-  const reader = res.body.getReader()
-  const decoder = new TextDecoder()
-  let buf = ''
-  while (true) {
-    const { value, done } = await reader.read()
-    if (done) break
-    buf += decoder.decode(value, { stream: true })
-    const events = buf.split(/\n\n/)
-    buf = events.pop() ?? ''
-    for (const evChunk of events) {
-      const lines = evChunk.split('\n')
-      let evt = 'message'
-      const dataLines = []
-      for (const line of lines) {
-        if (line.startsWith('event:')) evt = line.slice(6).trim()
-        else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
-      }
-      const data = dataLines.join('\n')
-      if (!data) continue
-      let parsed = null
-      try { parsed = JSON.parse(data) } catch { /* ignore */ }
-      if (evt === 'error') throw new Error(parsed?.error || data)
-      onEvent?.(evt, parsed, data)
-    }
-  }
-}
 async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
   const m = get(id)
   const st = statsTracker(onStats)
   let full = ''
-  await streamSse({
     model: m.id,
     system,
     user,

 // lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
 // hosted model such as Tiny Aya Global.
 import { statsTracker } from '/web/genStats.js'
+import { streamSse } from '/web/sseText.js'
 const MODELS = [
   { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
 async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
   const m = get(id)
   const st = statsTracker(onStats)
   let full = ''
+  await streamSse('/text/generate/stream', {
     model: m.id,
     system,
     user,

web/settingsPanel.js CHANGED Viewed

@@ -11,6 +11,7 @@ import { mountTtsBar } from '/web/ttsBar.js'
 import { mountImagenBar } from '/web/imagenBar.js'
 import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
 import { mountQualityBar } from '/web/qualityBar.js'
 function el(tag, props = {}, kids = []) {
   const n = document.createElement(tag)
@@ -58,6 +59,10 @@ export function mountSettingsPanel() {
     injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
       'The system prompt that writes each hero (name, about, quote and voice design). ' +
       'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
   }
   new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
   tryInject()

 import { mountImagenBar } from '/web/imagenBar.js'
 import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
 import { mountQualityBar } from '/web/qualityBar.js'
+import { mountCodingModelBar } from '/web/codingModelBar.js'
 function el(tag, props = {}, kids = []) {
   const n = document.createElement(tag)
     injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
       'The system prompt that writes each hero (name, about, quote and voice design). ' +
       'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
+    injectSection(sample, 'tac-coding-model-settings', 'Coding Model',
+      'The model that powers the Skill Forge — it writes a skill for a chosen hero. ' +
+      'Nemotron 3 Nano (NVIDIA) runs via NVIDIA NIM; Mellum2 (JetBrains) runs as a ' +
+      'ZeroGPU sidecar.', mountCodingModelBar)
   }
   new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
   tryInject()

web/shell/nav.json CHANGED Viewed

@@ -12,6 +12,7 @@
       "title": "Sandbox",
       "items": [
         { "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
         { "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
         { "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
         { "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },

       "title": "Sandbox",
       "items": [
         { "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
+        { "label": "Skill Forge", "icon": "⚒", "href": "#/sandbox/skill-forge", "view": "sandbox", "page": "skill-forge", "space": "Skill Forge" },
         { "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
         { "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
         { "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },

web/skillForgePanel.js ADDED Viewed

	@@ -0,0 +1,106 @@

+// Skill Forge — the Sandbox surface that uses the Coding Model to author a combat
+// skill for a chosen hero. Pick a recruited persona, describe the skill you want, and
+// the coding model (Nemotron via NIM, or Mellum2 via ZeroGPU — see Settings → Coding
+// Model) writes a self-contained skill definition tailored to that hero. For now it
+// just generates and shows the skill; wiring it into the battle engine comes later.
+import { streamCoding, currentCodingModel, onCodingModelChange } from '/web/codingModel.js'
+import { listPersonas, getPersona, onRosterChange } from '/web/personaStore.js'
+function el(tag, props = {}, kids = []) {
+  const n = document.createElement(tag)
+  for (const [k, v] of Object.entries(props)) {
+    if (k === 'class') n.className = v
+    else if (k.startsWith('on') && typeof v === 'function') n.addEventListener(k.slice(2), v)
+    else if (v != null) n.setAttribute(k, v)
+  }
+  for (const kid of [].concat(kids)) if (kid != null) n.append(kid)
+  return n
+}
+const SYSTEM = [
+  'You are the Skill Forge for a fantasy auto-battler. You author ONE combat skill for a',
+  'specific hero, tailored to their class, story and personality. Respond with exactly:',
+  'a one-line skill name, a one-sentence flavour description, then a fenced ```json block',
+  'with fields {name, type:"active"|"passive", target:"self"|"ally"|"enemy"|"area",',
+  'cooldown:<int seconds>, effect:"<short mechanical description>"}.',
+  'Keep it concise and balanced. Output only the skill — no preamble, no commentary.',
+].join(' ')
+function personaBlock(p) {
+  if (!p) return ''
+  return [
+    `Hero: ${p.name || 'Unnamed'}${p.unitClass ? ` — ${p.unitClass}` : ''}`,
+    p.about ? `About: ${p.about}` : '',
+    p.personality ? `Personality: ${p.personality}` : '',
+    p.specialty ? `Specialty: ${p.specialty}` : '',
+  ].filter(Boolean).join('\n')
+}
+export function mountSkillForgePanel(host) {
+  const sel = el('select', { class: 'persona-input skillforge-hero' })
+  const req = el('textarea', { class: 'persona-prompt-edit skillforge-req', rows: 4,
+    placeholder: 'what skill should this hero learn? (e.g. “a defensive shout that shields nearby allies”)' })
+  const btn = el('button', { class: 'persona-go', type: 'button' }, '⚒ Forge skill')
+  const status = el('div', { class: 'persona-status' })
+  const out = el('pre', { class: 'persona-think skillforge-out' })
+  const empty = el('div', { class: 'persona-roster-empty' },
+    'No heroes yet — recruit one in the Personas tab, then come back to forge its skills.')
+  const controls = el('aside', { class: 'persona-controls skillforge' }, [
+    el('div', { class: 'persona-sec' }, [el('div', { class: 'persona-sec-title' }, 'Skill Forge'), el('span')]),
+    el('label', { class: 'persona-label' }, 'Hero'), sel,
+    empty,
+    el('label', { class: 'persona-label' }, 'Skill request'), req,
+    el('div', { class: 'persona-prompt-actions' }, [btn]),
+    status,
+    el('label', { class: 'persona-label' }, 'Forged skill'), out,
+  ])
+  host.append(controls)
+  function refreshHeroes() {
+    const people = listPersonas()
+    const prev = sel.value
+    sel.replaceChildren(...people.map((p) => el('option', { value: p.id }, p.name || 'Unnamed hero')))
+    if (people.some((p) => p.id === prev)) sel.value = prev
+    const none = people.length === 0
+    empty.style.display = none ? '' : 'none'
+    sel.style.display = none ? 'none' : ''
+    btn.disabled = none
+  }
+  function refreshStatus() {
+    if (!status.dataset.busy) status.textContent = `Coding model: ${currentCodingModel().label}`
+  }
+  let running = false
+  async function forge() {
+    if (running) return
+    const p = getPersona(sel.value)
+    if (!p) { status.textContent = 'Pick a hero first.'; return }
+    const ask = req.value.trim()
+    if (!ask) { status.textContent = 'Describe the skill you want.'; return }
+    running = true; status.dataset.busy = '1'; btn.disabled = true
+    out.textContent = ''
+    status.textContent = `Forging with ${currentCodingModel().label}…`
+    const user = `${personaBlock(p)}\n\nSkill to create: ${ask}`
+    try {
+      const { stats } = await streamCoding(SYSTEM, user, {
+        maxTokens: 512,
+        temperature: 0.6,
+        onToken: (t) => { out.textContent += t },
+      })
+      const tps = stats && stats.tokPerSec ? ` · ${stats.tokPerSec} tok/s` : ''
+      status.textContent = `Done${tps}.`
+    } catch (e) {
+      status.textContent = 'Forge failed: ' + (e && e.message ? e.message : e)
+    } finally {
+      running = false; delete status.dataset.busy; btn.disabled = listPersonas().length === 0
+    }
+  }
+  btn.addEventListener('click', forge)
+  onRosterChange(refreshHeroes)
+  onCodingModelChange(refreshStatus)
+  refreshHeroes(); refreshStatus()
+  return { refresh: refreshHeroes }
+}

web/sseText.js ADDED Viewed

	@@ -0,0 +1,38 @@

+// Shared SSE reader for the server text-generation endpoint (/text/generate/stream).
+// Parses the `event:`/`data:` wire format and dispatches each event to onEvent.
+// Used by both engineServer.js (the model-bar "Server / ZeroGPU" engine) and
+// codingModel.js (the Skill Forge coding-model picker) so the parser lives once.
+export async function streamSse(url, body, { onEvent, signal } = {}) {
+  const res = await fetch(url, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+    signal,
+  })
+  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
+  const reader = res.body.getReader()
+  const decoder = new TextDecoder()
+  let buf = ''
+  while (true) {
+    const { value, done } = await reader.read()
+    if (done) break
+    buf += decoder.decode(value, { stream: true })
+    const events = buf.split(/\n\n/)
+    buf = events.pop() ?? ''
+    for (const evChunk of events) {
+      const lines = evChunk.split('\n')
+      let evt = 'message'
+      const dataLines = []
+      for (const line of lines) {
+        if (line.startsWith('event:')) evt = line.slice(6).trim()
+        else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
+      }
+      const data = dataLines.join('\n')
+      if (!data) continue
+      let parsed = null
+      try { parsed = JSON.parse(data) } catch { /* ignore */ }
+      if (evt === 'error') throw new Error(parsed?.error || data)
+      onEvent?.(evt, parsed, data)
+    }
+  }
+}

web/tiny.js CHANGED Viewed

@@ -10,6 +10,7 @@ import { mountSpritePlayground } from '/web/playground.js'
 import { mountPersonaPanel } from '/web/personaPanel.js'
 import { mountDiaryPanel } from '/web/diaryPanel.js'
 import { mountSettingsPanel } from '/web/settingsPanel.js'
 function whenEl(id, cb) {
   const found = document.getElementById(id)
@@ -58,6 +59,7 @@ whenEl('sprite-stage', async (el) => {
 // ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
 whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
 whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
 // Engine + model + voice pickers are injected into Gradio's own Settings page (footer
 // link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
 mountSettingsPanel()

 import { mountPersonaPanel } from '/web/personaPanel.js'
 import { mountDiaryPanel } from '/web/diaryPanel.js'
 import { mountSettingsPanel } from '/web/settingsPanel.js'
+import { mountSkillForgePanel } from '/web/skillForgePanel.js'
 function whenEl(id, cb) {
   const found = document.getElementById(id)
 // ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
 whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
 whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
+whenEl('skillforge-stage', (el) => { mountSkillForgePanel(el) })
 // Engine + model + voice pickers are injected into Gradio's own Settings page (footer
 // link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
 mountSettingsPanel()