polats Claude Opus 4.8 (1M context) committed on
Commit
1f1908e
·
1 Parent(s): de78f87

Add Coding Model setting + Skill Forge sandbox

Browse files

New "Coding Model" settings section (below Persona Prompt) selecting between
two backends:
• Nemotron 3 Nano 30B-A3B via NVIDIA NIM (integrate.api.nvidia.com,
reasoning off) — too large to self-host on ZeroGPU.
• Mellum2 12B-A2.5B via a ZeroGPU sidecar (polats/tiny-army-mellum-zerogpu),
same /generate(+_stream) contract as the Aya/MiniCPM5 sidecars.

New "Skill Forge" tab (Sandbox group, after Sprite Animations): pick a
recruited hero, describe a skill, and the chosen coding model authors a
tailored skill definition. Reuses personaStore (roster) + streamCoding.

Backend: app.py routes mellum2-zerogpu and nemotron-3-nano-30b-nim model ids
in /text/generate/stream; _nim_text_stream() streams NIM's OpenAI-compatible
SSE. Frontend: codingModel.js store (separate from the persona/diary model),
codingModelBar.js picker, shared sseText.js SSE reader (extracted from
engineServer.js).

Needs Space config: TINY_MELLUM_SPACE + NVIDIA_NIM_API_KEY.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

.gitignore CHANGED
@@ -1,3 +1,4 @@
 
1
  __pycache__/
2
  *.pyc
3
  .venv/
 
1
+ .env
2
  __pycache__/
3
  *.pyc
4
  .venv/
app.py CHANGED
@@ -203,6 +203,10 @@ with gr.Blocks(title="Tiny Army") as ui:
203
  # div. No dark box here: the picker is the sidebar, the canvas is the
204
  # stage (framed by CSS), so it mirrors auto-battler's layout.
205
  gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
 
 
 
 
206
  # Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
207
  battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
208
  sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
@@ -265,6 +269,9 @@ TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
265
  VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
266
  TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
267
  MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
 
 
 
268
  _local_tts = None # VoiceDesign model
269
  _local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
270
  _local_tts_lock = threading.Lock()
@@ -513,6 +520,47 @@ def _minicpm5_stream(system, user, max_tokens, temperature):
513
  yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
514
 
515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  @fastapi_app.post("/voxcpm-tts")
517
  async def voxcpm_tts(request: Request):
518
  body = await request.json()
@@ -557,6 +605,10 @@ IMAGE_MODE = os.environ.get("TINY_IMAGE_MODE", "").strip().lower()
557
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
558
  NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
559
  _NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
 
 
 
 
560
  # id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
561
  _NIM_PROVIDERS = {
562
  "flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
@@ -819,6 +871,20 @@ async def text_generate_stream(request: Request):
819
  if stop.is_set():
820
  break
821
  loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  else:
823
  for chunk in llm.stream_chat(
824
  system,
 
203
  # div. No dark box here: the picker is the sidebar, the canvas is the
204
  # stage (framed by CSS), so it mirrors auto-battler's layout.
205
  gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
206
+ with gr.Tab("Skill Forge"):
207
+ # Sandbox: the Coding Model (Settings → Coding Model) authors a combat skill
208
+ # for a chosen hero. Filled by web/skillForgePanel.js.
209
+ gr.HTML('<div id="skillforge-stage" style="overflow:hidden"></div>')
210
  # Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
211
  battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
212
  sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
 
269
  VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
270
  TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
271
  MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
272
+ # Coding model (Skill Forge): Mellum2 is a ZeroGPU sidecar (same /generate contract as
273
+ # Aya); Nemotron-30B is too big to self-host, so it runs via hosted NVIDIA NIM (below).
274
+ MELLUM_SPACE = os.environ.get("TINY_MELLUM_SPACE", "").strip()
275
  _local_tts = None # VoiceDesign model
276
  _local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
277
  _local_tts_lock = threading.Lock()
 
520
  yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
521
 
522
 
523
def _mellum_stream(system, user, max_tokens, temperature):
    """Yield text chunks for the Mellum2 coding model.

    Delegates to ``_space_text_stream`` with ``MELLUM_SPACE`` as the target,
    forwarding the prompt and sampling arguments unchanged.
    """
    chunks = _space_text_stream(MELLUM_SPACE, system, user, max_tokens, temperature)
    yield from chunks
525
+
526
+
527
def _nim_text_stream(system, user, max_tokens, temperature, model=None):
    """Stream completion text from NVIDIA NIM's OpenAI-compatible chat endpoint.

    Posts a streaming chat-completions request to ``_NIM_TEXT_URL`` using the
    ``NIM_KEY`` bearer token and yields each non-empty ``delta.content`` string
    as it arrives. ``reasoning_budget=0`` is sent to keep the coding output
    clean (Nemotron defaults thinking ON, which would otherwise emit a
    <think> trace).

    Args:
        system: Optional system prompt; omitted from the request when blank.
        user: User prompt; ``None`` is treated as an empty string.
        max_tokens: Completion budget; falsy values fall back to 512.
        temperature: Sampling temperature; ``None`` falls back to 0.6.
        model: NIM model id; defaults to ``_NIM_NEMOTRON_MODEL``.

    Yields:
        Text fragments in the order the endpoint emits them.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on transport or
            HTTP failures (``HTTPError`` is a subclass).
    """
    model = model or _NIM_NEMOTRON_MODEL  # defined later in the file; resolve at call time
    messages = []
    if system and system.strip():
        messages.append({"role": "system", "content": system.strip()})
    messages.append({"role": "user", "content": (user or "").strip()})
    payload = _json.dumps({
        "model": model,
        "messages": messages,
        "max_tokens": int(max_tokens or 512),
        "temperature": float(temperature if temperature is not None else 0.6),
        "top_p": 0.95,
        "stream": True,
        "reasoning_budget": 0,
    }).encode()
    headers = {
        "Authorization": f"Bearer {NIM_KEY}",
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
    }
    req = urllib.request.Request(_NIM_TEXT_URL, data=payload, method="POST", headers=headers)
    with urllib.request.urlopen(req, timeout=120) as resp:
        for raw in resp:
            # A single malformed byte must not abort an otherwise healthy stream.
            line = raw.decode("utf-8", errors="replace").strip()
            if not line.startswith("data:"):
                continue
            data = line[5:].strip()
            if data == "[DONE]":
                break
            try:
                delta = _json.loads(data)["choices"][0]["delta"].get("content")
            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                # Skip chunks that are not a well-formed chat delta (bad JSON,
                # empty ``choices``, missing/null ``delta``); anything else is a
                # real bug and should surface rather than be swallowed.
                continue
            if delta:
                yield delta
562
+
563
+
564
  @fastapi_app.post("/voxcpm-tts")
565
  async def voxcpm_tts(request: Request):
566
  body = await request.json()
 
605
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
606
  NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
607
  _NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
608
+ # NIM text/LLM lives on a DIFFERENT host (OpenAI-compatible chat completions) than the
609
+ # image/genai host above, but uses the same nvapi-… key. Powers the Nemotron coding model.
610
+ _NIM_TEXT_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
611
+ _NIM_NEMOTRON_MODEL = os.environ.get("TINY_NEMOTRON_NIM_MODEL", "nvidia/nemotron-3-nano-30b-a3b")
612
  # id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
613
  _NIM_PROVIDERS = {
614
  "flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
 
871
  if stop.is_set():
872
  break
873
  loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
874
+ elif model == "mellum2-zerogpu":
875
+ if not MELLUM_SPACE:
876
+ raise llm.LlmUnavailable("TINY_MELLUM_SPACE not set")
877
+ for chunk in _mellum_stream(system, user, max_tokens, temperature):
878
+ if stop.is_set():
879
+ break
880
+ loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
881
+ elif model == "nemotron-3-nano-30b-nim":
882
+ if not NIM_KEY:
883
+ raise llm.LlmUnavailable("NVIDIA_NIM_API_KEY not set")
884
+ for chunk in _nim_text_stream(system, user, max_tokens, temperature):
885
+ if stop.is_set():
886
+ break
887
+ loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
888
  else:
889
  for chunk in llm.stream_chat(
890
  system,
web/codingModel.js ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Coding-model store for the Skill Forge. SEPARATE from runtime.js (the persona/diary
2
+ // "Text Generation Model") so picking a coding model never clobbers the writer model.
3
+ // Both candidates are large (Mellum2 ~8GB, Nemotron-30B ~24GB) with no browser-viable
4
+ // build, so this is ZeroGPU-only: every choice routes through the same server endpoint
5
+ // (/text/generate/stream) the `server` engine uses, by model id. Mellum2 is a ZeroGPU
6
+ // sidecar (TINY_MELLUM_SPACE); Nemotron-30B routes through hosted NVIDIA NIM
7
+ // (NVIDIA_NIM_API_KEY) since it's too big to self-host.
8
+ import { statsTracker } from '/web/genStats.js'
9
+ import { streamSse } from '/web/sseText.js'
10
+
11
+ const MODELS = [
12
+ { id: 'nemotron-3-nano-30b-nim', label: 'Nemotron 3 Nano 30B-A3B', params: '30B (3B active)', backend: 'NVIDIA NIM', note: 'reasoning + agentic code (NVIDIA)' },
13
+ { id: 'mellum2-zerogpu', label: 'Mellum2 12B-A2.5B', params: '12B (2.5B active)', backend: 'ZeroGPU sidecar', note: 'code model (JetBrains)' },
14
+ ]
15
+ const DEFAULT = 'nemotron-3-nano-30b-nim'
16
+ const KEY = 'tinyarmy.codingModel'
17
+
18
+ const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
19
+ const loadStr = (k) => { try { return localStorage.getItem(k) || '' } catch { return '' } }
20
+
21
+ let _sel = (() => { const s = loadStr(KEY); return MODELS.some((m) => m.id === s) ? s : DEFAULT })()
22
+
23
+ const _listeners = new Set()
24
+ export function onCodingModelChange(fn) { _listeners.add(fn); return () => _listeners.delete(fn) }
25
+ const _notify = () => { for (const fn of _listeners) { try { fn() } catch { /* ignore */ } } }
26
+
27
+ export const listCodingModels = () => MODELS
28
+ export const getCodingModelId = () => _sel
29
+ export const currentCodingModel = () => get(_sel)
30
/**
 * Select the coding model by id, persist the choice and notify subscribers.
 * Unknown ids and re-selecting the current id are ignored (no notification).
 */
export function setCodingModel(id) {
  const known = MODELS.some((m) => m.id === id)
  if (!known || id === _sel) return

  _sel = id
  try {
    localStorage.setItem(KEY, id)
  } catch {
    /* ignore */
  }
  _notify()
}
36
+
37
+ // Stream a coding-model completion. Same delta protocol as engineServer.stream.
38
/**
 * Stream a completion from the currently selected coding model.
 *
 * Posts to /text/generate/stream with the selected model id and accumulates the
 * `content` of every `delta` event; other event types are ignored here.
 *
 * @param {string} system  System prompt.
 * @param {string} user    User prompt.
 * @param {object} [opts]
 * @param {number} [opts.maxTokens=512]   Sent as `max_tokens`.
 * @param {number} [opts.temperature=0.6]
 * @param {(piece: string) => void} [opts.onToken]  Called for each non-empty piece.
 * @param {Function} [opts.onStats]  Handed to statsTracker.
 * @param {AbortSignal} [opts.signal]  Forwarded to the SSE request.
 * @returns {Promise<{text: string, stats: any}>} Full text plus the tracker's final stats.
 */
export async function streamCoding(system, user, { maxTokens = 512, temperature = 0.6, onToken, onStats, signal } = {}) {
  const tracker = statsTracker(onStats)
  let text = ''

  const payload = {
    model: _sel,
    system,
    user,
    max_tokens: maxTokens,
    temperature,
  }

  const handleEvent = (evt, parsed) => {
    if (evt !== 'delta') return
    const piece = parsed?.content || ''
    if (!piece) return
    text += piece
    onToken?.(piece)
    tracker.tick()
  }

  await streamSse('/text/generate/stream', payload, { signal, onEvent: handleEvent })
  return { text, stats: tracker.finish() }
}
web/codingModelBar.js ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Coding-model picker for the Settings page. A trimmed modelBar: no engine selector and
2
+ // no cache/delete controls (these are ZeroGPU sidecars with no browser download). Drives
3
+ // the shared codingModel.js store, so the Skill Forge uses whatever is chosen here.
4
+ import {
5
+ listCodingModels, getCodingModelId, setCodingModel, currentCodingModel, onCodingModelChange,
6
+ } from '/web/codingModel.js'
7
+
8
/**
 * Tiny DOM builder: create `tag`, apply `props`, append `kids`.
 * `class` sets className; `on<event>` function props become listeners for
 * `<event>`; other non-null props become attributes. Null/undefined kids are skipped.
 */
function el(tag, props = {}, kids = []) {
  const node = document.createElement(tag)
  Object.entries(props).forEach(([key, val]) => {
    if (key === 'class') {
      node.className = val
    } else if (key.startsWith('on') && typeof val === 'function') {
      node.addEventListener(key.slice(2), val)
    } else if (val != null) {
      node.setAttribute(key, val)
    }
  })
  // [].concat accepts either a single child or an array of children.
  for (const child of [].concat(kids)) {
    if (child != null) node.append(child)
  }
  return node
}
18
+
19
/**
 * Mount the coding-model picker into `host`.
 *
 * Builds a <select> of the available coding models plus an info line, keeps both
 * in sync with the codingModel.js store, and calls `onChange(id)` after the user
 * picks a model.
 *
 * @returns {{refresh: Function}} `refresh` re-renders the options and info line.
 */
export function mountCodingModelBar(host, { onChange } = {}) {
  const picker = el('select', { class: 'model-select' })
  const details = el('div', { class: 'model-info' })
  const caption = el('label', { class: 'persona-label' }, 'Model')
  const detailsRow = el('div', { class: 'model-row' }, [details])
  host.append(el('div', { class: 'model-bar' }, [caption, picker, detailsRow]))

  function render() {
    const options = listCodingModels().map((m) => {
      const suffix = m.params ? ` · ${m.params}` : ''
      return el('option', { value: m.id }, `${m.label}${suffix}`)
    })
    picker.replaceChildren(...options)
    picker.value = getCodingModelId()

    const { params, backend, note } = currentCodingModel()
    details.textContent = [params, backend, note].filter(Boolean).join(' · ')
  }

  picker.addEventListener('change', () => {
    setCodingModel(picker.value)
    render()
    if (onChange) onChange(picker.value)
  })
  onCodingModelChange(render)
  render()
  return { refresh: render }
}
web/engineServer.js CHANGED
@@ -2,6 +2,7 @@
2
  // lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
3
  // hosted model such as Tiny Aya Global.
4
  import { statsTracker } from '/web/genStats.js'
 
5
 
6
  const MODELS = [
7
  { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
@@ -10,46 +11,11 @@ const MODELS = [
10
  ]
11
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
12
 
13
- async function streamSse(body, { onEvent, signal } = {}) {
14
- const res = await fetch('/text/generate/stream', {
15
- method: 'POST',
16
- headers: { 'Content-Type': 'application/json' },
17
- body: JSON.stringify(body),
18
- signal,
19
- })
20
- if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
21
- const reader = res.body.getReader()
22
- const decoder = new TextDecoder()
23
- let buf = ''
24
- while (true) {
25
- const { value, done } = await reader.read()
26
- if (done) break
27
- buf += decoder.decode(value, { stream: true })
28
- const events = buf.split(/\n\n/)
29
- buf = events.pop() ?? ''
30
- for (const evChunk of events) {
31
- const lines = evChunk.split('\n')
32
- let evt = 'message'
33
- const dataLines = []
34
- for (const line of lines) {
35
- if (line.startsWith('event:')) evt = line.slice(6).trim()
36
- else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
37
- }
38
- const data = dataLines.join('\n')
39
- if (!data) continue
40
- let parsed = null
41
- try { parsed = JSON.parse(data) } catch { /* ignore */ }
42
- if (evt === 'error') throw new Error(parsed?.error || data)
43
- onEvent?.(evt, parsed, data)
44
- }
45
- }
46
- }
47
-
48
  async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
49
  const m = get(id)
50
  const st = statsTracker(onStats)
51
  let full = ''
52
- await streamSse({
53
  model: m.id,
54
  system,
55
  user,
 
2
  // lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
3
  // hosted model such as Tiny Aya Global.
4
  import { statsTracker } from '/web/genStats.js'
5
+ import { streamSse } from '/web/sseText.js'
6
 
7
  const MODELS = [
8
  { id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
 
11
  ]
12
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
15
  const m = get(id)
16
  const st = statsTracker(onStats)
17
  let full = ''
18
+ await streamSse('/text/generate/stream', {
19
  model: m.id,
20
  system,
21
  user,
web/settingsPanel.js CHANGED
@@ -11,6 +11,7 @@ import { mountTtsBar } from '/web/ttsBar.js'
11
  import { mountImagenBar } from '/web/imagenBar.js'
12
  import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
13
  import { mountQualityBar } from '/web/qualityBar.js'
 
14
 
15
  function el(tag, props = {}, kids = []) {
16
  const n = document.createElement(tag)
@@ -58,6 +59,10 @@ export function mountSettingsPanel() {
58
  injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
59
  'The system prompt that writes each hero (name, about, quote and voice design). ' +
60
  'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
 
 
 
 
61
  }
62
  new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
63
  tryInject()
 
11
  import { mountImagenBar } from '/web/imagenBar.js'
12
  import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
13
  import { mountQualityBar } from '/web/qualityBar.js'
14
+ import { mountCodingModelBar } from '/web/codingModelBar.js'
15
 
16
  function el(tag, props = {}, kids = []) {
17
  const n = document.createElement(tag)
 
59
  injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
60
  'The system prompt that writes each hero (name, about, quote and voice design). ' +
61
  'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
62
+ injectSection(sample, 'tac-coding-model-settings', 'Coding Model',
63
+ 'The model that powers the Skill Forge — it writes a skill for a chosen hero. ' +
64
+ 'Nemotron 3 Nano (NVIDIA) runs via NVIDIA NIM; Mellum2 (JetBrains) runs as a ' +
65
+ 'ZeroGPU sidecar.', mountCodingModelBar)
66
  }
67
  new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
68
  tryInject()
web/shell/nav.json CHANGED
@@ -12,6 +12,7 @@
12
  "title": "Sandbox",
13
  "items": [
14
  { "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
 
15
  { "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
16
  { "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
17
  { "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },
 
12
  "title": "Sandbox",
13
  "items": [
14
  { "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
15
+ { "label": "Skill Forge", "icon": "⚒", "href": "#/sandbox/skill-forge", "view": "sandbox", "page": "skill-forge", "space": "Skill Forge" },
16
  { "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
17
  { "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
18
  { "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },
web/skillForgePanel.js ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Skill Forge — the Sandbox surface that uses the Coding Model to author a combat
2
+ // skill for a chosen hero. Pick a recruited persona, describe the skill you want, and
3
+ // the coding model (Nemotron via NIM, or Mellum2 via ZeroGPU — see Settings → Coding
4
+ // Model) writes a self-contained skill definition tailored to that hero. For now it
5
+ // just generates and shows the skill; wiring it into the battle engine comes later.
6
+ import { streamCoding, currentCodingModel, onCodingModelChange } from '/web/codingModel.js'
7
+ import { listPersonas, getPersona, onRosterChange } from '/web/personaStore.js'
8
+
9
/**
 * Tiny DOM builder: create `tag`, apply `props`, append `kids`.
 * `class` sets className; `on<event>` function props become listeners for
 * `<event>`; other non-null props become attributes. Null/undefined kids are skipped.
 */
function el(tag, props = {}, kids = []) {
  const node = document.createElement(tag)
  Object.entries(props).forEach(([key, val]) => {
    if (key === 'class') {
      node.className = val
    } else if (key.startsWith('on') && typeof val === 'function') {
      node.addEventListener(key.slice(2), val)
    } else if (val != null) {
      node.setAttribute(key, val)
    }
  })
  // [].concat accepts either a single child or an array of children.
  for (const child of [].concat(kids)) {
    if (child != null) node.append(child)
  }
  return node
}
19
+
20
+ const SYSTEM = [
21
+ 'You are the Skill Forge for a fantasy auto-battler. You author ONE combat skill for a',
22
+ 'specific hero, tailored to their class, story and personality. Respond with exactly:',
23
+ 'a one-line skill name, a one-sentence flavour description, then a fenced ```json block',
24
+ 'with fields {name, type:"active"|"passive", target:"self"|"ally"|"enemy"|"area",',
25
+ 'cooldown:<int seconds>, effect:"<short mechanical description>"}.',
26
+ 'Keep it concise and balanced. Output only the skill — no preamble, no commentary.',
27
+ ].join(' ')
28
+
29
/**
 * Render a persona as the plain-text hero summary placed in the user prompt.
 * Returns '' for a missing persona; otherwise a "Hero:" line followed by one
 * line per populated field (about, personality, specialty).
 */
function personaBlock(p) {
  if (!p) return ''

  const classSuffix = p.unitClass ? ` — ${p.unitClass}` : ''
  const lines = [`Hero: ${p.name || 'Unnamed'}${classSuffix}`]
  if (p.about) lines.push(`About: ${p.about}`)
  if (p.personality) lines.push(`Personality: ${p.personality}`)
  if (p.specialty) lines.push(`Specialty: ${p.specialty}`)
  return lines.join('\n')
}
38
+
39
/**
 * Mount the Skill Forge panel into `host`.
 *
 * Renders a hero picker (from the persona roster), a skill-request textarea, a
 * Forge button, a status line and an output area. Clicking Forge streams a skill
 * definition from the selected coding model into the output area token by token.
 *
 * @param {HTMLElement} host  Container the panel is appended to.
 * @returns {{refresh: Function}} `refresh` rebuilds the hero list from the roster.
 */
export function mountSkillForgePanel(host) {
  const sel = el('select', { class: 'persona-input skillforge-hero' })
  const req = el('textarea', {
    class: 'persona-prompt-edit skillforge-req',
    rows: 4,
    placeholder: 'what skill should this hero learn? (e.g. “a defensive shout that shields nearby allies”)',
  })
  const btn = el('button', { class: 'persona-go', type: 'button' }, '⚒ Forge skill')
  const status = el('div', { class: 'persona-status' })
  const out = el('pre', { class: 'persona-think skillforge-out' })
  const empty = el('div', { class: 'persona-roster-empty' },
    'No heroes yet — recruit one in the Personas tab, then come back to forge its skills.')

  const controls = el('aside', { class: 'persona-controls skillforge' }, [
    el('div', { class: 'persona-sec' }, [el('div', { class: 'persona-sec-title' }, 'Skill Forge'), el('span')]),
    el('label', { class: 'persona-label' }, 'Hero'), sel,
    empty,
    el('label', { class: 'persona-label' }, 'Skill request'), req,
    el('div', { class: 'persona-prompt-actions' }, [btn]),
    status,
    el('label', { class: 'persona-label' }, 'Forged skill'), out,
  ])
  host.append(controls)

  // True while a forge request is streaming; guards re-entry and the button state.
  let running = false

  function refreshHeroes() {
    const people = listPersonas()
    const prev = sel.value
    sel.replaceChildren(...people.map((p) => el('option', { value: p.id }, p.name || 'Unnamed hero')))
    // Keep the previous selection when that hero still exists.
    if (people.some((p) => p.id === prev)) sel.value = prev
    const none = people.length === 0
    empty.style.display = none ? '' : 'none'
    sel.style.display = none ? 'none' : ''
    // A roster change mid-forge must not re-enable the button before the run ends.
    btn.disabled = none || running
  }

  function refreshStatus() {
    // Don't overwrite the progress message while a forge is in flight.
    if (!status.dataset.busy) status.textContent = `Coding model: ${currentCodingModel().label}`
  }

  async function forge() {
    if (running) return
    const p = getPersona(sel.value)
    if (!p) { status.textContent = 'Pick a hero first.'; return }
    const ask = req.value.trim()
    if (!ask) { status.textContent = 'Describe the skill you want.'; return }

    running = true
    status.dataset.busy = '1'
    btn.disabled = true
    out.textContent = ''
    status.textContent = `Forging with ${currentCodingModel().label}…`
    const user = `${personaBlock(p)}\n\nSkill to create: ${ask}`
    try {
      const { stats } = await streamCoding(SYSTEM, user, {
        maxTokens: 512,
        temperature: 0.6,
        onToken: (t) => { out.textContent += t },
      })
      const tps = stats && stats.tokPerSec ? ` · ${stats.tokPerSec} tok/s` : ''
      status.textContent = `Done${tps}.`
    } catch (e) {
      status.textContent = 'Forge failed: ' + (e && e.message ? e.message : e)
    } finally {
      running = false
      delete status.dataset.busy
      btn.disabled = listPersonas().length === 0
    }
  }

  btn.addEventListener('click', forge)
  onRosterChange(refreshHeroes)
  onCodingModelChange(refreshStatus)
  refreshHeroes(); refreshStatus()
  return { refresh: refreshHeroes }
}
web/sseText.js ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Shared SSE reader for the server text-generation endpoint (/text/generate/stream).
2
+ // Parses the `event:`/`data:` wire format and dispatches each event to onEvent.
3
+ // Used by both engineServer.js (the model-bar "Server / ZeroGPU" engine) and
4
+ // codingModel.js (the Skill Forge coding-model picker) so the parser lives once.
5
/**
 * POST `body` as JSON to `url` and dispatch the server-sent events in the response.
 *
 * Events are separated by a blank line; LF, CRLF and CR line endings are all
 * accepted. Each event's `data:` lines are joined with '\n' and JSON-parsed when
 * possible. An `error` event rejects with its `error` field (or raw data) as the
 * message. An event still incomplete when the stream ends is not dispatched.
 *
 * @param {string} url
 * @param {object} body  Serialised with JSON.stringify.
 * @param {object} [opts]
 * @param {(evt: string, parsed: any, data: string) => void} [opts.onEvent]
 *   Called per event with its name ('message' when unnamed), the parsed JSON
 *   (null if the data is not JSON) and the raw data string.
 * @param {AbortSignal} [opts.signal]  Forwarded to fetch.
 * @throws {Error} `HTTP <status>` on a non-OK or body-less response, or the
 *   message of a server `error` event.
 */
export async function streamSse(url, body, { onEvent, signal } = {}) {
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal,
  })
  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)

  const reader = res.body.getReader()
  const decoder = new TextDecoder()

  // Parse one complete event block and hand it to onEvent.
  const dispatch = (evChunk) => {
    let evt = 'message'
    const dataLines = []
    for (const line of evChunk.split(/\r\n|\n|\r/)) {
      if (line.startsWith('event:')) evt = line.slice(6).trim()
      else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
    }
    const data = dataLines.join('\n')
    if (!data) return
    let parsed = null
    try { parsed = JSON.parse(data) } catch { /* non-JSON payload: raw data only */ }
    if (evt === 'error') throw new Error(parsed?.error || data)
    onEvent?.(evt, parsed, data)
  }

  let buf = ''
  let finished = false
  try {
    while (true) {
      const { value, done } = await reader.read()
      if (done) { finished = true; break }
      buf += decoder.decode(value, { stream: true })
      // Split on the accumulated buffer so a separator straddling two network
      // chunks is still recognised; the unterminated tail stays in `buf`.
      const events = buf.split(/\r\n\r\n|\n\n|\r\r/)
      buf = events.pop() ?? ''
      for (const evChunk of events) dispatch(evChunk)
    }
  } finally {
    // Leaving early (error event, throwing callback, abort) would otherwise keep
    // the response stream open; cancel it and ignore a secondary failure.
    if (!finished) reader.cancel().catch(() => {})
  }
}
web/tiny.js CHANGED
@@ -10,6 +10,7 @@ import { mountSpritePlayground } from '/web/playground.js'
10
  import { mountPersonaPanel } from '/web/personaPanel.js'
11
  import { mountDiaryPanel } from '/web/diaryPanel.js'
12
  import { mountSettingsPanel } from '/web/settingsPanel.js'
 
13
 
14
  function whenEl(id, cb) {
15
  const found = document.getElementById(id)
@@ -58,6 +59,7 @@ whenEl('sprite-stage', async (el) => {
58
  // ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
59
  whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
60
  whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
 
61
  // Engine + model + voice pickers are injected into Gradio's own Settings page (footer
62
  // link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
63
  mountSettingsPanel()
 
10
  import { mountPersonaPanel } from '/web/personaPanel.js'
11
  import { mountDiaryPanel } from '/web/diaryPanel.js'
12
  import { mountSettingsPanel } from '/web/settingsPanel.js'
13
+ import { mountSkillForgePanel } from '/web/skillForgePanel.js'
14
 
15
  function whenEl(id, cb) {
16
  const found = document.getElementById(id)
 
59
  // ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
60
  whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
61
  whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
62
+ whenEl('skillforge-stage', (el) => { mountSkillForgePanel(el) })
63
  // Engine + model + voice pickers are injected into Gradio's own Settings page (footer
64
  // link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
65
  mountSettingsPanel()