Spaces:
Running
Add Coding Model setting + Skill Forge sandbox
Browse files
New "Coding Model" settings section (below Persona Prompt) selecting between
two backends:
• Nemotron 3 Nano 30B-A3B via NVIDIA NIM (integrate.api.nvidia.com,
reasoning off) — too large to self-host on ZeroGPU.
• Mellum2 12B-A2.5B via a ZeroGPU sidecar (polats/tiny-army-mellum-zerogpu),
same /generate(+_stream) contract as the Aya/MiniCPM5 sidecars.
New "Skill Forge" tab (Sandbox group, after Sprite Animations): pick a
recruited hero, describe a skill, and the chosen coding model authors a
tailored skill definition. Reuses personaStore (roster) + streamCoding.
Backend: app.py routes mellum2-zerogpu and nemotron-3-nano-30b-nim model ids
in /text/generate/stream; _nim_text_stream() streams NIM's OpenAI-compatible
SSE. Frontend: codingModel.js store (separate from the persona/diary model),
codingModelBar.js picker, shared sseText.js SSE reader (extracted from
engineServer.js).
Needs Space config: TINY_MELLUM_SPACE + NVIDIA_NIM_API_KEY.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- .gitignore +1 -0
- app.py +66 -0
- web/codingModel.js +59 -0
- web/codingModelBar.js +40 -0
- web/engineServer.js +2 -36
- web/settingsPanel.js +5 -0
- web/shell/nav.json +1 -0
- web/skillForgePanel.js +106 -0
- web/sseText.js +38 -0
- web/tiny.js +2 -0
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
*.pyc
|
| 3 |
.venv/
|
|
|
|
| 1 |
+
.env
|
| 2 |
__pycache__/
|
| 3 |
*.pyc
|
| 4 |
.venv/
|
|
@@ -203,6 +203,10 @@ with gr.Blocks(title="Tiny Army") as ui:
|
|
| 203 |
# div. No dark box here: the picker is the sidebar, the canvas is the
|
| 204 |
# stage (framed by CSS), so it mirrors auto-battler's layout.
|
| 205 |
gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
# Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
|
| 207 |
battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
|
| 208 |
sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
|
|
@@ -265,6 +269,9 @@ TTS_MODE = os.environ.get("TINY_TTS_MODE", "").strip().lower()
|
|
| 265 |
VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
|
| 266 |
TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
|
| 267 |
MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
|
|
|
|
|
|
|
|
|
|
| 268 |
_local_tts = None # VoiceDesign model
|
| 269 |
_local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
|
| 270 |
_local_tts_lock = threading.Lock()
|
|
@@ -513,6 +520,47 @@ def _minicpm5_stream(system, user, max_tokens, temperature):
|
|
| 513 |
yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
|
| 514 |
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
@fastapi_app.post("/voxcpm-tts")
|
| 517 |
async def voxcpm_tts(request: Request):
|
| 518 |
body = await request.json()
|
|
@@ -557,6 +605,10 @@ IMAGE_MODE = os.environ.get("TINY_IMAGE_MODE", "").strip().lower()
|
|
| 557 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 558 |
NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
|
| 559 |
_NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
# id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
|
| 561 |
_NIM_PROVIDERS = {
|
| 562 |
"flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
|
|
@@ -819,6 +871,20 @@ async def text_generate_stream(request: Request):
|
|
| 819 |
if stop.is_set():
|
| 820 |
break
|
| 821 |
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
else:
|
| 823 |
for chunk in llm.stream_chat(
|
| 824 |
system,
|
|
|
|
| 203 |
# div. No dark box here: the picker is the sidebar, the canvas is the
|
| 204 |
# stage (framed by CSS), so it mirrors auto-battler's layout.
|
| 205 |
gr.HTML('<div id="sprite-stage" style="overflow:hidden"></div>')
|
| 206 |
+
with gr.Tab("Skill Forge"):
|
| 207 |
+
# Sandbox: the Coding Model (Settings → Coding Model) authors a combat skill
|
| 208 |
+
# for a chosen hero. Filled by web/skillForgePanel.js.
|
| 209 |
+
gr.HTML('<div id="skillforge-stage" style="overflow:hidden"></div>')
|
| 210 |
# Pixi canvases start hidden (0×0); re-measure them when a tab is shown.
|
| 211 |
battle_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
|
| 212 |
sprite_tab.select(None, None, None, js="()=>window.tinyResize&&window.tinyResize()")
|
|
|
|
| 269 |
VOXCPM_SPACE = os.environ.get("TINY_VOXCPM_SPACE", "").strip()
|
| 270 |
TINY_AYA_SPACE = os.environ.get("TINY_AYA_SPACE", "").strip()
|
| 271 |
MINICPM5_SPACE = os.environ.get("TINY_MINICPM5_SPACE", "").strip()
|
| 272 |
+
# Coding model (Skill Forge): Mellum2 is a ZeroGPU sidecar (same /generate contract as
|
| 273 |
+
# Aya); Nemotron-30B is too big to self-host, so it runs via hosted NVIDIA NIM (below).
|
| 274 |
+
MELLUM_SPACE = os.environ.get("TINY_MELLUM_SPACE", "").strip()
|
| 275 |
_local_tts = None # VoiceDesign model
|
| 276 |
_local_clone = None # Base model (voice clone) — lazy, only if a clone is requested
|
| 277 |
_local_tts_lock = threading.Lock()
|
|
|
|
| 520 |
yield from _space_text_stream(MINICPM5_SPACE, system, user, max_tokens, temperature)
|
| 521 |
|
| 522 |
|
| 523 |
+
def _mellum_stream(system, user, max_tokens, temperature):
    """Yield text chunks for one prompt from the Mellum2 sidecar Space.

    Thin adapter: every argument is forwarded unchanged to
    ``_space_text_stream`` with ``MELLUM_SPACE`` as the target Space.
    """
    sidecar_chunks = _space_text_stream(MELLUM_SPACE, system, user, max_tokens, temperature)
    yield from sidecar_chunks
|
| 525 |
+
|
| 526 |
+
|
| 527 |
+
def _nim_text_stream(system, user, max_tokens, temperature, model=None):
    """Yield content deltas from NVIDIA NIM's OpenAI-compatible chat endpoint.

    Posts one streaming chat-completions request to ``_NIM_TEXT_URL`` using the
    ``NIM_KEY`` bearer token (the same nvapi-… key as the portrait NIM) and yields
    every non-empty ``choices[0].delta.content`` value as it arrives.
    ``reasoning_budget`` is sent as 0: per the original author, Nemotron defaults
    thinking ON and would otherwise emit a <think> trace into the output.

    Args:
        system: Optional system prompt; left out of the request when blank.
        user: User prompt; ``None`` is sent as an empty string.
        max_tokens: Completion cap; falsy values fall back to 512.
        temperature: Sampling temperature; ``None`` falls back to 0.6.
        model: NIM model id. Defaults to ``_NIM_NEMOTRON_MODEL``, which is defined
            later in the file and is therefore resolved at call time.
    """
    chosen_model = model or _NIM_NEMOTRON_MODEL

    def _content_of(event_json):
        # Best-effort parse: any malformed or unexpected chunk is simply skipped.
        try:
            return _json.loads(event_json)["choices"][0]["delta"].get("content")
        except Exception:  # noqa: BLE001
            return None

    chat = []
    if system and system.strip():
        chat.append({"role": "system", "content": system.strip()})
    chat.append({"role": "user", "content": (user or "").strip()})

    payload = {
        "model": chosen_model,
        "messages": chat,
        "max_tokens": int(max_tokens or 512),
        "temperature": float(0.6 if temperature is None else temperature),
        "top_p": 0.95,
        "stream": True,
        "reasoning_budget": 0,
    }
    request_headers = {
        "Authorization": f"Bearer {NIM_KEY}",
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
    }
    request = urllib.request.Request(
        _NIM_TEXT_URL,
        data=_json.dumps(payload).encode(),
        method="POST",
        headers=request_headers,
    )

    with urllib.request.urlopen(request, timeout=120) as resp:
        for raw_line in resp:
            text = raw_line.decode("utf-8").strip()
            # Only SSE data lines carry payloads; blanks and other fields are ignored.
            if not text.startswith("data:"):
                continue
            event_json = text[len("data:"):].strip()
            if event_json == "[DONE]":
                break
            piece = _content_of(event_json)
            if piece:
                yield piece
|
| 562 |
+
|
| 563 |
+
|
| 564 |
@fastapi_app.post("/voxcpm-tts")
|
| 565 |
async def voxcpm_tts(request: Request):
|
| 566 |
body = await request.json()
|
|
|
|
| 605 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 606 |
NIM_KEY = os.environ.get("NVIDIA_NIM_API_KEY", "")
|
| 607 |
_NIM_BASE = "https://ai.api.nvidia.com/v1/genai"
|
| 608 |
+
# NIM text/LLM lives on a DIFFERENT host (OpenAI-compatible chat completions) than the
|
| 609 |
+
# image/genai host above, but uses the same nvapi-… key. Powers the Nemotron coding model.
|
| 610 |
+
_NIM_TEXT_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 611 |
+
_NIM_NEMOTRON_MODEL = os.environ.get("TINY_NEMOTRON_NIM_MODEL", "nvidia/nemotron-3-nano-30b-a3b")
|
| 612 |
# id -> NIM FLUX preset (same shapes woid uses: schnell fast, dev higher quality).
|
| 613 |
_NIM_PROVIDERS = {
|
| 614 |
"flux-schnell": {"model": "black-forest-labs/flux.1-schnell", "steps": 4, "cfg": 0.0},
|
|
|
|
| 871 |
if stop.is_set():
|
| 872 |
break
|
| 873 |
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
| 874 |
+
elif model == "mellum2-zerogpu":
|
| 875 |
+
if not MELLUM_SPACE:
|
| 876 |
+
raise llm.LlmUnavailable("TINY_MELLUM_SPACE not set")
|
| 877 |
+
for chunk in _mellum_stream(system, user, max_tokens, temperature):
|
| 878 |
+
if stop.is_set():
|
| 879 |
+
break
|
| 880 |
+
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
| 881 |
+
elif model == "nemotron-3-nano-30b-nim":
|
| 882 |
+
if not NIM_KEY:
|
| 883 |
+
raise llm.LlmUnavailable("NVIDIA_NIM_API_KEY not set")
|
| 884 |
+
for chunk in _nim_text_stream(system, user, max_tokens, temperature):
|
| 885 |
+
if stop.is_set():
|
| 886 |
+
break
|
| 887 |
+
loop.call_soon_threadsafe(q.put_nowait, ("delta", chunk))
|
| 888 |
else:
|
| 889 |
for chunk in llm.stream_chat(
|
| 890 |
system,
|
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Coding-model store for the Skill Forge. SEPARATE from runtime.js (the persona/diary
|
| 2 |
+
// "Text Generation Model") so picking a coding model never clobbers the writer model.
|
| 3 |
+
// Both candidates are large (Mellum2 ~8GB, Nemotron-30B ~24GB) with no browser-viable
|
| 4 |
+
// build, so this is ZeroGPU-only: every choice routes through the same server endpoint
|
| 5 |
+
// (/text/generate/stream) the `server` engine uses, by model id. Mellum2 is a ZeroGPU
|
| 6 |
+
// sidecar (TINY_MELLUM_SPACE); Nemotron-30B routes through hosted NVIDIA NIM
|
| 7 |
+
// (NVIDIA_NIM_API_KEY) since it's too big to self-host.
|
| 8 |
+
import { statsTracker } from '/web/genStats.js'
|
| 9 |
+
import { streamSse } from '/web/sseText.js'
|
| 10 |
+
|
| 11 |
+
// Selectable coding backends. The first entry is also the fallback that get()
// returns for an unknown id.
const MODELS = [
  {
    id: 'nemotron-3-nano-30b-nim',
    label: 'Nemotron 3 Nano 30B-A3B',
    params: '30B (3B active)',
    backend: 'NVIDIA NIM',
    note: 'reasoning + agentic code (NVIDIA)',
  },
  {
    id: 'mellum2-zerogpu',
    label: 'Mellum2 12B-A2.5B',
    params: '12B (2.5B active)',
    backend: 'ZeroGPU sidecar',
    note: 'code model (JetBrains)',
  },
]
const DEFAULT = 'nemotron-3-nano-30b-nim'
const KEY = 'tinyarmy.codingModel'

// Look a model up by id, falling back to the first catalogue entry.
function get(id) {
  const hit = MODELS.find((entry) => entry.id === id)
  return hit || MODELS[0]
}

// Read a localStorage string; any storage failure reads as ''.
function loadStr(k) {
  try {
    return localStorage.getItem(k) || ''
  } catch {
    return ''
  }
}

// Current selection: the persisted id when it is still a known model, else DEFAULT.
let _sel = (() => {
  const stored = loadStr(KEY)
  const known = MODELS.some((entry) => entry.id === stored)
  return known ? stored : DEFAULT
})()

const _listeners = new Set()

// Subscribe to selection changes; the returned function unsubscribes.
export function onCodingModelChange(fn) {
  _listeners.add(fn)
  return () => _listeners.delete(fn)
}

// Call every subscriber; one throwing listener must not stop the others.
function _notify() {
  _listeners.forEach((listener) => {
    try {
      listener()
    } catch {
      /* ignore */
    }
  })
}

export function listCodingModels() {
  return MODELS
}

export function getCodingModelId() {
  return _sel
}

export function currentCodingModel() {
  return get(_sel)
}
|
| 30 |
+
// Select a coding model by id. Unknown ids and re-selecting the current model are
// no-ops; otherwise the choice is stored in memory, persisted best-effort to
// localStorage, and subscribers are notified.
export function setCodingModel(id) {
  const known = MODELS.some((entry) => entry.id === id)
  const changed = id !== _sel
  if (known && changed) {
    _sel = id
    try {
      localStorage.setItem(KEY, id)
    } catch {
      /* ignore */
    }
    _notify()
  }
}
|
| 36 |
+
|
| 37 |
+
// Stream a coding-model completion. Same delta protocol as engineServer.stream.
|
| 38 |
+
// Stream one completion from the currently selected coding model via
// /text/generate/stream. Each non-empty `delta` event's content is passed to
// onToken and counted by the stats tracker. Resolves to the concatenated text
// plus the tracker's final stats.
export async function streamCoding(system, user, { maxTokens = 512, temperature = 0.6, onToken, onStats, signal } = {}) {
  const tracker = statsTracker(onStats)
  const pieces = []

  // The selection is captured here, when the request is built.
  const request = {
    model: _sel,
    system,
    user,
    max_tokens: maxTokens,
    temperature,
  }

  const handleEvent = (kind, payload) => {
    if (kind === 'delta') {
      const piece = payload?.content || ''
      if (piece) {
        pieces.push(piece)
        onToken?.(piece)
        tracker.tick()
      }
    }
  }

  await streamSse('/text/generate/stream', request, { signal, onEvent: handleEvent })
  return { text: pieces.join(''), stats: tracker.finish() }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Coding-model picker for the Settings page. A trimmed modelBar: no engine selector and
|
| 2 |
+
// no cache/delete controls (these are ZeroGPU sidecars with no browser download). Drives
|
| 3 |
+
// the shared codingModel.js store, so the Skill Forge uses whatever is chosen here.
|
| 4 |
+
import {
|
| 5 |
+
listCodingModels, getCodingModelId, setCodingModel, currentCodingModel, onCodingModelChange,
|
| 6 |
+
} from '/web/codingModel.js'
|
| 7 |
+
|
| 8 |
+
// Tiny DOM builder: create <tag>, apply props, append children.
//  - `class` sets className
//  - an `on*` prop holding a function becomes an event listener (prefix stripped)
//  - any other non-null prop becomes an attribute
// `kids` may be a single child or an array; null/undefined children are skipped.
function el(tag, props = {}, kids = []) {
  const node = document.createElement(tag)

  Object.entries(props).forEach(([key, value]) => {
    if (key === 'class') {
      node.className = value
      return
    }
    if (key.startsWith('on') && typeof value === 'function') {
      node.addEventListener(key.slice(2), value)
      return
    }
    if (value != null) node.setAttribute(key, value)
  })

  ;[].concat(kids)
    .filter((child) => child != null)
    .forEach((child) => { node.append(child) })

  return node
}
|
| 18 |
+
|
| 19 |
+
// Mount the coding-model picker into `host`: a <select> of the available models
// plus an info line for the current one. Changing the select updates the shared
// codingModel.js store and then calls the optional onChange(id). Returns
// { refresh } to force a re-render.
export function mountCodingModelBar(host, { onChange } = {}) {
  const picker = el('select', { class: 'model-select' })
  const details = el('div', { class: 'model-info' })

  const caption = el('label', { class: 'persona-label' }, 'Model')
  const detailsRow = el('div', { class: 'model-row' }, [details])
  host.append(el('div', { class: 'model-bar' }, [caption, picker, detailsRow]))

  const optionFor = (model) => {
    const suffix = model.params ? ` · ${model.params}` : ''
    return el('option', { value: model.id }, `${model.label}${suffix}`)
  }

  function render() {
    const options = listCodingModels().map((model) => optionFor(model))
    picker.replaceChildren(...options)
    picker.value = getCodingModelId()

    const active = currentCodingModel()
    const facts = [active.params, active.backend, active.note]
    details.textContent = facts.filter(Boolean).join(' · ')
  }

  picker.addEventListener('change', () => {
    setCodingModel(picker.value)
    render()
    if (onChange) onChange(picker.value)
  })

  // Re-render when the store changes from elsewhere.
  onCodingModelChange(render)
  render()
  return { refresh: render }
}
|
|
@@ -2,6 +2,7 @@
|
|
| 2 |
// lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
|
| 3 |
// hosted model such as Tiny Aya Global.
|
| 4 |
import { statsTracker } from '/web/genStats.js'
|
|
|
|
| 5 |
|
| 6 |
const MODELS = [
|
| 7 |
{ id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
|
|
@@ -10,46 +11,11 @@ const MODELS = [
|
|
| 10 |
]
|
| 11 |
const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
|
| 12 |
|
| 13 |
-
async function streamSse(body, { onEvent, signal } = {}) {
|
| 14 |
-
const res = await fetch('/text/generate/stream', {
|
| 15 |
-
method: 'POST',
|
| 16 |
-
headers: { 'Content-Type': 'application/json' },
|
| 17 |
-
body: JSON.stringify(body),
|
| 18 |
-
signal,
|
| 19 |
-
})
|
| 20 |
-
if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
|
| 21 |
-
const reader = res.body.getReader()
|
| 22 |
-
const decoder = new TextDecoder()
|
| 23 |
-
let buf = ''
|
| 24 |
-
while (true) {
|
| 25 |
-
const { value, done } = await reader.read()
|
| 26 |
-
if (done) break
|
| 27 |
-
buf += decoder.decode(value, { stream: true })
|
| 28 |
-
const events = buf.split(/\n\n/)
|
| 29 |
-
buf = events.pop() ?? ''
|
| 30 |
-
for (const evChunk of events) {
|
| 31 |
-
const lines = evChunk.split('\n')
|
| 32 |
-
let evt = 'message'
|
| 33 |
-
const dataLines = []
|
| 34 |
-
for (const line of lines) {
|
| 35 |
-
if (line.startsWith('event:')) evt = line.slice(6).trim()
|
| 36 |
-
else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
|
| 37 |
-
}
|
| 38 |
-
const data = dataLines.join('\n')
|
| 39 |
-
if (!data) continue
|
| 40 |
-
let parsed = null
|
| 41 |
-
try { parsed = JSON.parse(data) } catch { /* ignore */ }
|
| 42 |
-
if (evt === 'error') throw new Error(parsed?.error || data)
|
| 43 |
-
onEvent?.(evt, parsed, data)
|
| 44 |
-
}
|
| 45 |
-
}
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
|
| 49 |
const m = get(id)
|
| 50 |
const st = statsTracker(onStats)
|
| 51 |
let full = ''
|
| 52 |
-
await streamSse({
|
| 53 |
model: m.id,
|
| 54 |
system,
|
| 55 |
user,
|
|
|
|
| 2 |
// lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
|
| 3 |
// hosted model such as Tiny Aya Global.
|
| 4 |
import { statsTracker } from '/web/genStats.js'
|
| 5 |
+
import { streamSse } from '/web/sseText.js'
|
| 6 |
|
| 7 |
const MODELS = [
|
| 8 |
{ id: 'server-local', label: 'Configured server model', params: 'local/remote', note: 'uses TINY_LLM_* on the Space or local app' },
|
|
|
|
| 11 |
]
|
| 12 |
const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
|
| 15 |
const m = get(id)
|
| 16 |
const st = statsTracker(onStats)
|
| 17 |
let full = ''
|
| 18 |
+
await streamSse('/text/generate/stream', {
|
| 19 |
model: m.id,
|
| 20 |
system,
|
| 21 |
user,
|
|
@@ -11,6 +11,7 @@ import { mountTtsBar } from '/web/ttsBar.js'
|
|
| 11 |
import { mountImagenBar } from '/web/imagenBar.js'
|
| 12 |
import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
|
| 13 |
import { mountQualityBar } from '/web/qualityBar.js'
|
|
|
|
| 14 |
|
| 15 |
function el(tag, props = {}, kids = []) {
|
| 16 |
const n = document.createElement(tag)
|
|
@@ -58,6 +59,10 @@ export function mountSettingsPanel() {
|
|
| 58 |
injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
|
| 59 |
'The system prompt that writes each hero (name, about, quote and voice design). ' +
|
| 60 |
'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
}
|
| 62 |
new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
|
| 63 |
tryInject()
|
|
|
|
| 11 |
import { mountImagenBar } from '/web/imagenBar.js'
|
| 12 |
import { mountPersonaPromptBar } from '/web/personaPromptBar.js'
|
| 13 |
import { mountQualityBar } from '/web/qualityBar.js'
|
| 14 |
+
import { mountCodingModelBar } from '/web/codingModelBar.js'
|
| 15 |
|
| 16 |
function el(tag, props = {}, kids = []) {
|
| 17 |
const n = document.createElement(tag)
|
|
|
|
| 59 |
injectSection(sample, 'tac-persona-prompt-settings', 'Persona Prompt',
|
| 60 |
'The system prompt that writes each hero (name, about, quote and voice design). ' +
|
| 61 |
'Edit it to change their style; Save uses it on the next “Recruit hero”.', mountPersonaPromptBar)
|
| 62 |
+
injectSection(sample, 'tac-coding-model-settings', 'Coding Model',
|
| 63 |
+
'The model that powers the Skill Forge — it writes a skill for a chosen hero. ' +
|
| 64 |
+
'Nemotron 3 Nano (NVIDIA) runs via NVIDIA NIM; Mellum2 (JetBrains) runs as a ' +
|
| 65 |
+
'ZeroGPU sidecar.', mountCodingModelBar)
|
| 66 |
}
|
| 67 |
new MutationObserver(tryInject).observe(document.body, { childList: true, subtree: true })
|
| 68 |
tryInject()
|
|
@@ -12,6 +12,7 @@
|
|
| 12 |
"title": "Sandbox",
|
| 13 |
"items": [
|
| 14 |
{ "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
|
|
|
|
| 15 |
{ "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
|
| 16 |
{ "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
|
| 17 |
{ "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },
|
|
|
|
| 12 |
"title": "Sandbox",
|
| 13 |
"items": [
|
| 14 |
{ "label": "Sprite Animations", "icon": "🎞", "href": "#/sandbox/sprite-animations", "view": "sandbox", "page": "movement", "space": "Sprite Animations" },
|
| 15 |
+
{ "label": "Skill Forge", "icon": "⚒", "href": "#/sandbox/skill-forge", "view": "sandbox", "page": "skill-forge", "space": "Skill Forge" },
|
| 16 |
{ "label": "Classes", "href": "#/sandbox/classes", "view": "sandbox", "page": "classes" },
|
| 17 |
{ "label": "Enemies", "href": "#/sandbox/enemies", "view": "sandbox", "page": "enemies" },
|
| 18 |
{ "label": "Levels", "href": "#/sandbox/levels", "view": "sandbox", "page": "levels" },
|
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Skill Forge — the Sandbox surface that uses the Coding Model to author a combat
|
| 2 |
+
// skill for a chosen hero. Pick a recruited persona, describe the skill you want, and
|
| 3 |
+
// the coding model (Nemotron via NIM, or Mellum2 via ZeroGPU — see Settings → Coding
|
| 4 |
+
// Model) writes a self-contained skill definition tailored to that hero. For now it
|
| 5 |
+
// just generates and shows the skill; wiring it into the battle engine comes later.
|
| 6 |
+
import { streamCoding, currentCodingModel, onCodingModelChange } from '/web/codingModel.js'
|
| 7 |
+
import { listPersonas, getPersona, onRosterChange } from '/web/personaStore.js'
|
| 8 |
+
|
| 9 |
+
// Minimal element factory used by this panel.
// props: `class` -> className; `on<event>` with a function value -> listener for
// <event>; everything else that is not null/undefined -> attribute.
// kids: one child or an array of children (nodes or strings); nullish ones are dropped.
function el(tag, props = {}, kids = []) {
  const element = document.createElement(tag)

  for (const name of Object.keys(props)) {
    const value = props[name]
    const isHandler = name.startsWith('on') && typeof value === 'function'
    if (name === 'class') {
      element.className = value
    } else if (isHandler) {
      element.addEventListener(name.slice(2), value)
    } else if (value != null) {
      element.setAttribute(name, value)
    }
  }

  const children = [].concat(kids)
  for (const child of children) {
    if (child == null) continue
    element.append(child)
  }
  return element
}
|
| 19 |
+
|
| 20 |
+
const SYSTEM = [
|
| 21 |
+
'You are the Skill Forge for a fantasy auto-battler. You author ONE combat skill for a',
|
| 22 |
+
'specific hero, tailored to their class, story and personality. Respond with exactly:',
|
| 23 |
+
'a one-line skill name, a one-sentence flavour description, then a fenced ```json block',
|
| 24 |
+
'with fields {name, type:"active"|"passive", target:"self"|"ally"|"enemy"|"area",',
|
| 25 |
+
'cooldown:<int seconds>, effect:"<short mechanical description>"}.',
|
| 26 |
+
'Keep it concise and balanced. Output only the skill — no preamble, no commentary.',
|
| 27 |
+
].join(' ')
|
| 28 |
+
|
| 29 |
+
// Render a persona as the plain-text hero summary placed at the top of the user
// prompt. Always emits a "Hero:" line (name falls back to 'Unnamed', class is
// appended when present); About / Personality / Specialty lines appear only for
// truthy fields. A missing persona yields ''.
function personaBlock(p) {
  if (!p) return ''

  const heroName = p.name || 'Unnamed'
  const classSuffix = p.unitClass ? ` — ${p.unitClass}` : ''
  const summary = [`Hero: ${heroName}${classSuffix}`]

  const optionalFields = [
    ['About', 'about'],
    ['Personality', 'personality'],
    ['Specialty', 'specialty'],
  ]
  for (const [label, field] of optionalFields) {
    const value = p[field]
    if (value) summary.push(`${label}: ${value}`)
  }

  return summary.join('\n')
}
|
| 38 |
+
|
| 39 |
+
// Mount the Skill Forge UI into `host`: a hero picker (fed by the persona roster),
// a free-text skill request, a Forge button, a status line and the streamed output.
// Clicking Forge streams a completion from the selected coding model and appends
// each token to the output as it arrives. Returns { refresh } to re-read the roster.
export function mountSkillForgePanel(host) {
  const sel = el('select', { class: 'persona-input skillforge-hero' })
  const req = el('textarea', { class: 'persona-prompt-edit skillforge-req', rows: 4,
    placeholder: 'what skill should this hero learn? (e.g. “a defensive shout that shields nearby allies”)' })
  const btn = el('button', { class: 'persona-go', type: 'button' }, '⚒ Forge skill')
  const status = el('div', { class: 'persona-status' })
  const out = el('pre', { class: 'persona-think skillforge-out' })
  const empty = el('div', { class: 'persona-roster-empty' },
    'No heroes yet — recruit one in the Personas tab, then come back to forge its skills.')

  const controls = el('aside', { class: 'persona-controls skillforge' }, [
    el('div', { class: 'persona-sec' }, [el('div', { class: 'persona-sec-title' }, 'Skill Forge'), el('span')]),
    el('label', { class: 'persona-label' }, 'Hero'), sel,
    empty,
    el('label', { class: 'persona-label' }, 'Skill request'), req,
    el('div', { class: 'persona-prompt-actions' }, [btn]),
    status,
    el('label', { class: 'persona-label' }, 'Forged skill'), out,
  ])
  host.append(controls)

  // True while a forge request is in flight. Declared ahead of the refresh
  // helpers because refreshHeroes consults it.
  let running = false

  // Rebuild the hero <select> from the roster, keeping the previous choice when
  // that hero still exists, and toggle the empty-roster hint.
  function refreshHeroes() {
    const people = listPersonas()
    const prev = sel.value
    sel.replaceChildren(...people.map((p) => el('option', { value: p.id }, p.name || 'Unnamed hero')))
    if (people.some((p) => p.id === prev)) sel.value = prev
    const none = people.length === 0
    empty.style.display = none ? '' : 'none'
    sel.style.display = none ? 'none' : ''
    // A roster change during a run must not re-enable the button; the `finally`
    // in forge() restores it once the run ends.
    btn.disabled = none || running
  }

  // Show the active coding model, unless a forge run currently owns the status line.
  function refreshStatus() {
    if (!status.dataset.busy) status.textContent = `Coding model: ${currentCodingModel().label}`
  }

  async function forge() {
    if (running) return
    const p = getPersona(sel.value)
    if (!p) { status.textContent = 'Pick a hero first.'; return }
    const ask = req.value.trim()
    if (!ask) { status.textContent = 'Describe the skill you want.'; return }
    running = true; status.dataset.busy = '1'; btn.disabled = true
    out.textContent = ''
    status.textContent = `Forging with ${currentCodingModel().label}…`
    const user = `${personaBlock(p)}\n\nSkill to create: ${ask}`
    try {
      const { stats } = await streamCoding(SYSTEM, user, {
        maxTokens: 512,
        temperature: 0.6,
        onToken: (t) => { out.textContent += t },
      })
      const tps = stats && stats.tokPerSec ? ` · ${stats.tokPerSec} tok/s` : ''
      status.textContent = `Done${tps}.`
    } catch (e) {
      status.textContent = 'Forge failed: ' + (e && e.message ? e.message : e)
    } finally {
      // Release the busy state and re-evaluate the button against the current roster.
      running = false; delete status.dataset.busy; btn.disabled = listPersonas().length === 0
    }
  }

  btn.addEventListener('click', forge)
  onRosterChange(refreshHeroes)
  onCodingModelChange(refreshStatus)
  refreshHeroes(); refreshStatus()
  return { refresh: refreshHeroes }
}
|
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Shared SSE reader for the server text-generation endpoint (/text/generate/stream).
|
| 2 |
+
// Parses the `event:`/`data:` wire format and dispatches each event to onEvent.
|
| 3 |
+
// Used by both engineServer.js (the model-bar "Server / ZeroGPU" engine) and
|
| 4 |
+
// codingModel.js (the Skill Forge coding-model picker) so the parser lives once.
|
| 5 |
+
// POST `body` as JSON to `url` and read the response as a server-sent event stream.
// Each complete event (terminated by a blank line) is dispatched as
// onEvent(eventName, parsedJsonOrNull, rawData); the event name defaults to
// 'message'. An `error` event rejects with its `error` field (or the raw data).
// Throws `HTTP <status>` when the response is not ok or has no body.
// `signal` is handed to fetch so the caller can abort.
//
// Line endings: both LF and CRLF are accepted, as the SSE wire format allows.
// An unterminated trailing event at end-of-stream is not dispatched.
export async function streamSse(url, body, { onEvent, signal } = {}) {
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
    signal,
  })
  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
  const reader = res.body.getReader()
  const decoder = new TextDecoder()
  let buf = ''
  try {
    while (true) {
      const { value, done } = await reader.read()
      if (done) break
      buf += decoder.decode(value, { stream: true })
      // Events are separated by a blank line; the last piece may be incomplete,
      // so it stays in the buffer until more bytes arrive.
      const events = buf.split(/\r?\n\r?\n/)
      buf = events.pop() ?? ''
      for (const evChunk of events) {
        let evt = 'message'
        const dataLines = []
        for (const line of evChunk.split(/\r?\n/)) {
          if (line.startsWith('event:')) evt = line.slice(6).trim()
          else if (line.startsWith('data:')) dataLines.push(line.slice(5).trimStart())
        }
        const data = dataLines.join('\n')
        if (!data) continue
        let parsed = null
        try { parsed = JSON.parse(data) } catch { /* ignore */ }
        if (evt === 'error') throw new Error(parsed?.error || data)
        onEvent?.(evt, parsed, data)
      }
    }
  } catch (err) {
    // Abnormal exit (server `error` event, throwing onEvent, abort): cancel the
    // reader so the response body is not left open. Cancellation failures are
    // irrelevant here; the original error is what the caller needs.
    reader.cancel().catch(() => {})
    throw err
  }
}
|
|
@@ -10,6 +10,7 @@ import { mountSpritePlayground } from '/web/playground.js'
|
|
| 10 |
import { mountPersonaPanel } from '/web/personaPanel.js'
|
| 11 |
import { mountDiaryPanel } from '/web/diaryPanel.js'
|
| 12 |
import { mountSettingsPanel } from '/web/settingsPanel.js'
|
|
|
|
| 13 |
|
| 14 |
function whenEl(id, cb) {
|
| 15 |
const found = document.getElementById(id)
|
|
@@ -58,6 +59,7 @@ whenEl('sprite-stage', async (el) => {
|
|
| 58 |
// ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
|
| 59 |
whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
|
| 60 |
whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
|
|
|
|
| 61 |
// Engine + model + voice pickers are injected into Gradio's own Settings page (footer
|
| 62 |
// link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
|
| 63 |
mountSettingsPanel()
|
|
|
|
| 10 |
import { mountPersonaPanel } from '/web/personaPanel.js'
|
| 11 |
import { mountDiaryPanel } from '/web/diaryPanel.js'
|
| 12 |
import { mountSettingsPanel } from '/web/settingsPanel.js'
|
| 13 |
+
import { mountSkillForgePanel } from '/web/skillForgePanel.js'
|
| 14 |
|
| 15 |
function whenEl(id, cb) {
|
| 16 |
const found = document.getElementById(id)
|
|
|
|
| 59 |
// ── Personas + War Diary tabs — in-browser llama.cpp (wllama), runs on the device ──
|
| 60 |
whenEl('persona-stage', (el) => { mountPersonaPanel(el) })
|
| 61 |
whenEl('diary-stage', (el) => { mountDiaryPanel(el) })
|
| 62 |
+
whenEl('skillforge-stage', (el) => { mountSkillForgePanel(el) })
|
| 63 |
// Engine + model + voice pickers are injected into Gradio's own Settings page (footer
|
| 64 |
// link / sidebar ⚙), shared across pages via the runtime.js + tts.js singletons.
|
| 65 |
mountSettingsPanel()
|