Spaces:
Running
Running
// Coding-model store for the Skill Forge. SEPARATE from runtime.js (the persona/diary
// "Text Generation Model") so picking a coding model never clobbers the writer model.
// All candidates are large (Mellum2 ~8GB, BLS Mini-Code 30B MoE, Nemotron-30B ~24GB) with no
// browser-viable build, so this is server-side only: every choice routes through the same server
// endpoint (/text/generate/stream) the `server` engine uses, by model id. Mellum2
// (TINY_MELLUM_SPACE) and BLS Mini-Code (TINY_BLS_CODE_SPACE) are ZeroGPU sidecars; Nemotron-30B
// routes through hosted NVIDIA NIM (NVIDIA_NIM_API_KEY) since it's too big to self-host.
| import { statsTracker } from '/web/genStats.js' | |
| import { streamSse } from '/web/sseText.js' | |
// Registry of selectable coding models. Each entry carries the id sent to the server plus
// display metadata (label, params, backend, note).
const MODELS = [
  {
    id: 'nemotron-3-nano-30b-nim',
    label: 'Nemotron 3 Nano 30B-A3B',
    params: '30B (3B active)',
    backend: 'NVIDIA NIM',
    note: 'reasoning + agentic code (NVIDIA)',
  },
  {
    id: 'mellum2-zerogpu',
    label: 'Mellum2 12B-A2.5B',
    params: '12B (2.5B active)',
    backend: 'ZeroGPU sidecar',
    note: 'code model (JetBrains)',
  },
  {
    id: 'bls-mini-code-zerogpu',
    label: 'BLS Mini-Code 1.0',
    params: '30B MoE',
    backend: 'ZeroGPU sidecar',
    note: 'code model (Cohere); reasoning suppressed',
  },
]

// Model id selected when nothing valid has been persisted.
const DEFAULT = 'nemotron-3-nano-30b-nim'

// localStorage key under which the selected model id is stored.
const KEY = 'tinyarmy.codingModel'

// Look up a registry entry by id; unknown ids fall back to the first registry entry.
const get = (id) => {
  const match = MODELS.find((entry) => entry.id === id)
  return match || MODELS[0]
}

// Read a string from localStorage. Yields '' when the key is absent/empty or when storage
// access throws.
const loadStr = (k) => {
  try {
    const stored = localStorage.getItem(k)
    return stored || ''
  } catch {
    return ''
  }
}

// Currently selected model id: the persisted value when it names a known model, else DEFAULT.
let _sel = (() => {
  const persisted = loadStr(KEY)
  for (const { id } of MODELS) {
    if (id === persisted) return persisted
  }
  return DEFAULT
})()

// Callbacks registered to be told when the selection changes.
const _listeners = new Set()
/**
 * Register a callback to run whenever the selected coding model changes.
 * @param {Function} fn - Listener; it is invoked with no arguments.
 * @returns {Function} Unsubscribe function. It returns true when the listener was still
 *   registered and false when it had already been removed.
 */
export function onCodingModelChange(fn) {
  _listeners.add(fn)
  const unsubscribe = () => {
    const wasRegistered = _listeners.delete(fn)
    return wasRegistered
  }
  return unsubscribe
}
// Invoke every registered change listener with no arguments.
// Delivery is best-effort: a listener that throws must not prevent the remaining listeners
// from running, so the exception is caught. It is reported via console.error rather than
// discarded, so a broken subscriber is visible while debugging.
const _notify = () => {
  for (const fn of _listeners) {
    try {
      fn()
    } catch (err) {
      console.error('coding-model change listener failed', err)
    }
  }
}
/**
 * List the selectable coding models.
 * @returns {Array<object>} The module's model registry array itself (not a copy).
 */
export function listCodingModels() {
  return MODELS
}

/**
 * @returns {string} Id of the currently selected coding model.
 */
export function getCodingModelId() {
  return _sel
}

/**
 * @returns {object} Registry entry for the currently selected coding model.
 */
export function currentCodingModel() {
  const selectedId = _sel
  return get(selectedId)
}
/**
 * Select a coding model by id, persist the choice, and notify change listeners.
 * Does nothing when the id is not in the registry or is already the current selection.
 * @param {string} id - Registry id of the model to select.
 */
export function setCodingModel(id) {
  const isKnown = MODELS.some((entry) => entry.id === id)
  const isUnchanged = id === _sel
  if (!isKnown || isUnchanged) return

  _sel = id
  // Persistence is best-effort: the in-memory selection still changes if storage is
  // unavailable or the write throws.
  try {
    localStorage.setItem(KEY, id)
  } catch {
    /* ignore */
  }
  _notify()
}
// Stream a coding-model completion. Same delta protocol as engineServer.stream.
// think=true asks reasoning models (Nemotron, BLS) to surface their <think>…</think> trace
// instead of hiding it, so the caller can show it in a debug panel.
/**
 * @param {string} system - System prompt, sent as `system`.
 * @param {string} user - User prompt, sent as `user`.
 * @param {object} [options]
 * @param {number} [options.maxTokens=512] - Sent as `max_tokens`.
 * @param {number} [options.temperature=0.6] - Sent as `temperature`.
 * @param {boolean} [options.think=false] - Sent as `think`.
 * @param {Function} [options.onToken] - Called with each non-empty streamed text piece.
 * @param {Function} [options.onStats] - Handed to statsTracker.
 * @param {AbortSignal} [options.signal] - Forwarded to streamSse.
 * @returns {Promise<{text: string, stats: *}>} The concatenated text and whatever the stats
 *   tracker's finish() returns.
 */
export async function streamCoding(system, user, { maxTokens = 512, temperature = 0.6, think = false, onToken, onStats, signal } = {}) {
  const tracker = statsTracker(onStats)
  let text = ''

  // Only 'delta' events carrying non-empty content contribute to the output.
  const handleEvent = (evt, parsed) => {
    const piece = evt === 'delta' ? parsed?.content || '' : ''
    if (piece) {
      text += piece
      onToken?.(piece)
      tracker.tick()
    }
  }

  // The request targets whichever model is selected at call time.
  const payload = {
    model: _sel,
    system,
    user,
    max_tokens: maxTokens,
    temperature,
    think,
  }

  await streamSse('/text/generate/stream', payload, { signal, onEvent: handleEvent })

  const stats = tracker.finish()
  return { text, stats }
}