// Engine: server-side text generation. Keeps API keys/model hosts off the client and
// lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
// hosted model such as Tiny Aya Global.
import { statsTracker } from '/web/genStats.js'
import { streamSse } from '/web/sseText.js'

// Catalogue of selectable models. Order matters: the first entry is what `get`
// returns when it is asked for an id that is not listed here.
const MODELS = [
  {
    id: 'server-local',
    label: 'Configured server model',
    params: 'local/remote',
    note: 'uses TINY_LLM_* on the Space or local app',
  },
  {
    id: 'tiny-aya-global-zerogpu',
    label: 'Tiny Aya Global 3.35B',
    params: '3.35B',
    note: 'ZeroGPU sidecar; multilingual',
  },
  {
    id: 'minicpm5-1b-zerogpu',
    label: 'MiniCPM5 1B',
    params: '1B',
    note: 'ZeroGPU sidecar; efficient MiniCPM5 text model',
  },
]

/**
 * Look up a catalogue entry by id.
 *
 * @param {string} id - Model id to resolve.
 * @returns {object} The matching entry, or the first catalogue entry when no id matches.
 */
function get(id) {
  const match = MODELS.find((model) => model.id === id)
  return match || MODELS[0]
}

/**
 * Run one generation request against '/text/generate/stream' via `streamSse` and
 * accumulate the streamed text.
 *
 * @param {string} id - Requested model id; unknown ids resolve to the first catalogue entry.
 * @param {string} system - System prompt, forwarded as `system`.
 * @param {string} user - User prompt, forwarded as `user`.
 * @param {object} [options]
 * @param {number} [options.maxTokens=200] - Forwarded as `max_tokens`.
 * @param {number} [options.temperature=0.8] - Forwarded as `temperature`.
 * @param {Function} [options.onToken] - Called with each non-empty text piece as it arrives.
 * @param {Function} [options.onStats] - Handed to `statsTracker`; presumably receives
 *   generation stats (confirm against genStats.js).
 * @param {AbortSignal} [options.signal] - Passed through to `streamSse`.
 * @returns {Promise<{text: string, stats: *}>} The concatenated text plus whatever
 *   the tracker's `finish()` returns.
 */
async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
  const model = get(id)
  const tracker = statsTracker(onStats)
  let text = ''

  // Only 'delta' events with non-empty content contribute to the output; every
  // other event (and any empty delta) is ignored.
  const handleEvent = (evt, parsed) => {
    if (evt === 'delta') {
      const chunk = parsed?.content
      if (chunk) {
        text += chunk
        onToken?.(chunk)
        tracker.tick()
      }
    }
  }

  const payload = {
    model: model.id,
    system,
    user,
    max_tokens: maxTokens,
    temperature,
  }

  await streamSse('/text/generate/stream', payload, { signal, onEvent: handleEvent })

  return { text, stats: tracker.finish() }
}

// Engine descriptor consumed by the picker: static metadata plus the `stream`
// entry point. `ensure` is a no-op and `needsDownload` is false for this engine.
export const engine = {
  id: 'server',
  label: 'Server / ZeroGPU',
  available() {
    return true
  },
  needsDownload: false,
  models: MODELS,
  defaultModel: 'tiny-aya-global-zerogpu',
  async ensure() {},
  stream,
  backendLabel() {
    return 'server'
  },
}