// Engine: server-side text generation. Keeps API keys/model hosts off the client and
// lets the same picker choose either a configured local llama.cpp server or a ZeroGPU
// hosted model such as Tiny Aya Global.
import { statsTracker } from '/web/genStats.js'
import { streamSse } from '/web/sseText.js'
// Catalogue of models this engine exposes. Each entry carries the id sent to the
// server plus display strings (label, params, note). Order matters: the first
// entry is what get() falls back to for an unrecognised id.
const MODELS = [
  // Whatever model the server itself is configured with.
  {
    id: 'server-local',
    label: 'Configured server model',
    params: 'local/remote',
    note: 'uses TINY_LLM_* on the Space or local app',
  },
  // ZeroGPU-hosted models.
  {
    id: 'tiny-aya-global-zerogpu',
    label: 'Tiny Aya Global 3.35B',
    params: '3.35B',
    note: 'ZeroGPU sidecar; multilingual',
  },
  {
    id: 'minicpm5-1b-zerogpu',
    label: 'MiniCPM5 1B',
    params: '1B',
    note: 'ZeroGPU sidecar; efficient MiniCPM5 text model',
  },
]
// Look up a model entry by id; an unknown (or missing) id resolves to the first
// entry of MODELS rather than failing.
const get = (id) => {
  const match = MODELS.find((model) => model.id === id)
  return match || MODELS[0]
}
/**
 * Stream a completion from the server and collect it into a single string.
 *
 * Calls streamSse on '/text/generate/stream' with the resolved model id, the two
 * prompts and the sampling options. Only 'delta' events whose parsed payload has
 * a truthy `content` contribute: each such piece is appended to the result,
 * passed to `onToken`, and counted with one tracker tick.
 *
 * @param {string} id - Model id; resolved through get(), so unknown ids fall back.
 * @param {*} system - Sent as the `system` field of the request payload.
 * @param {*} user - Sent as the `user` field of the request payload.
 * @param {object} [options]
 * @param {number} [options.maxTokens=200] - Sent as `max_tokens`.
 * @param {number} [options.temperature=0.8] - Sent as `temperature`.
 * @param {Function} [options.onToken] - Called with each non-empty piece.
 * @param {Function} [options.onStats] - Handed to statsTracker().
 * @param {*} [options.signal] - Forwarded to streamSse (presumably an AbortSignal; confirm in sseText.js).
 * @returns {Promise<{text: string, stats: *}>} Accumulated text plus whatever the
 *   tracker's finish() returns.
 */
async function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken, onStats, signal } = {}) {
  const model = get(id)
  const tracker = statsTracker(onStats)

  const payload = {
    model: model.id,
    system,
    user,
    max_tokens: maxTokens,
    temperature,
  }

  let text = ''
  const handleEvent = (evt, parsed) => {
    // Non-delta events and empty/missing content are ignored entirely.
    const chunk = evt === 'delta' ? parsed?.content || '' : ''
    if (chunk) {
      text += chunk
      onToken?.(chunk)
      tracker.tick()
    }
  }

  await streamSse('/text/generate/stream', payload, { signal, onEvent: handleEvent })

  // finish() is only reached when the stream completes without throwing.
  return { text, stats: tracker.finish() }
}
// Engine descriptor for the server-backed generator. It bundles identity strings,
// the model catalogue, and the hooks defined in this module.
export const engine = {
  id: 'server',
  label: 'Server / ZeroGPU',
  // Always reports itself as available.
  available() {
    return true
  },
  needsDownload: false,
  models: MODELS,
  defaultModel: 'tiny-aya-global-zerogpu',
  // No-op: resolves immediately without doing any work.
  async ensure() {},
  stream,
  backendLabel() {
    return 'server'
  },
}
|