File size: 3,566 Bytes
f9dd2fe
f8d0843
 
 
 
 
 
f9dd2fe
fa27f81
f8d0843
f9dd2fe
750ca83
 
 
 
 
 
 
 
 
de78f87
750ca83
 
 
 
 
 
 
 
 
 
 
f8d0843
 
 
 
 
750ca83
 
 
 
f8d0843
 
750ca83
 
 
 
 
 
f8d0843
750ca83
 
 
 
f8d0843
fa27f81
f9dd2fe
fa27f81
 
 
f8d0843
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Runtime facade — picks the active engine (wllama / Transformers.js / WebLLM / server) and
// model, and delegates load/stream/cache. Lets you A/B the same model across engines
// and compare tok/s. Panels + the model bar import only from here. (Named runtime.js,
// not engine.js — that one is the game-engine bundle.)
import { engine as wllama } from '/web/engineWllama.js'
import { engine as transformers } from '/web/engineTransformers.js'
import { engine as webllm } from '/web/engineWebllm.js'
import { engine as server } from '/web/engineServer.js'
import { ensurePersistentStorage } from '/web/storage.js'

// Engine registry. Order matters: listEngines() reports engines in this order, and eng()
// falls back to the first entry when activeId matches none of them.
const ENGINES = [wllama, transformers, webllm, server]
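// Persisted choices (survive refresh). Default engine: 'server' — used when nothing is saved,
// or when the saved engine is unknown or reports itself unavailable.
// NOTE(review): this comment previously described a WebLLM-then-wllama default, which the
// initializer below does not implement — confirm 'server' is the intended fallback.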
// localStorage keys for the active engine id and the per-engine model selections.
const ENGINE_KEY = 'tinyarmy.llmEngine'
const MODELS_KEY = 'tinyarmy.llmModels'

// Read localStorage[k] and JSON-parse it. Returns `fb` when the entry is missing or empty,
// when storage access throws, when the text is not valid JSON, or — if `fb` is given — when
// the parsed value is a different JSON kind than `fb` (object / array / null / primitive).
// The kind check matters because the result is indexed and assigned into later: a stored
// `null`, `42` or `"x"` parses fine but would throw on the first read or write.
const loadJSON = (k, fb) => {
  const kindOf = (v) => (v === null ? 'null' : Array.isArray(v) ? 'array' : typeof v)
  try {
    const raw = localStorage.getItem(k)
    if (!raw) return fb
    const parsed = JSON.parse(raw)
    // Without a fallback there is nothing to compare against, so hand back whatever parsed.
    if (fb != null && kindOf(parsed) !== kindOf(fb)) return fb
    return parsed
  } catch {
    return fb
  }
}

// Read localStorage[k] as a plain string; a missing entry or a storage error yields ''.
const loadStr = (k) => {
  try {
    return localStorage.getItem(k) || ''
  } catch {
    return ''
  }
}

// Active engine id. Starts as 'server' and is replaced by the saved choice only when that
// choice names a registered engine whose available() check passes at load time.
let activeId = 'server'
{
  const remembered = loadStr(ENGINE_KEY)
  const match = ENGINES.find((candidate) => candidate.id === remembered)
  if (match && match.available()) activeId = remembered
}
// engineId -> chosen model id, so each engine remembers its own selection.
const modelSel = loadJSON(MODELS_KEY, {})

// Write the active engine id and the per-engine model map to localStorage.
// Best effort: any storage or serialisation error is swallowed so callers never see it.
function persist() {
  try {
    localStorage.setItem(ENGINE_KEY, activeId)
    const serialized = JSON.stringify(modelSel)
    localStorage.setItem(MODELS_KEY, serialized)
  } catch {
    /* ignore */
  }
}

// Change listeners (the Settings "Recommended" preset bar + the model bar re-render).
const _listeners = new Set()

// Subscribe `fn` to engine/model changes. Returns an unsubscribe function, which itself
// returns true if `fn` was still registered and false otherwise.
export function onModelChange(fn) {
  _listeners.add(fn)
  return () => _listeners.delete(fn)
}

// Call every subscriber with no arguments. A subscriber that throws is ignored so the
// remaining ones still run.
const _notify = () => {
  _listeners.forEach((listener) => {
    try {
      listener()
    } catch {
      /* ignore */
    }
  })
}

// Resolve the engine object for activeId. An id that matches no registered engine
// resolves to the first entry of ENGINES.
const eng = () => {
  const found = ENGINES.find((candidate) => candidate.id === activeId)
  return found || ENGINES[0]
}

// One `{ id, label, available }` record per registered engine, in registry order.
// `available` is evaluated by calling the engine's available() at the time of the call.
export const listEngines = () => {
  const summary = []
  for (const engine of ENGINES) {
    summary.push({ id: engine.id, label: engine.label, available: engine.available() })
  }
  return summary
}

// Id of the currently active engine.
export const getEngineId = () => {
  return activeId
}
// Switch the active engine, then persist the choice and notify subscribers.
// Does nothing when `id` is already active or names no registered engine.
// Availability is not checked here.
export function setEngine(id) {
  if (id === activeId) return
  const registered = ENGINES.some((candidate) => candidate.id === id)
  if (!registered) return
  activeId = id
  persist()
  _notify()
}

// Model catalog of the active engine.
export const listModels = () => eng().models

// Id of the model to use on the active engine.
// A stored model id only counts if it actually exists in the active engine's catalog
// (otherwise fall back to that engine's default — handles cross-engine presets cleanly).
export const currentModelId = () => {
  const { models, defaultModel } = eng()
  const remembered = modelSel[activeId]
  const inCatalog = Boolean(remembered) && models.some((m) => m.id === remembered)
  return inCatalog ? remembered : defaultModel
}

// Catalog entry for currentModelId(); falls back to the first catalog entry when that id
// has no entry (e.g. the engine's default is not in its own catalog).
export const currentModel = () => {
  const { models } = eng()
  // Resolve the id once up front: evaluating it inside the find() callback would rescan
  // the catalog for every entry tested.
  const wantedId = currentModelId()
  return models.find((m) => m.id === wantedId) || models[0]
}
// Remember `id` as the chosen model for the active engine, then persist and notify.
// Does nothing when that exact id is already stored for this engine. The id is not
// validated against the catalog here.
export function setModel(id) {
  const unchanged = modelSel[activeId] === id
  if (unchanged) return
  modelSel[activeId] = id
  persist()
  _notify()
}

// Make sure the current model of the active engine is ready, forwarding `onProgress` to the
// engine's ensure(). Resolves to whatever ensure() returns.
export const ensureModel = async (onProgress) => {
  // Engines that explicitly set needsDownload to false skip the storage step entirely.
  const downloads = eng().needsDownload !== false
  if (downloads) {
    await ensurePersistentStorage() // keep downloads from being evicted across engine switches
  }
  return eng().ensure(currentModelId(), onProgress)
}
// Delegate to the active engine's stream() with the current model id followed by `sys`,
// `user` and `opts`, returning whatever the engine returns.
// NOTE(review): sys/user are presumably the system and user prompt text — confirm
// against the engine modules.
export const streamChat = (sys, user, opts) => {
  const active = eng()
  return active.stream(currentModelId(), sys, user, opts)
}

// Label reported by the active engine's backendLabel().
export const backendLabel = () => {
  const active = eng()
  return active.backendLabel()
}

// Cache management (only wllama exposes per-model delete; others manage their own cache).
// True when the active engine provides a cachedSet member.
export const cacheSupported = () => Boolean(eng().cachedSet)

// Resolves to the active engine's cachedSet() result, or to an empty Set when the engine
// has no cachedSet.
export const cachedSet = async () => {
  const active = eng()
  if (!active.cachedSet) return new Set()
  return active.cachedSet()
}

// Resolves to the active engine's deleteCached(id) result, or to null when the engine has
// no deleteCached.
export const deleteCached = async (id) => {
  const active = eng()
  if (!active.deleteCached) return null
  return active.deleteCached(id)
}