polats Claude Opus 4.8 (1M context) committed on
Commit
b0f48f8
·
1 Parent(s): bed3298

Cache status + delete for WebLLM and Transformers.js (like wllama)

Browse files

The model bar already shows "✓ downloaded" + a delete button for any engine that
exposes cachedSet()/deleteCached(); wire those up for the other two:
- WebLLM: MLC's own hasModelInCache() / deleteModelAllInfoInCache() over
prebuiltAppConfig (matches the q4f16/q4f32 variant we'd load).
- Transformers.js: inspect the 'transformers-cache' Cache API store; only count a
model as cached when its *.onnx weights are present (Transformers.js sometimes
caches the config/tokenizer JSONs but not the big model file — known upstream
issue), so the badge is honest.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. web/engineTransformers.js +21 -0
  2. web/engineWebllm.js +17 -0
web/engineTransformers.js CHANGED
@@ -69,6 +69,11 @@ function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken,
69
  const p = _chain.then(run, run); _chain = p.catch(() => {}); return p
70
  }
71
 
 
 
 
 
 
72
  export const engine = {
73
  id: 'transformers',
74
  label: 'Transformers.js · ONNX (WebGPU/WASM)',
@@ -78,4 +83,20 @@ export const engine = {
78
  defaultModel: 'qwen2.5-0.5b',
79
  ensure, stream,
80
  backendLabel: () => (_device === 'webgpu' ? '⚡ WebGPU' : 'CPU (WASM)'),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
 
69
  const p = _chain.then(run, run); _chain = p.catch(() => {}); return p
70
  }
71
 
72
+ // Transformers.js caches model files in the Cache API store 'transformers-cache',
73
+ // keyed by the remote HF URL — so we match by the model's repo name.
74
+ const CACHE = 'transformers-cache'
75
+ const repoKey = (m) => m.repo.split('/').pop()
76
+
77
  export const engine = {
78
  id: 'transformers',
79
  label: 'Transformers.js · ONNX (WebGPU/WASM)',
 
83
  defaultModel: 'qwen2.5-0.5b',
84
  ensure, stream,
85
  backendLabel: () => (_device === 'webgpu' ? '⚡ WebGPU' : 'CPU (WASM)'),
86
+ async cachedSet() {
87
+ try {
88
+ if (typeof caches === 'undefined') return new Set()
89
+ const urls = (await (await caches.open(CACHE)).keys()).map((r) => r.url)
90
+ const ids = new Set()
91
+ // Require the actual weights (*.onnx) in cache, not just the metadata JSONs —
92
+ // Transformers.js sometimes caches config/tokenizer but not the big model file.
93
+ for (const m of MODELS) if (urls.some((u) => u.includes(repoKey(m)) && /\.onnx(\?|$)/i.test(u))) ids.add(m.id)
94
+ return ids
95
+ } catch { return new Set() }
96
+ },
97
+ async deleteCached(id) {
98
+ const m = get(id)
99
+ if (_loadedId === id && _pipe) { try { await _pipe.dispose?.() } catch { /* ignore */ } _pipe = null; _loadedId = null; _loadPromise = null; _loadingId = null }
100
+ try { const c = await caches.open(CACHE); for (const req of await c.keys()) if (req.url.includes(repoKey(m))) await c.delete(req) } catch { /* ignore */ }
101
+ },
102
  }
web/engineWebllm.js CHANGED
@@ -85,4 +85,21 @@ export const engine = {
85
  defaultModel: 'qwen2.5-0.5b',
86
  ensure, stream,
87
  backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  }
 
85
  defaultModel: 'qwen2.5-0.5b',
86
  ensure, stream,
87
  backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
88
+ // Cache list/delete via MLC's own helpers (Cache API or IndexedDB, per appConfig).
89
+ async cachedSet() {
90
+ try {
91
+ const wl = await lib()
92
+ const cfg = wl.prebuiltAppConfig
93
+ const ids = new Set()
94
+ for (const m of MODELS) { if (await wl.hasModelInCache(await mlcId(m), cfg)) ids.add(m.id) }
95
+ return ids
96
+ } catch { return new Set() }
97
+ },
98
+ async deleteCached(id) {
99
+ const wl = await lib()
100
+ const m = get(id)
101
+ const target = await mlcId(m)
102
+ if (_loadedId === id && _engine) { try { await _engine.unload?.() } catch { /* ignore */ } _engine = null; _loadedId = null; _loadPromise = null; _loadingId = null }
103
+ try { await wl.deleteModelAllInfoInCache(target, wl.prebuiltAppConfig) } catch { /* ignore */ }
104
+ },
105
  }