Spaces:

build-small-hackathon
/

tiny-army

Running

polats Claude Opus 4.8 (1M context) commited on Jun 4

Commit

b0f48f8

1 Parent(s): bed3298

Cache status + delete for WebLLM and Transformers.js (like wllama)

The model bar already shows "✓ downloaded" + a delete button for any engine that
exposes cachedSet()/deleteCached(); wire those up for the other two:
- WebLLM: MLC's own hasModelInCache() / deleteModelAllInfoInCache() over
prebuiltAppConfig (matches the q4f16/q4f32 variant we'd load).
- Transformers.js: inspect the 'transformers-cache' Cache API store; only count a
model as cached when its *.onnx weights are present (Transformers.js sometimes
caches the config/tokenizer JSONs but not the big model file — known upstream
issue), so the badge is honest.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

web/engineTransformers.js +21 -0
web/engineWebllm.js +17 -0

web/engineTransformers.js CHANGED Viewed

@@ -69,6 +69,11 @@ function stream(id, system, user, { maxTokens = 200, temperature = 0.8, onToken,
   const p = _chain.then(run, run); _chain = p.catch(() => {}); return p
 }
 export const engine = {
   id: 'transformers',
   label: 'Transformers.js · ONNX (WebGPU/WASM)',
@@ -78,4 +83,20 @@ export const engine = {
   defaultModel: 'qwen2.5-0.5b',
   ensure, stream,
   backendLabel: () => (_device === 'webgpu' ? '⚡ WebGPU' : 'CPU (WASM)'),
 }

   const p = _chain.then(run, run); _chain = p.catch(() => {}); return p
 }
+// Transformers.js caches model files in the Cache API store 'transformers-cache',
+// keyed by the remote HF URL — so we match by the model's repo name.
+const CACHE = 'transformers-cache' // store name handed to caches.open() by the engine's cache helpers
+const repoKey = (m) => m.repo.split('/').pop() // text after the last '/' in m.repo (all of it when there is no '/')
 export const engine = {
   id: 'transformers',
   label: 'Transformers.js · ONNX (WebGPU/WASM)',
   defaultModel: 'qwen2.5-0.5b',
   ensure, stream,
   backendLabel: () => (_device === 'webgpu' ? '⚡ WebGPU' : 'CPU (WASM)'),
+  async cachedSet() {
+    try {
+      if (typeof caches === 'undefined') return new Set()
+      const urls = (await (await caches.open(CACHE)).keys()).map((r) => r.url)
+      const ids = new Set()
+      // Require the actual weights (*.onnx) in cache, not just the metadata JSONs —
+      // Transformers.js sometimes caches config/tokenizer but not the big model file.
+      for (const m of MODELS) if (urls.some((u) => u.includes(repoKey(m)) && /\.onnx(\?|$)/i.test(u))) ids.add(m.id)
+      return ids
+    } catch { return new Set() }
+  },
+  async deleteCached(id) {
+    const m = get(id)
+    if (_loadedId === id && _pipe) { try { await _pipe.dispose?.() } catch { /* ignore */ } _pipe = null; _loadedId = null; _loadPromise = null; _loadingId = null }
+    try { const c = await caches.open(CACHE); for (const req of await c.keys()) if (req.url.includes(repoKey(m))) await c.delete(req) } catch { /* ignore */ }
+  },
 }

web/engineWebllm.js CHANGED Viewed

@@ -85,4 +85,21 @@ export const engine = {
   defaultModel: 'qwen2.5-0.5b',
   ensure, stream,
   backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
 }

   defaultModel: 'qwen2.5-0.5b',
   ensure, stream,
   backendLabel: () => (hasGPU() ? '⚡ WebGPU' : 'needs WebGPU'),
+  // Cache list/delete via MLC's own helpers (Cache API or IndexedDB, per appConfig).
+  async cachedSet() {
+    try {
+      const wl = await lib()
+      const cfg = wl.prebuiltAppConfig
+      const ids = new Set()
+      for (const m of MODELS) { if (await wl.hasModelInCache(await mlcId(m), cfg)) ids.add(m.id) }
+      return ids
+    } catch { return new Set() }
+  },
+  async deleteCached(id) {
+    const wl = await lib()
+    const m = get(id)
+    const target = await mlcId(m)
+    if (_loadedId === id && _engine) { try { await _engine.unload?.() } catch { /* ignore */ } _engine = null; _loadedId = null; _loadPromise = null; _loadingId = null }
+    try { await wl.deleteModelAllInfoInCache(target, wl.prebuiltAppConfig) } catch { /* ignore */ }
+  },
 }