Spaces:

build-small-hackathon
/

tiny-army

Running

polats Claude Opus 4.8 (1M context) committed on Jun 4

Commit

bed3298

1 Parent(s): 559041f

Transformers.js: pre-flight WebGPU device, else WASM (fixes load + caching)

Verified headlessly: Transformers.js DOES cache (writes a transformers-cache store)
— the real problem was it never loaded. navigator.gpu can exist and requestAdapter()
succeed, yet the WebGPU backend still throws "no available backend", and a failed
WebGPU attempt poisons the in-context WASM retry. Now we pick the device up front via
requestDevice() and only use WebGPU when a real device is obtained; otherwise WASM
from the start (which loads and caches reliably).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

web/engineTransformers.js +19 -6

web/engineTransformers.js CHANGED Viewed

@@ -8,7 +8,22 @@ const MODELS = [
   { id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
-const preferGPU = () => { try { return !!navigator.gpu } catch { return false } }
 let _lib = null, _pipe = null, _loadedId = null, _loadingId = null, _loadPromise = null, _device = 'wasm', _chain = Promise.resolve()
 async function lib() { if (!_lib) _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'); return _lib }
@@ -23,16 +38,14 @@ async function ensure(id, onProgress) {
   _loadingId = m.id
   _loadPromise = (async () => {
     const { pipeline } = await lib()
     const mk = (device) => pipeline('text-generation', m.repo, {
       device, dtype: 'q4',
       progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
     })
-    // navigator.gpu can exist but fail to provide an adapter (headless, flaky drivers)
-    // — Transformers.js throws "no available backend" instead of falling back, so we
-    // catch and retry on WASM. Otherwise the model never loads (and never caches).
     let pipe
-    try { pipe = await mk(preferGPU() ? 'webgpu' : 'wasm'); _device = preferGPU() ? 'webgpu' : 'wasm' }
-    catch (e) { pipe = await mk('wasm'); _device = 'wasm' }
     _pipe = pipe; _loadedId = m.id; return pipe
   })().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
   return _loadPromise

   { id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
 ]
 const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
+// Only choose WebGPU if we can actually get a *device* (not just an adapter):
+// navigator.gpu can exist and requestAdapter() can succeed, yet Transformers.js still
+// throws "no available backend" (headless, flaky drivers). And once a WebGPU pipeline
+// attempt fails, the in-context WASM retry is poisoned too — so we must decide up front
+// and never attempt WebGPU unless it's real. WASM always works and caches fine.
+async function pickDevice() { // resolves to 'webgpu' or 'wasm'; every failure path below yields 'wasm'
+  try { // anything thrown in this probe (including a rejected request) is caught by the outer catch
+    if (!navigator.gpu) return 'wasm' // no WebGPU entry point on navigator
+    const a = await navigator.gpu.requestAdapter() // first step of the probe: ask for an adapter
+    if (!a) return 'wasm' // falsy adapter result: treat WebGPU as unavailable
+    const d = await a.requestDevice() // second step: an adapter alone is not trusted, request a device
+    if (d) { try { d.destroy() } catch { /* ignore */ } return 'webgpu' } // device was only a probe: release it (best effort) and report WebGPU
+  } catch { /* fall through */ } // deliberate swallow: a failed probe just means the WASM default below
+  return 'wasm' // default when no device was obtained
+}
 let _lib = null, _pipe = null, _loadedId = null, _loadingId = null, _loadPromise = null, _device = 'wasm', _chain = Promise.resolve()
 async function lib() { if (!_lib) _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'); return _lib }
   _loadingId = m.id
   _loadPromise = (async () => {
     const { pipeline } = await lib()
+    _device = await pickDevice()
     const mk = (device) => pipeline('text-generation', m.repo, {
       device, dtype: 'q4',
       progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
     })
     let pipe
+    try { pipe = await mk(_device) }
+    catch (e) { if (_device !== 'wasm') { _device = 'wasm'; pipe = await mk('wasm') } else throw e }
     _pipe = pipe; _loadedId = m.id; return pipe
   })().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
   return _loadPromise