polats Claude Opus 4.8 (1M context) committed on
Commit
bed3298
·
1 Parent(s): 559041f

Transformers.js: pre-flight WebGPU device, else WASM (fixes load + caching)

Browse files

Verified headlessly: Transformers.js DOES cache (writes a transformers-cache store)
— the real problem was it never loaded. navigator.gpu can exist and requestAdapter()
succeed, yet the WebGPU backend still throws "no available backend", and a failed
WebGPU attempt poisons the in-context WASM retry. Now we pick the device up front via
requestDevice() and only use WebGPU when a real device is obtained; otherwise WASM
from the start (which loads and caches reliably).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. web/engineTransformers.js +19 -6
web/engineTransformers.js CHANGED
@@ -8,7 +8,22 @@ const MODELS = [
8
  { id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
9
  ]
10
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
11
- const preferGPU = () => { try { return !!navigator.gpu } catch { return false } }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  let _lib = null, _pipe = null, _loadedId = null, _loadingId = null, _loadPromise = null, _device = 'wasm', _chain = Promise.resolve()
14
  async function lib() { if (!_lib) _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'); return _lib }
@@ -23,16 +38,14 @@ async function ensure(id, onProgress) {
23
  _loadingId = m.id
24
  _loadPromise = (async () => {
25
  const { pipeline } = await lib()
 
26
  const mk = (device) => pipeline('text-generation', m.repo, {
27
  device, dtype: 'q4',
28
  progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
29
  })
30
- // navigator.gpu can exist but fail to provide an adapter (headless, flaky drivers)
31
- // — Transformers.js throws "no available backend" instead of falling back, so we
32
- // catch and retry on WASM. Otherwise the model never loads (and never caches).
33
  let pipe
34
- try { pipe = await mk(preferGPU() ? 'webgpu' : 'wasm'); _device = preferGPU() ? 'webgpu' : 'wasm' }
35
- catch (e) { pipe = await mk('wasm'); _device = 'wasm' }
36
  _pipe = pipe; _loadedId = m.id; return pipe
37
  })().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
38
  return _loadPromise
 
8
  { id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
9
  ]
10
  const get = (id) => MODELS.find((m) => m.id === id) || MODELS[0]
11
+
12
+ // Only choose WebGPU if we can actually get a *device* (not just an adapter):
13
+ // navigator.gpu can exist and requestAdapter() can succeed, yet Transformers.js still
14
+ // throws "no available backend" (headless, flaky drivers). And once a WebGPU pipeline
15
+ // attempt fails, the in-context WASM retry is poisoned too — so we must decide up front
16
+ // and never attempt WebGPU unless it's real. WASM always works and caches fine.
17
+ async function pickDevice() {
18
+ try {
19
+ if (!navigator.gpu) return 'wasm'
20
+ const a = await navigator.gpu.requestAdapter()
21
+ if (!a) return 'wasm'
22
+ const d = await a.requestDevice()
23
+ if (d) { try { d.destroy() } catch { /* ignore */ } return 'webgpu' }
24
+ } catch { /* fall through */ }
25
+ return 'wasm'
26
+ }
27
 
28
  let _lib = null, _pipe = null, _loadedId = null, _loadingId = null, _loadPromise = null, _device = 'wasm', _chain = Promise.resolve()
29
  async function lib() { if (!_lib) _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'); return _lib }
 
38
  _loadingId = m.id
39
  _loadPromise = (async () => {
40
  const { pipeline } = await lib()
41
+ _device = await pickDevice()
42
  const mk = (device) => pipeline('text-generation', m.repo, {
43
  device, dtype: 'q4',
44
  progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
45
  })
 
 
 
46
  let pipe
47
+ try { pipe = await mk(_device) }
48
+ catch (e) { if (_device !== 'wasm') { _device = 'wasm'; pipe = await mk('wasm') } else throw e }
49
  _pipe = pipe; _loadedId = m.id; return pipe
50
  })().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
51
  return _loadPromise