Spaces:
Running
Running
Transformers.js: pre-flight WebGPU device, else WASM (fixes load + caching)
Browse files
Verified headlessly: Transformers.js DOES cache (writes a transformers-cache store)
— the real problem was it never loaded. navigator.gpu can exist and requestAdapter()
succeed, yet the WebGPU backend still throws "no available backend", and a failed
WebGPU attempt poisons the in-context WASM retry. Now we pick the device up front via
requestDevice() and only use WebGPU when a real device is obtained; otherwise WASM
from the start (which loads and caches reliably).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- web/engineTransformers.js +19 -6
web/engineTransformers.js
CHANGED
|
@@ -8,7 +8,22 @@ const MODELS = [
|
|
| 8 |
{ id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
|
| 9 |
]
|
| 10 |
/**
 * Look up a model descriptor by its id.
 * When no entry of MODELS has a matching id, the first entry is returned instead.
 */
const get = (id) => {
  const match = MODELS.find((model) => model.id === id)
  return match || MODELS[0]
}
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
// Module-level engine state, one binding per line.
let _lib = null          // memoized module namespace returned by lib()
let _pipe = null         // most recently built pipeline
let _loadedId = null     // id of the model behind _pipe
let _loadingId = null    // id of the model currently being loaded
let _loadPromise = null  // in-flight load promise
let _device = 'wasm'     // backend name; starts as 'wasm'
let _chain = Promise.resolve() // starts already resolved; its use is outside this view
|
| 14 |
/**
 * Lazily import Transformers.js from the jsDelivr CDN.
 * The module namespace is memoized in _lib, so the dynamic import is skipped
 * once _lib holds a value.
 * @returns {Promise<object>} the imported module namespace
 */
async function lib() {
  if (_lib) return _lib
  _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3')
  return _lib
}
|
|
@@ -23,16 +38,14 @@ async function ensure(id, onProgress) {
|
|
| 23 |
_loadingId = m.id
|
| 24 |
_loadPromise = (async () => {
|
| 25 |
const { pipeline } = await lib()
|
|
|
|
| 26 |
const mk = (device) => pipeline('text-generation', m.repo, {
|
| 27 |
device, dtype: 'q4',
|
| 28 |
progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
|
| 29 |
})
|
| 30 |
-
// navigator.gpu can exist but fail to provide an adapter (headless, flaky drivers)
|
| 31 |
-
// — Transformers.js throws "no available backend" instead of falling back, so we
|
| 32 |
-
// catch and retry on WASM. Otherwise the model never loads (and never caches).
|
| 33 |
let pipe
|
| 34 |
-
try { pipe = await mk(
|
| 35 |
-
catch (e) {
|
| 36 |
_pipe = pipe; _loadedId = m.id; return pipe
|
| 37 |
})().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
|
| 38 |
return _loadPromise
|
|
|
|
| 8 |
{ id: 'llama3.2-1b', label: 'Llama 3.2 1B', params: '1B', repo: 'onnx-community/Llama-3.2-1B-Instruct' },
|
| 9 |
]
|
| 10 |
/**
 * Look up a model descriptor by its id.
 * When no entry of MODELS has a matching id, the first entry is returned instead.
 */
const get = (id) => {
  const match = MODELS.find((model) => model.id === id)
  return match || MODELS[0]
}
|
| 11 |
+
|
| 12 |
+
// Only choose WebGPU if we can actually get a *device* (not just an adapter):
|
| 13 |
+
// navigator.gpu can exist and requestAdapter() can succeed, yet Transformers.js still
|
| 14 |
+
// throws "no available backend" (headless, flaky drivers). And once a WebGPU pipeline
|
| 15 |
+
// attempt fails, the in-context WASM retry is poisoned too — so we must decide up front
|
| 16 |
+
// and never attempt WebGPU unless it's real. WASM always works and caches fine.
|
| 17 |
+
/**
 * Probe for a usable WebGPU device and report which backend to load with.
 *
 * Resolves to 'webgpu' only when navigator.gpu exists, requestAdapter() returns
 * an adapter, and requestDevice() on it yields a device. The probe device is
 * destroyed straight away (errors from destroy() are ignored). Every other
 * outcome, including any error thrown along the way, resolves to 'wasm'.
 *
 * @returns {Promise<string>} 'webgpu' or 'wasm'
 */
async function pickDevice() {
  let backend = 'wasm'
  try {
    const adapter = navigator.gpu ? await navigator.gpu.requestAdapter() : null
    const device = adapter ? await adapter.requestDevice() : null
    if (device) {
      backend = 'webgpu'
      // The device was only needed as proof that WebGPU works; release it.
      try { device.destroy() } catch { /* ignore */ }
    }
  } catch { /* fall through */ }
  return backend
}
|
| 27 |
|
| 28 |
// Module-level engine state, one binding per line.
let _lib = null          // memoized module namespace returned by lib()
let _pipe = null         // most recently built pipeline
let _loadedId = null     // id of the model behind _pipe
let _loadingId = null    // id of the model currently being loaded
let _loadPromise = null  // in-flight load promise
let _device = 'wasm'     // backend name; starts as 'wasm'
let _chain = Promise.resolve() // starts already resolved; its use is outside this view
|
| 29 |
/**
 * Lazily import Transformers.js from the jsDelivr CDN.
 * The module namespace is memoized in _lib, so the dynamic import is skipped
 * once _lib holds a value.
 * @returns {Promise<object>} the imported module namespace
 */
async function lib() {
  if (_lib) return _lib
  _lib = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3')
  return _lib
}
|
|
|
|
| 38 |
_loadingId = m.id
|
| 39 |
_loadPromise = (async () => {
|
| 40 |
const { pipeline } = await lib()
|
| 41 |
+
_device = await pickDevice()
|
| 42 |
const mk = (device) => pipeline('text-generation', m.repo, {
|
| 43 |
device, dtype: 'q4',
|
| 44 |
progress_callback: (p) => { if (onProgress && p.status === 'progress' && p.total) onProgress(p.loaded / p.total) },
|
| 45 |
})
|
|
|
|
|
|
|
|
|
|
| 46 |
let pipe
|
| 47 |
+
try { pipe = await mk(_device) }
|
| 48 |
+
catch (e) { if (_device !== 'wasm') { _device = 'wasm'; pipe = await mk('wasm') } else throw e }
|
| 49 |
_pipe = pipe; _loadedId = m.id; return pipe
|
| 50 |
})().catch((e) => { _loadPromise = null; _loadingId = null; throw e })
|
| 51 |
return _loadPromise
|