| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import { dispatchBackendEvent, shortenReason } from 'lib/backend-events'; |
|
|
| |
| |
| |
| |
|
|
/**
 * Reports whether a native ORT bridge object is present on the global scope.
 *
 * @returns {boolean} `true` when `globalThis.__nativeOrt` is truthy.
 */
export function isNativeMode() {
  return Boolean(globalThis.__nativeOrt);
}
|
|
| |
| |
| |
| |
|
|
// Flipped to true after a native call fails with a worker-crash style error.
// While set, loadSession skips the native path and goes straight to ORT-Web.
let nativeAutoDisabled = false;

/**
 * Decides whether an error looks like a failure of the native worker itself
 * rather than a problem with the model or its inputs.
 *
 * @param {*} err - Caught value; may be null/undefined or lack a message.
 * @returns {boolean} `true` when `err.message` is non-empty and contains one
 *   of the known worker-failure phrases (matched case-insensitively).
 */
function isWorkerCrash(err) {
  const message = err?.message;
  if (!message) return false;
  return /worker crashed|worker not available|native ort unavailable/i.test(message);
}
|
|
| |
| |
|
|
// Typed-array constructor used to view the raw bytes of a tensor returned by
// the native bridge, keyed by the tensor's `type` string. A type that is not
// listed here is rejected when native outputs are converted.
const WIRE_DTYPES = {
  float32: Float32Array,
  // float16 values are exposed as their raw 16-bit patterns.
  float16: Uint16Array,
  int32: Int32Array,
  int64: BigInt64Array,
  uint8: Uint8Array,
};
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Creates an inference session, preferring the native bridge when it is
 * present and has not been auto-disabled, and otherwise using ORT-Web.
 *
 * A native load that fails with a worker-crash style error disables the
 * native path for subsequent calls and falls through to ORT-Web; any other
 * native load error is rethrown unchanged.
 *
 * @param {ArrayBuffer|Uint8Array} modelBytes - Serialized model, handed to the loaders as-is.
 * @param {'gpu'|'cpu'} intent - Requested execution target.
 * @param {object} [opts] - Options forwarded to the ORT-Web loader only.
 * @returns {Promise<{session: object, realizedBackend: string}>}
 * @throws {Error} When `intent` is neither 'gpu' nor 'cpu'.
 */
export async function loadSession(modelBytes, intent, opts = {}) {
  const intentIsKnown = intent === 'gpu' || intent === 'cpu';
  if (!intentIsKnown) {
    throw new Error(`loadSession: unknown intent ${JSON.stringify(intent)} (expected 'gpu' or 'cpu')`);
  }

  const shouldTryNative = isNativeMode() && !nativeAutoDisabled;
  if (shouldTryNative) {
    try {
      return await loadNative(modelBytes, intent);
    } catch (err) {
      // Only a dead/unavailable worker justifies switching backends.
      if (!isWorkerCrash(err)) throw err;
      nativeAutoDisabled = true;
      console.warn(`[backend] native ORT auto-disabled for this page session: ${err.message} Future loads use ORT-Web. Reload to retry native.`);
    }
  }

  return loadWeb(modelBytes, intent, opts);
}
|
|
| |
|
|
// Monotonic counter used to build a distinct key for each native session.
let nativeSeq = 0;

/**
 * Loads a model through the native bridge and wraps the result in a session
 * object.
 *
 * @param {ArrayBuffer|Uint8Array} modelBytes - Serialized model.
 * @param {'gpu'|'cpu'} intent - Passed to the bridge as `{ intent }`.
 * @returns {Promise<{session: object, realizedBackend: string}>}
 *   `realizedBackend` is `native-` followed by the `rung` reported by the bridge.
 */
async function loadNative(modelBytes, intent) {
  const buffer = toArrayBuffer(modelBytes);
  nativeSeq += 1;
  // Key combines the load sequence number with the model size in bytes.
  const key = `m${nativeSeq}_${buffer.byteLength}`;

  const meta = await globalThis.__nativeOrt.load(key, buffer, { intent });
  const { inputNames, outputNames, rung } = meta;
  console.log(`[backend] native session ${key}: ${inputNames.join(',')} -> ${outputNames.join(',')} via ${rung}`);

  const session = makeNativeSession(key, meta);
  return { session, realizedBackend: `native-${rung}` };
}
|
|
/**
 * Builds a session-shaped object that forwards inference to the native bridge
 * (`globalThis.__nativeOrt`) for the session registered under `key`.
 *
 * @param {string} key - Identifier the bridge uses for this session.
 * @param {{inputNames: string[], outputNames: string[]}} meta - Names reported
 *   by the bridge when the model was loaded.
 * @returns {object} Object with `inputNames`, `outputNames`, `inputMetadata`,
 *   `outputMetadata`, `run`, `release`, `startProfiling` and `endProfiling`.
 */
function makeNativeSession(key, meta) {
  // Only the tensor names come from `meta`; element type and dimensions are
  // fixed placeholders ('tensor(float)', []).
  const describe = (name) => ({ name, type: 'tensor(float)', dimensions: [] });
  const inputMetadata = meta.inputNames.map((name) => describe(name));
  const outputMetadata = meta.outputNames.map((name) => describe(name));

  return {
    inputNames: meta.inputNames,
    outputNames: meta.outputNames,
    inputMetadata,
    outputMetadata,

    /**
     * Runs inference through the native bridge.
     *
     * @param {Object<string, {type: string, dims: number[], data: ArrayBufferView}>} feeds
     *   Input tensors keyed by input name.
     * @returns {Promise<Object<string, object>>} `ort.Tensor` instances keyed by output name.
     * @throws {TypeError} When a feed's `data` is not a typed array / ArrayBuffer view.
     * @throws {Error} When the bridge call fails, `globalThis.ort.Tensor` is
     *   missing, or an output uses a type absent from WIRE_DTYPES.
     */
    async run(feeds) {
      const wire = {};
      for (const [name, tensor] of Object.entries(feeds)) {
        const data = tensor?.data;
        if (!ArrayBuffer.isView(data)) {
          throw new TypeError(`[backend] input "${name}" is not backed by a typed array; native sessions need tensor data as an ArrayBuffer view`);
        }
        // Copy exactly the bytes the view covers into a standalone buffer.
        const bytes = data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
        wire[name] = { type: tensor.type, dims: tensor.dims, data: bytes };
      }

      let raw;
      try {
        raw = await globalThis.__nativeOrt.run(key, wire);
      } catch (e) {
        if (isWorkerCrash(e)) {
          // Later loadSession calls will skip the native path; this run still fails.
          nativeAutoDisabled = true;
          console.warn(`[backend] native ORT auto-disabled mid-run: ${e.message}`);
        }
        throw e;
      }

      // Outputs are rebuilt as ort.Tensor objects; resolve the constructor once.
      const Tensor = globalThis.ort?.Tensor;
      if (typeof Tensor !== 'function') {
        throw new Error('[backend] globalThis.ort.Tensor is not available — ort-web must be loaded to wrap native outputs');
      }

      const out = {};
      for (const [name, t] of Object.entries(raw)) {
        const Arr = WIRE_DTYPES[t.type];
        if (!Arr) throw new Error(`[backend] unsupported output tensor type: ${t.type}`);
        const tensor = new Tensor(t.type, new Arr(t.data), t.dims);
        // Guarantee a callable dispose() for callers that invoke it.
        if (typeof tensor.dispose !== 'function') tensor.dispose = () => {};
        out[name] = tensor;
      }
      return out;
    },

    /** Asks the bridge to release the session; failures are deliberately ignored (best effort). */
    async release() {
      try { await globalThis.__nativeOrt.release(key); } catch {}
    },

    // No-op profiling hooks.
    startProfiling() {},
    endProfiling() {},
  };
}
|
|
| |
|
|
/**
 * Creates an ORT-Web inference session. For 'gpu' intent the WebGPU execution
 * provider is tried first; if that attempt throws, the session is created
 * with the WASM provider instead. Backend events are dispatched around each
 * attempt.
 *
 * @param {ArrayBuffer|Uint8Array} modelBytes - Serialized model passed to `InferenceSession.create`.
 * @param {'gpu'|'cpu'} intent - Requested execution target.
 * @param {object} [options]
 * @param {boolean} [options.profile=false] - Enables session profiling, and
 *   WebGPU profiling mode when the intent is 'gpu'.
 * @param {*} [options.preferredOutputLocation] - Applied to the WebGPU attempt only.
 * @returns {Promise<{session: object, realizedBackend: 'web-webgpu'|'web-wasm'}>}
 * @throws {Error} When `globalThis.ort` is absent, or when WASM session creation fails.
 */
async function loadWeb(modelBytes, intent, { profile = false, preferredOutputLocation } = {}) {
  const ort = globalThis.ort;
  if (!ort) {
    throw new Error('[backend] ort-web is not loaded — include vendor/onnxruntime-web/ort.all.min.js before using loadSession');
  }

  // An explicit global override wins; otherwise resolve against the page base URI.
  ort.env.wasm.wasmPaths =
    globalThis.__ORT_WASM_PATHS__ ||
    new URL('vendor/onnxruntime-web/', document.baseURI).toString();
  ort.env.wasm.numThreads = navigator.hardwareConcurrency || 4;

  const wantsGpu = intent === 'gpu';
  if (ort.env.webgpu) {
    const gpuProfiling = profile && wantsGpu;
    ort.env.webgpu.profilingMode = gpuProfiling ? 'default' : 'off';
  }

  // One options object is shared by both attempts and adjusted in between.
  const sessionOpts = { graphOptimizationLevel: 'all' };
  if (profile) {
    sessionOpts.enableProfiling = true;
  }

  if (wantsGpu) {
    const gpuBackend = 'web-webgpu';
    sessionOpts.executionProviders = [{ name: 'webgpu', preferredLayout: 'NCHW' }];
    if (preferredOutputLocation) {
      sessionOpts.preferredOutputLocation = preferredOutputLocation;
    }
    dispatchBackendEvent({ kind: 'attempt', backend: gpuBackend });
    try {
      const gpuSession = await ort.InferenceSession.create(modelBytes, sessionOpts);
      dispatchBackendEvent({ kind: 'success', backend: gpuBackend });
      return { session: gpuSession, realizedBackend: gpuBackend };
    } catch (err) {
      console.warn('[backend] WebGPU failed, falling back to WASM. Reason:', err);
      dispatchBackendEvent({ kind: 'fallback', backend: gpuBackend, reason: shortenReason(err) });
      // The output-location preference is not carried over to the WASM attempt.
      delete sessionOpts.preferredOutputLocation;
    }
  }

  const wasmBackend = 'web-wasm';
  sessionOpts.executionProviders = ['wasm'];
  dispatchBackendEvent({ kind: 'attempt', backend: wasmBackend });
  const wasmSession = await ort.InferenceSession.create(modelBytes, sessionOpts);
  dispatchBackendEvent({ kind: 'success', backend: wasmBackend });
  return { session: wasmSession, realizedBackend: wasmBackend };
}
|
|
| |
|
|
/**
 * Normalizes model bytes to an ArrayBuffer.
 *
 * An ArrayBuffer is returned as-is (same object, no copy). Any ArrayBuffer
 * view (Uint8Array, other typed arrays, DataView) is copied into a new buffer
 * containing exactly the bytes the view covers, so a view into a larger
 * backing buffer does not drag the rest of that buffer along.
 *
 * @param {ArrayBuffer|ArrayBufferView} modelBytes - Serialized model.
 * @returns {ArrayBuffer} Buffer holding the model bytes.
 * @throws {Error} When `modelBytes` is neither an ArrayBuffer nor a view.
 */
function toArrayBuffer(modelBytes) {
  if (modelBytes instanceof ArrayBuffer) return modelBytes;

  if (ArrayBuffer.isView(modelBytes)) {
    const { buffer, byteOffset, byteLength } = modelBytes;
    return buffer.slice(byteOffset, byteOffset + byteLength);
  }

  // `typeof` alone reports "object" for null, arrays, Blobs, etc.; name the
  // actual kind of value so the message is actionable.
  let got;
  if (modelBytes === null) {
    got = 'null';
  } else if (typeof modelBytes === 'object') {
    got = modelBytes.constructor?.name || 'object';
  } else {
    got = typeof modelBytes;
  }
  throw new Error(`[backend] modelBytes must be ArrayBuffer or Uint8Array, got ${got}`);
}
|
|