diff --git "a/dist/ort.webgpu.bundle.min.mjs" "b/dist/ort.webgpu.bundle.min.mjs" new file mode 100755--- /dev/null +++ "b/dist/ort.webgpu.bundle.min.mjs" @@ -0,0 +1,2844 @@ +/*! + * ONNX Runtime Web v1.23.0 + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. + */ +var fi=Object.defineProperty;var _v=Object.getOwnPropertyDescriptor;var wv=Object.getOwnPropertyNames;var vv=Object.prototype.hasOwnProperty;var mi=(e=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(e,{get:(t,o)=>(typeof require<"u"?require:t)[o]}):e)(function(e){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+e+'" is not supported')});var X=(e,t)=>()=>(e&&(t=e(e=0)),t);var Xt=(e,t)=>{for(var o in t)fi(e,o,{get:t[o],enumerable:!0})},xv=(e,t,o,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let u of wv(t))!vv.call(e,u)&&u!==o&&fi(e,u,{get:()=>t[u],enumerable:!(n=_v(t,u))||n.enumerable});return e};var pr=e=>xv(fi({},"__esModule",{value:!0}),e);var Dr,Vt,Wt,$v,Bd,hi=X(()=>{Dr=new Map,Vt=[],Wt=(e,t,o)=>{if(t&&typeof t.init=="function"&&typeof t.createInferenceSessionHandler=="function"){let n=Dr.get(e);if(n===void 0)Dr.set(e,{backend:t,priority:o});else{if(n.priority>o)return;if(n.priority===o&&n.backend!==t)throw new Error(`cannot register backend "${e}" using priority ${o}`)}if(o>=0){let u=Vt.indexOf(e);u!==-1&&Vt.splice(u,1);for(let c=0;c{let t=Dr.get(e);if(!t)return"backend not found.";if(t.initialized)return t.backend;if(t.aborted)return t.error;{let o=!!t.initPromise;try{return o||(t.initPromise=t.backend.init(e)),await t.initPromise,t.initialized=!0,t.backend}catch(n){return o||(t.error=`${n}`,t.aborted=!0),t.error}finally{delete t.initPromise}}},Bd=async e=>{let t=e.executionProviders||[],o=t.map(g=>typeof g=="string"?g:g.name),n=o.length===0?Vt:o,u,c=[],p=new Set;for(let g of n){let b=await $v(g);typeof b=="string"?c.push({name:g,err:b}):(u||(u=b),u===b&&p.add(g))}if(!u)throw new Error(`no 
available backend found. ERR: ${c.map(g=>`[${g.name}] ${g.err}`).join(", ")}`);for(let{name:g,err:b}of c)o.includes(g)&&console.warn(`removing requested execution provider "${g}" from session options because it is not available: ${b}`);let m=t.filter(g=>p.has(typeof g=="string"?g:g.name));return[u,new Proxy(e,{get:(g,b)=>b==="executionProviders"?m:Reflect.get(g,b)})]}});var Dd=X(()=>{hi()});var jd,Md=X(()=>{jd="1.23.0"});var Rd,Ye,gi=X(()=>{Md();Rd="warning",Ye={wasm:{},webgl:{},webgpu:{},versions:{common:jd},set logLevel(e){if(e!==void 0){if(typeof e!="string"||["verbose","info","warning","error","fatal"].indexOf(e)===-1)throw new Error(`Unsupported logging level: ${e}`);Rd=e}},get logLevel(){return Rd}};Object.defineProperty(Ye,"logLevel",{enumerable:!0})});var ze,Ud=X(()=>{gi();ze=Ye});var Nd,Vd,Wd=X(()=>{Nd=(e,t)=>{let o=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);o.width=e.dims[3],o.height=e.dims[2];let n=o.getContext("2d");if(n!=null){let u,c;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(u=e.dims[2],c=e.dims[3]):(u=e.dims[3],c=e.dims[2]);let p=t?.format!==void 0?t.format:"RGB",m=t?.norm,g,b;m===void 0||m.mean===void 0?g=[255,255,255,255]:typeof m.mean=="number"?g=[m.mean,m.mean,m.mean,m.mean]:(g=[m.mean[0],m.mean[1],m.mean[2],0],m.mean[3]!==void 0&&(g[3]=m.mean[3])),m===void 0||m.bias===void 0?b=[0,0,0,0]:typeof m.bias=="number"?b=[m.bias,m.bias,m.bias,m.bias]:(b=[m.bias[0],m.bias[1],m.bias[2],0],m.bias[3]!==void 0&&(b[3]=m.bias[3]));let _=c*u,w=0,x=_,S=_*2,C=-1;p==="RGBA"?(w=0,x=_,S=_*2,C=_*3):p==="RGB"?(w=0,x=_,S=_*2):p==="RBG"&&(w=0,S=_,x=_*2);for(let T=0;T{let o=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d"),n;if(o!=null){let u,c,p;t?.tensorLayout!==void 0&&t.tensorLayout==="NHWC"?(u=e.dims[2],c=e.dims[1],p=e.dims[3]):(u=e.dims[3],c=e.dims[2],p=e.dims[1]);let m=t!==void 0&&t.format!==void 0?t.format:"RGB",g=t?.norm,b,_;g===void 0||g.mean===void 
0?b=[255,255,255,255]:typeof g.mean=="number"?b=[g.mean,g.mean,g.mean,g.mean]:(b=[g.mean[0],g.mean[1],g.mean[2],255],g.mean[3]!==void 0&&(b[3]=g.mean[3])),g===void 0||g.bias===void 0?_=[0,0,0,0]:typeof g.bias=="number"?_=[g.bias,g.bias,g.bias,g.bias]:(_=[g.bias[0],g.bias[1],g.bias[2],0],g.bias[3]!==void 0&&(_[3]=g.bias[3]));let w=c*u;if(t!==void 0&&(t.format!==void 0&&p===4&&t.format!=="RGBA"||p===3&&t.format!=="RGB"&&t.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");let x=4,S=0,C=1,T=2,z=3,k=0,A=w,O=w*2,B=-1;m==="RGBA"?(k=0,A=w,O=w*2,B=w*3):m==="RGB"?(k=0,A=w,O=w*2):m==="RBG"&&(k=0,O=w,A=w*2),n=o.createImageData(u,c);for(let W=0;W{jr();yi=(e,t)=>{if(e===void 0)throw new Error("Image buffer must be defined");if(t.height===void 0||t.width===void 0)throw new Error("Image height and width must be defined");if(t.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");let{height:o,width:n}=t,u=t.norm??{mean:255,bias:0},c,p;typeof u.mean=="number"?c=[u.mean,u.mean,u.mean,u.mean]:c=[u.mean[0],u.mean[1],u.mean[2],u.mean[3]??255],typeof u.bias=="number"?p=[u.bias,u.bias,u.bias,u.bias]:p=[u.bias[0],u.bias[1],u.bias[2],u.bias[3]??0];let m=t.format!==void 0?t.format:"RGBA",g=t.tensorFormat!==void 0&&t.tensorFormat!==void 0?t.tensorFormat:"RGB",b=o*n,_=g==="RGBA"?new Float32Array(b*4):new Float32Array(b*3),w=4,x=0,S=1,C=2,T=3,z=0,k=b,A=b*2,O=-1;m==="RGB"&&(w=3,x=0,S=1,C=2,T=-1),g==="RGBA"?O=b*3:g==="RBG"?(z=0,A=b,k=b*2):g==="BGR"&&(A=0,k=b,z=b*2);for(let W=0;W{let o=typeof HTMLImageElement<"u"&&e instanceof HTMLImageElement,n=typeof ImageData<"u"&&e instanceof ImageData,u=typeof ImageBitmap<"u"&&e instanceof ImageBitmap,c=typeof e=="string",p,m=t??{},g=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},b=_=>typeof HTMLCanvasElement<"u"&&_ instanceof HTMLCanvasElement||_ instanceof 
OffscreenCanvas?_.getContext("2d"):null;if(o){let _=g();_.width=e.width,_.height=e.height;let w=b(_);if(w!=null){let x=e.height,S=e.width;if(t!==void 0&&t.resizedHeight!==void 0&&t.resizedWidth!==void 0&&(x=t.resizedHeight,S=t.resizedWidth),t!==void 0){if(m=t,t.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");m.tensorFormat="RGBA",m.height=x,m.width=S}else m.tensorFormat="RGBA",m.height=x,m.width=S;w.drawImage(e,0,0),p=w.getImageData(0,0,S,x).data}else throw new Error("Can not access image data")}else if(n){let _,w;if(t!==void 0&&t.resizedWidth!==void 0&&t.resizedHeight!==void 0?(_=t.resizedHeight,w=t.resizedWidth):(_=e.height,w=e.width),t!==void 0&&(m=t),m.format="RGBA",m.height=_,m.width=w,t!==void 0){let x=g();x.width=w,x.height=_;let S=b(x);if(S!=null)S.putImageData(e,0,0),p=S.getImageData(0,0,w,_).data;else throw new Error("Can not access image data")}else p=e.data}else if(u){if(t===void 0)throw new Error("Please provide image config with format for Imagebitmap");let _=g();_.width=e.width,_.height=e.height;let w=b(_);if(w!=null){let x=e.height,S=e.width;return w.drawImage(e,0,0,S,x),p=w.getImageData(0,0,S,x).data,m.height=x,m.width=S,yi(p,m)}else throw new Error("Can not access image data")}else{if(c)return new Promise((_,w)=>{let x=g(),S=b(x);if(!e||!S)return w();let C=new Image;C.crossOrigin="Anonymous",C.src=e,C.onload=()=>{x.width=C.width,x.height=C.height,S.drawImage(C,0,0,x.width,x.height);let T=S.getImageData(0,0,x.width,x.height);m.height=x.height,m.width=x.width,_(yi(T.data,m))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(p!==void 0)return yi(p,m);throw new Error("Input data provided is not supported - aborted tensor creation")},Gd=(e,t)=>{let{width:o,height:n,download:u,dispose:c}=t,p=[1,n,o,4];return new Ke({location:"texture",type:"float32",texture:e,dims:p,download:u,dispose:c})},Hd=(e,t)=>{let{dataType:o,dims:n,download:u,dispose:c}=t;return new 
Ke({location:"gpu-buffer",type:o??"float32",gpuBuffer:e,dims:n,download:u,dispose:c})},Fd=(e,t)=>{let{dataType:o,dims:n,download:u,dispose:c}=t;return new Ke({location:"ml-tensor",type:o??"float32",mlTensor:e,dims:n,download:u,dispose:c})},qd=(e,t,o)=>new Ke({location:"cpu-pinned",type:e,data:t,dims:o??[t.length]})});var Lt,fr,Jd,Zd,Qd=X(()=>{Lt=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array],["int4",Uint8Array],["uint4",Uint8Array]]),fr=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]),Jd=!1,Zd=()=>{if(!Jd){Jd=!0;let e=typeof BigInt64Array<"u"&&BigInt64Array.from,t=typeof BigUint64Array<"u"&&BigUint64Array.from,o=globalThis.Float16Array,n=typeof o<"u"&&o.from;e&&(Lt.set("int64",BigInt64Array),fr.set(BigInt64Array,"int64")),t&&(Lt.set("uint64",BigUint64Array),fr.set(BigUint64Array,"uint64")),n?(Lt.set("float16",o),fr.set(o,"float16")):Lt.set("float16",Uint16Array)}}});var Yd,Xd,ec=X(()=>{jr();Yd=e=>{let t=1;for(let o=0;o{switch(e.location){case"cpu":return new Ke(e.type,e.data,t);case"cpu-pinned":return new Ke({location:"cpu-pinned",data:e.data,type:e.type,dims:t});case"texture":return new Ke({location:"texture",texture:e.texture,type:e.type,dims:t});case"gpu-buffer":return new Ke({location:"gpu-buffer",gpuBuffer:e.gpuBuffer,type:e.type,dims:t});case"ml-tensor":return new Ke({location:"ml-tensor",mlTensor:e.mlTensor,type:e.type,dims:t});default:throw new Error(`tensorReshape: tensor location ${e.location} is not supported`)}}});var Ke,jr=X(()=>{Wd();Kd();Qd();ec();Ke=class{constructor(t,o,n){Zd();let u,c;if(typeof t=="object"&&"location"in t)switch(this.dataLocation=t.location,u=t.type,c=t.dims,t.location){case"cpu-pinned":{let m=Lt.get(u);if(!m)throw new 
TypeError(`unsupported type "${u}" to create tensor from pinned buffer`);if(!(t.data instanceof m))throw new TypeError(`buffer should be of type ${m.name}`);this.cpuData=t.data;break}case"texture":{if(u!=="float32")throw new TypeError(`unsupported type "${u}" to create tensor from texture`);this.gpuTextureData=t.texture,this.downloader=t.download,this.disposer=t.dispose;break}case"gpu-buffer":{if(u!=="float32"&&u!=="float16"&&u!=="int32"&&u!=="int64"&&u!=="uint32"&&u!=="uint8"&&u!=="bool"&&u!=="uint4"&&u!=="int4")throw new TypeError(`unsupported type "${u}" to create tensor from gpu buffer`);this.gpuBufferData=t.gpuBuffer,this.downloader=t.download,this.disposer=t.dispose;break}case"ml-tensor":{if(u!=="float32"&&u!=="float16"&&u!=="int32"&&u!=="int64"&&u!=="uint32"&&u!=="uint64"&&u!=="int8"&&u!=="uint8"&&u!=="bool"&&u!=="uint4"&&u!=="int4")throw new TypeError(`unsupported type "${u}" to create tensor from MLTensor`);this.mlTensorData=t.mlTensor,this.downloader=t.download,this.disposer=t.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let m,g;if(typeof t=="string")if(u=t,g=n,t==="string"){if(!Array.isArray(o))throw new TypeError("A string tensor's data must be a string array.");m=o}else{let b=Lt.get(t);if(b===void 0)throw new TypeError(`Unsupported tensor type: ${t}.`);if(Array.isArray(o)){if(t==="float16"&&b===Uint16Array||t==="uint4"||t==="int4")throw new TypeError(`Creating a ${t} tensor from number array is not supported. 
Please use ${b.name} as data.`);t==="uint64"||t==="int64"?m=b.from(o,BigInt):m=b.from(o)}else if(o instanceof b)m=o;else if(o instanceof Uint8ClampedArray)if(t==="uint8")m=Uint8Array.from(o);else throw new TypeError("A Uint8ClampedArray tensor's data must be type of uint8");else if(t==="float16"&&o instanceof Uint16Array&&b!==Uint16Array)m=new globalThis.Float16Array(o.buffer,o.byteOffset,o.length);else throw new TypeError(`A ${u} tensor's data must be type of ${b}`)}else if(g=o,Array.isArray(t)){if(t.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");let b=typeof t[0];if(b==="string")u="string",m=t;else if(b==="boolean")u="bool",m=Uint8Array.from(t);else throw new TypeError(`Invalid element type of data array: ${b}.`)}else if(t instanceof Uint8ClampedArray)u="uint8",m=Uint8Array.from(t);else{let b=fr.get(t.constructor);if(b===void 0)throw new TypeError(`Unsupported type for tensor data: ${t.constructor}.`);u=b,m=t}if(g===void 0)g=[m.length];else if(!Array.isArray(g))throw new TypeError("A tensor's dims must be a number array");c=g,this.cpuData=m,this.dataLocation="cpu"}let p=Yd(c);if(this.cpuData&&p!==this.cpuData.length&&!((u==="uint4"||u==="int4")&&Math.ceil(p/2)===this.cpuData.length))throw new Error(`Tensor's size(${p}) does not match data length(${this.cpuData.length}).`);this.type=u,this.dims=c,this.size=p}static async fromImage(t,o){return Ld(t,o)}static fromTexture(t,o){return Gd(t,o)}static fromGpuBuffer(t,o){return Hd(t,o)}static fromMLTensor(t,o){return Fd(t,o)}static fromPinnedBuffer(t,o,n){return qd(t,o,n)}toDataURL(t){return Nd(this,t)}toImageData(t){return Vd(this,t)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. 
Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}get mlTensor(){if(this.ensureValid(),!this.mlTensorData)throw new Error("The data is not stored as a WebNN MLTensor.");return this.mlTensorData}async getData(t){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":case"ml-tensor":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;let o=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=o,t&&this.disposer&&(this.disposer(),this.disposer=void 0),o}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.mlTensorData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(t){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return Xd(this,t)}}});var st,bi=X(()=>{jr();st=Ke});var Mr,tc,Xe,Ze,_i=X(()=>{gi();Mr=(e,t)=>{(typeof Ye.trace>"u"?!Ye.wasm.trace:!Ye.trace)||console.timeStamp(`${e}::ORT::${t}`)},tc=(e,t)=>{let o=new 
Error().stack?.split(/\r\n|\r|\n/g)||[],n=!1;for(let u=0;u{(typeof Ye.trace>"u"?!Ye.wasm.trace:!Ye.trace)||tc("BEGIN",e)},Ze=e=>{(typeof Ye.trace>"u"?!Ye.wasm.trace:!Ye.trace)||tc("END",e)}});var Rr,rc=X(()=>{hi();bi();_i();Rr=class e{constructor(t){this.handler=t}async run(t,o,n){Xe();let u={},c={};if(typeof t!="object"||t===null||t instanceof st||Array.isArray(t))throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");let p=!0;if(typeof o=="object"){if(o===null)throw new TypeError("Unexpected argument[1]: cannot be null.");if(o instanceof st)throw new TypeError("'fetches' cannot be a Tensor");if(Array.isArray(o)){if(o.length===0)throw new TypeError("'fetches' cannot be an empty array.");p=!1;for(let b of o){if(typeof b!="string")throw new TypeError("'fetches' must be a string array or an object.");if(this.outputNames.indexOf(b)===-1)throw new RangeError(`'fetches' contains invalid output name: ${b}.`);u[b]=null}if(typeof n=="object"&&n!==null)c=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else{let b=!1,_=Object.getOwnPropertyNames(o);for(let w of this.outputNames)if(_.indexOf(w)!==-1){let x=o[w];(x===null||x instanceof st)&&(b=!0,p=!1,u[w]=x)}if(b){if(typeof n=="object"&&n!==null)c=n;else if(typeof n<"u")throw new TypeError("'options' must be an object.")}else c=o}}else if(typeof o<"u")throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");for(let b of this.inputNames)if(typeof t[b]>"u")throw new Error(`input '${b}' is missing in 'feeds'.`);if(p)for(let b of this.outputNames)u[b]=null;let m=await this.handler.run(t,u,c),g={};for(let b in m)if(Object.hasOwnProperty.call(m,b)){let _=m[b];_ instanceof st?g[b]=_:g[b]=new st(_.type,_.data,_.dims)}return Ze(),g}async release(){return this.handler.dispose()}static async create(t,o,n,u){Xe();let c,p={};if(typeof t=="string"){if(c=t,typeof o=="object"&&o!==null)p=o;else if(typeof o<"u")throw new 
TypeError("'options' must be an object.")}else if(t instanceof Uint8Array){if(c=t,typeof o=="object"&&o!==null)p=o;else if(typeof o<"u")throw new TypeError("'options' must be an object.")}else if(t instanceof ArrayBuffer||typeof SharedArrayBuffer<"u"&&t instanceof SharedArrayBuffer){let _=t,w=0,x=t.byteLength;if(typeof o=="object"&&o!==null)p=o;else if(typeof o=="number"){if(w=o,!Number.isSafeInteger(w))throw new RangeError("'byteOffset' must be an integer.");if(w<0||w>=_.byteLength)throw new RangeError(`'byteOffset' is out of range [0, ${_.byteLength}).`);if(x=t.byteLength-w,typeof n=="number"){if(x=n,!Number.isSafeInteger(x))throw new RangeError("'byteLength' must be an integer.");if(x<=0||w+x>_.byteLength)throw new RangeError(`'byteLength' is out of range (0, ${_.byteLength-w}].`);if(typeof u=="object"&&u!==null)p=u;else if(typeof u<"u")throw new TypeError("'options' must be an object.")}else if(typeof n<"u")throw new TypeError("'byteLength' must be a number.")}else if(typeof o<"u")throw new TypeError("'options' must be an object.");c=new Uint8Array(_,w,x)}else throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");let[m,g]=await Bd(p),b=await m.createInferenceSessionHandler(c,g);return Ze(),new e(b)}startProfiling(){this.handler.startProfiling()}endProfiling(){this.handler.endProfiling()}get inputNames(){return this.handler.inputNames}get outputNames(){return this.handler.outputNames}get inputMetadata(){return this.handler.inputMetadata}get outputMetadata(){return this.handler.outputMetadata}}});var Cv,nc=X(()=>{rc();Cv=Rr});var ic=X(()=>{});var oc=X(()=>{});var ac=X(()=>{});var sc=X(()=>{});var wi={};Xt(wi,{InferenceSession:()=>Cv,TRACE:()=>Mr,TRACE_FUNC_BEGIN:()=>Xe,TRACE_FUNC_END:()=>Ze,Tensor:()=>st,env:()=>ze,registerBackend:()=>Wt});var nt=X(()=>{Dd();Ud();nc();bi();ic();oc();_i();ac();sc()});var Ur=X(()=>{"use strict"});var cc={};Xt(cc,{default:()=>Sv});var lc,dc,Sv,pc=X(()=>{"use 
strict";vi();Ot();Nr();lc="ort-wasm-proxy-worker",dc=globalThis.self?.name===lc;dc&&(self.onmessage=e=>{let{type:t,in:o}=e.data;try{switch(t){case"init-wasm":Vr(o.wasm).then(()=>{Wr(o).then(()=>{postMessage({type:t})},n=>{postMessage({type:t,err:n})})},n=>{postMessage({type:t,err:n})});break;case"init-ep":{let{epName:n,env:u}=o;Lr(u,n).then(()=>{postMessage({type:t})},c=>{postMessage({type:t,err:c})});break}case"copy-from":{let{buffer:n}=o,u=mr(n);postMessage({type:t,out:u});break}case"create":{let{model:n,options:u}=o;Gr(n,u).then(c=>{postMessage({type:t,out:c})},c=>{postMessage({type:t,err:c})});break}case"release":Hr(o),postMessage({type:t});break;case"run":{let{sessionId:n,inputIndices:u,inputs:c,outputIndices:p,options:m}=o;Fr(n,u,c,p,new Array(p.length).fill(null),m).then(g=>{g.some(b=>b[3]!=="cpu")?postMessage({type:t,err:"Proxy does not support non-cpu tensor location."}):postMessage({type:t,out:g},Kr([...c,...g]))},g=>{postMessage({type:t,err:g})});break}case"end-profiling":qr(o),postMessage({type:t});break;default:}}catch(n){postMessage({type:t,err:n})}});Sv=dc?null:e=>new Worker(e??et,{type:"module",name:lc})});var mc={};Xt(mc,{default:()=>Tv});var xi,fc,Tv,Iv,hc=X(()=>{"use strict";fc=(xi=import.meta.url,async function(e={}){var t,o,n=e,u=new Promise((r,i)=>{t=r,o=i}),c=typeof window=="object",p=typeof WorkerGlobalScope<"u",m=p&&self.name?.startsWith("em-pthread");n.mountExternalData=(r,i)=>{r.startsWith("./")&&(r=r.substring(2)),(n.MountedFiles||(n.MountedFiles=new Map)).set(r,i)},n.unmountExternalData=()=>{delete n.MountedFiles};var g=globalThis.SharedArrayBuffer??new WebAssembly.Memory({initial:0,maximum:0,shared:!0}).buffer.constructor;let b=r=>async(...i)=>{try{if(n.jsepSessionState)throw new Error("Session already started");let a=n.jsepSessionState={sessionHandle:i[0],errors:[]},s=await r(...i);if(n.jsepSessionState!==a)throw new Error("Session mismatch");n.jsepBackend?.flush();let l=a.errors;if(l.length>0){let d=await 
Promise.all(l);if(d=d.filter(f=>f),d.length>0)throw new Error(d.join(` +`))}return s}finally{n.jsepSessionState=null}};n.jsepInit=(r,i)=>{if(r==="webgpu"){[n.jsepBackend,n.jsepAlloc,n.jsepFree,n.jsepCopy,n.jsepCopyAsync,n.jsepCreateKernel,n.jsepReleaseKernel,n.jsepRunKernel,n.jsepCaptureBegin,n.jsepCaptureEnd,n.jsepReplay]=i;let a=n.jsepBackend;n.jsepRegisterBuffer=(s,l,d,f)=>a.registerBuffer(s,l,d,f),n.jsepGetBuffer=s=>a.getBuffer(s),n.jsepCreateDownloader=(s,l,d)=>a.createDownloader(s,l,d),n.jsepOnCreateSession=s=>{a.onCreateSession(s)},n.jsepOnReleaseSession=s=>{a.onReleaseSession(s)},n.jsepOnRunStart=s=>a.onRunStart(s),n.jsepUploadExternalBuffer=(s,l)=>{a.upload(s,l)}}else if(r==="webnn"){let a=i[0];[n.webnnReserveTensorId,n.webnnReleaseTensorId,n.webnnEnsureTensor,n.webnnUploadTensor,n.webnnDownloadTensor]=i.slice(1),n.webnnReleaseTensorId=n.webnnReleaseTensorId,n.webnnUploadTensor=n.webnnUploadTensor,n.webnnOnRunStart=s=>a.onRunStart(s),n.webnnOnRunEnd=a.onRunEnd.bind(a),n.webnnRegisterMLContext=(s,l)=>{a.registerMLContext(s,l)},n.webnnOnReleaseSession=s=>{a.onReleaseSession(s)},n.webnnCreateMLTensorDownloader=(s,l)=>a.createMLTensorDownloader(s,l),n.webnnRegisterMLTensor=(s,l,d,f)=>a.registerMLTensor(s,l,d,f),n.webnnCreateMLContext=s=>a.createMLContext(s),n.webnnRegisterMLConstant=(s,l,d,f,h,y)=>a.registerMLConstant(s,l,d,f,h,n.MountedFiles,y),n.webnnRegisterGraphInput=a.registerGraphInput.bind(a),n.webnnIsGraphInput=a.isGraphInput.bind(a),n.webnnRegisterGraphOutput=a.registerGraphOutput.bind(a),n.webnnIsGraphOutput=a.isGraphOutput.bind(a),n.webnnCreateTemporaryTensor=a.createTemporaryTensor.bind(a),n.webnnIsGraphInputOutputTypeSupported=a.isGraphInputOutputTypeSupported.bind(a)}};let _=()=>{let r=(i,a,s)=>(...l)=>{let d=de.currData,f=a?.(),h=i(...l),y=a?.();return f!==y&&(i=y,s(f),s=null,a=null),de.currData!=d?de.whenDone():h};(()=>{for(let i 
of["_OrtAppendExecutionProvider","_OrtCreateSession","_OrtRun","_OrtRunWithBinding","_OrtBindInput"])n[i]=r(n[i],()=>n[i],a=>n[i]=a)})(),b!==void 0&&(n._OrtRun=b(n._OrtRun),n._OrtRunWithBinding=b(n._OrtRunWithBinding)),_=void 0};n.asyncInit=()=>{_?.()};var w,x,S=Object.assign({},n),C=(r,i)=>{throw i},T="";(c||p)&&(p?T=self.location.href:typeof document<"u"&&document.currentScript&&(T=document.currentScript.src),xi&&(T=xi),T=T.startsWith("blob:")?"":T.slice(0,T.replace(/[?#].*/,"").lastIndexOf("/")+1),p&&(x=r=>{var i=new XMLHttpRequest;return i.open("GET",r,!1),i.responseType="arraybuffer",i.send(null),new Uint8Array(i.response)}),w=async r=>{if(le(r))return new Promise((a,s)=>{var l=new XMLHttpRequest;l.open("GET",r,!0),l.responseType="arraybuffer",l.onload=()=>{l.status==200||l.status==0&&l.response?a(l.response):s(l.status)},l.onerror=s,l.send(null)});var i=await fetch(r,{credentials:"same-origin"});if(i.ok)return i.arrayBuffer();throw new Error(i.status+" : "+i.url)});var z=console.log.bind(console),k=console.error.bind(console),A=z,O=k;Object.assign(n,S),S=null;var B,W,N,q,K,Q,ne,se,ue,ge,re,Se,fe,ie=n.wasmBinary,ve=!1,le=r=>r.startsWith("file://");function me(){return B.buffer!=q.buffer&&Ie(),q}function ke(){return B.buffer!=q.buffer&&Ie(),K}function je(){return B.buffer!=q.buffer&&Ie(),Q}function he(){return B.buffer!=q.buffer&&Ie(),ne}function R(){return B.buffer!=q.buffer&&Ie(),se}function V(){return B.buffer!=q.buffer&&Ie(),ue}function Ce(){return B.buffer!=q.buffer&&Ie(),ge}function Ne(){return B.buffer!=q.buffer&&Ie(),fe}if(m){let r=function(...s){var l=s.join(" ");console.error(l)},i=function(...s){var l=s.join(" ");postMessage({cmd:"alert",text:l,threadId:Br()})},a=function(s){try{var l=s.data,d=l.cmd;if(d==="load"){let f=[];self.onmessage=h=>f.push(h),self.startWorker=h=>{postMessage({cmd:"loaded"});for(let y of f)a(y);self.onmessage=a};for(let h of 
l.handlers)n[h]&&!n[h].proxy||(n[h]=(...y)=>{postMessage({cmd:"callHandler",handler:h,args:y})},h=="print"&&(A=n[h]),h=="printErr"&&(O=n[h]));B=l.wasmMemory,Ie(),Je(l.wasmModule)}else if(d==="run"){lg(l.pthread_ptr),si(l.pthread_ptr,0,0,1,0,0),Te.threadInitTLS(),Gn(l.pthread_ptr),Ve||(ca(),Ve=!0);try{dg(l.start_routine,l.arg)}catch(f){if(f!="unwind")throw f}}else l.target==="setimmediate"||(d==="checkMailbox"?Ve&&Ir():d&&(O(`worker: received unknown command ${d}`),O(l)))}catch(f){throw Aa(),f}};var e1=r,t1=i,r1=a,Je,Ve=!1;O=r,self.alert=i,self.onunhandledrejection=s=>{throw s.reason||s},self.onmessage=a}function Ie(){var r=B.buffer;n.HEAP8=q=new Int8Array(r),n.HEAP16=Q=new Int16Array(r),n.HEAPU8=K=new Uint8Array(r),n.HEAPU16=ne=new Uint16Array(r),n.HEAP32=se=new Int32Array(r),n.HEAPU32=ue=new Uint32Array(r),n.HEAPF32=ge=new Float32Array(r),n.HEAPF64=fe=new Float64Array(r),n.HEAP64=re=new BigInt64Array(r),n.HEAPU64=Se=new BigUint64Array(r)}function ot(){if(m)return startWorker(n);E.qf()}m||(B=new WebAssembly.Memory({initial:256,maximum:65536,shared:!0}),Ie());var At,Mt=0,Zt=null;function po(r){Mt++}function fo(r){if(--Mt==0&&Zt){var i=Zt;Zt=null,i()}}function pt(r){O(r="Aborted("+r+")"),ve=!0,r+=". 
Build with -sASSERTIONS for more info.";var i=new WebAssembly.RuntimeError(r);throw o(i),i}async function ag(r,i,a){if(!r&&typeof WebAssembly.instantiateStreaming=="function"&&!le(i))try{var s=fetch(i,{credentials:"same-origin"});return await WebAssembly.instantiateStreaming(s,a)}catch(l){O(`wasm streaming compile failed: ${l}`),O("falling back to ArrayBuffer instantiation")}return async function(l,d){try{var f=await async function(h){if(!ie)try{var y=await w(h);return new Uint8Array(y)}catch{}return function(v){if(v==At&&ie)return new Uint8Array(ie);if(x)return x(v);throw"both async and sync fetching of the wasm failed"}(h)}(l);return await WebAssembly.instantiate(f,d)}catch(h){O(`failed to asynchronously prepare wasm: ${h}`),pt(h)}}(i,a)}function mo(){return{a:{rd:ug,Md:sg,v:cg,_:pg,b:mg,o:hg,A:gg,F:yg,r:bg,uc:_g,u:wg,Bb:vg,pf:Co,g:fg,tc:Io,of:Ao,nf:ko,mf:Eo,lf:Po,kf:Oo,jf:zo,hf:Bo,qd:Do,gf:jo,ff:Mo,ef:Ro,df:Uo,Ze:$g,nd:Tg,Ye:Ig,Xe:kg,md:Pg,Ma:Og,na:zg,We:Bg,rc:Vg,Ve:Wg,Ue:Lg,Te:Gg,Se:Hg,Re:Fg,ld:qg,Qe:Gn,Pe:Kg,kd:Jg,Oe:Zg,Ne:Ho,Ab:Qg,sa:Xg,Me:Vn,Le:ey,Ke:ty,ma:iy,zb:oy,Je:ay,Z:sy,qc:uy,Ie:ly,ha:dy,lb:cy,He:fy,Ge:my,fb:hy,Fe:gy,Ee:_y,De:wy,Ce:Jo,Be:Zo,Ae:Qo,ze:vy,cf:Cy,C:Sy,pc:Ty,jd:Iy,ye:Yo,Tb:Ay,xe:ky,we:Ey,ba:Hn,ve:Py,Sb:Oy,id:zy,ue:jy,te:My,se:Ry,re:Uy,hd:Ny,gd:Vy,fd:Wy,qe:Ly,oc:Gy,wa:Hy,pe:Fy,oe:qy,ne:Ky,me:Jy,le:Zy,ke:na,bf:ia,af:oa,je:Dn,sc:aa,pd:sa,$e:ua,od:la,Rb:s_,Qb:yw,qa:iv,S:av,R:ov,Ca:J0,Ga:o_,ed:dw,nc:gw,ie:rv,Y:E0,y:Lb,c:Pb,dd:k_,va:Zb,he:qw,cd:Jw,h:kb,eb:tv,Ja:fv,d:Ab,ja:e0,k:Nb,ge:yv,j:jb,Pb:Lw,s:Bb,t:h_,q:i_,yb:w0,O:t_,Ba:tw,oa:v_,mc:A0,kb:P0,lc:Dw,xb:vw,bd:t0,ad:m_,$c:o0,jb:J_,_c:n0,kc:gv,ra:X0,Ob:e_,wb:Y_,da:ev,la:X_,Zc:q_,N:_0,Yc:r0,Ya:z0,H:Gb,Xc:sv,vb:D_,fe:i0,Aa:cw,La:R_,B:Qb,jc:c0,Wc:a0,Vc:G_,Uc:L_,ic:W_,ee:u0,Xa:z_,aa:sw,ib:dv,hc:pv,hb:mv,db:uv,Tc:H_,Sc:hv,gc:Z0,fa:a_,K:y_,D:Fb,Ia:M_,U:H0,Rc:xw,Qc:l0,Pc:F_,X:d0,e:zb,cb:$0,l:Ob,Oc:S_,Wa:Zw,pa:K_,Nb:p0,i:Eb,Nc:C_,V:Q_,ua:S0,Mb:$w,ub:f0,f:Mb,de:Vw,ce:Rw,m:Ub,Mc:C0,n:Wb,be:Nw,Lc:m0,ae:Mw,Kc:T
0,Jc:Kw,$d:Sw,p:Vb,_d:Fw,Va:M0,Lb:j0,Ua:R0,fc:_w,E:Xb,G:p_,J:n_,bb:Ww,Zd:Uw,Kb:x0,$:x_,ga:A_,Ha:Pw,Fa:k0,Ta:pw,Yd:h0,ab:j_,Ea:$_,Ic:Ow,gb:Ew,Jb:kw,Sa:Cw,Xd:I0,za:mw,Wd:Bw,Vd:aw,Ud:B0,ec:Q0,Ra:fw,W:b_,tb:hw,Hc:ew,Ib:Y0,ta:uw,Qa:E_,Gc:V_,Hb:c_,sb:s0,dc:zw,P:T_,Gb:ow,ka:l_,Td:P_,ea:nw,rb:w_,cc:jw,Fc:Aw,bc:Yw,Ec:Qw,Dc:Hw,$a:Z_,Cc:lv,qb:O_,z:Jb,T:g_,ya:Hb,Ka:d_,Sd:B_,ac:g0,Fb:qb,L:Yb,pb:ww,Da:iw,Bc:cv,Ac:bw,Rd:__,x:Kb,I:f_,ca:Db,Qd:lw,$b:L0,zc:Iw,Pa:Gw,_a:u_,_b:G0,Za:I_,Zb:rw,Pd:Rb,Od:N_,Yb:Tw,Xb:O0,ob:Xw,yc:r_,Wb:b0,ia:F0,xc:y0,M:v0,nb:U_,Nd:nv,xa:K0,Q:q0,Eb:V0,Oa:N0,Na:D0,Db:W0,Cb:U0,w:Xy,a:B,_e:Bn,Ld:eb,Kd:tb,mb:rb,wc:nb,Jd:ib,Id:ob,Hd:ab,Gd:sb,Fd:ub,Vb:lb,Ed:db,Dd:cb,Cd:pb,Bd:fb,Ad:hb,Ub:gb,zd:yb,yd:bb,xd:wb,wd:vb,vd:xb,ud:$b,vc:Cb,td:Sb,sd:Tb}}}var ho={1635076:(r,i,a,s,l)=>{if(n===void 0||!n.MountedFiles)return 1;let d=Ue(Number(r>>>0));d.startsWith("./")&&(d=d.substring(2));let f=n.MountedFiles.get(d);if(!f)return 2;let h=Number(i>>>0),y=Number(a>>>0),v=Number(s>>>0),$=l;if(h+y>f.byteLength)return 3;try{let I=f.subarray(h,h+y);switch($){case 0:ke().set(I,v>>>0);break;case 1:n.webgpuUploadExternalBuffer?n.webgpuUploadExternalBuffer(v,I):n.jsepUploadExternalBuffer(v,I);break;default:return 4}return 0}catch{return 4}},1635900:(r,i,a)=>{let s=Number(r),l=Number(i),d=Ue(a),f=new Uint8Array(l);f.set(ke().subarray(s>>>0,s+l>>>0));{let h=new File([f],d,{type:"application/octet-stream"}),y=URL.createObjectURL(h);window.open(y,"_blank")}},1636424:(r,i,a)=>{n.webnnUploadTensor(r,ke().subarray(i>>>0,i+a>>>0))},1636488:()=>n.webnnReserveTensorId(),1636530:r=>{n.webnnReleaseTensorId(r)},1636567:()=>{n.jsepCaptureBegin()},1636598:()=>{n.jsepCaptureEnd()},1636627:()=>{n.jsepReplay()},1636652:r=>n.jsepAlloc(r),1636685:r=>n.jsepFree(r),1636717:(r,i,a)=>{n.jsepCopy(Number(r),Number(i),Number(a),!0)},1636780:(r,i,a)=>{n.jsepCopy(Number(r),Number(i),Number(a))},1636837:()=>typeof wasmOffsetConverter<"u",1636894:r=>{n.jsepCreateKernel("Abs",r,void 
0)},1636945:r=>{n.jsepCreateKernel("Neg",r,void 0)},1636996:r=>{n.jsepCreateKernel("Floor",r,void 0)},1637049:r=>{n.jsepCreateKernel("Ceil",r,void 0)},1637101:r=>{n.jsepCreateKernel("Reciprocal",r,void 0)},1637159:r=>{n.jsepCreateKernel("Sqrt",r,void 0)},1637211:r=>{n.jsepCreateKernel("Exp",r,void 0)},1637262:r=>{n.jsepCreateKernel("Erf",r,void 0)},1637313:r=>{n.jsepCreateKernel("Sigmoid",r,void 0)},1637368:(r,i,a)=>{n.jsepCreateKernel("HardSigmoid",r,{alpha:i,beta:a})},1637447:r=>{n.jsepCreateKernel("Log",r,void 0)},1637498:r=>{n.jsepCreateKernel("Sin",r,void 0)},1637549:r=>{n.jsepCreateKernel("Cos",r,void 0)},1637600:r=>{n.jsepCreateKernel("Tan",r,void 0)},1637651:r=>{n.jsepCreateKernel("Asin",r,void 0)},1637703:r=>{n.jsepCreateKernel("Acos",r,void 0)},1637755:r=>{n.jsepCreateKernel("Atan",r,void 0)},1637807:r=>{n.jsepCreateKernel("Sinh",r,void 0)},1637859:r=>{n.jsepCreateKernel("Cosh",r,void 0)},1637911:r=>{n.jsepCreateKernel("Asinh",r,void 0)},1637964:r=>{n.jsepCreateKernel("Acosh",r,void 0)},1638017:r=>{n.jsepCreateKernel("Atanh",r,void 0)},1638070:r=>{n.jsepCreateKernel("Tanh",r,void 0)},1638122:r=>{n.jsepCreateKernel("Not",r,void 0)},1638173:(r,i,a)=>{n.jsepCreateKernel("Clip",r,{min:i,max:a})},1638242:r=>{n.jsepCreateKernel("Clip",r,void 0)},1638294:(r,i)=>{n.jsepCreateKernel("Elu",r,{alpha:i})},1638352:r=>{n.jsepCreateKernel("Gelu",r,void 0)},1638404:r=>{n.jsepCreateKernel("Relu",r,void 0)},1638456:(r,i)=>{n.jsepCreateKernel("LeakyRelu",r,{alpha:i})},1638520:(r,i)=>{n.jsepCreateKernel("ThresholdedRelu",r,{alpha:i})},1638590:(r,i)=>{n.jsepCreateKernel("Cast",r,{to:i})},1638648:r=>{n.jsepCreateKernel("Add",r,void 0)},1638699:r=>{n.jsepCreateKernel("Sub",r,void 0)},1638750:r=>{n.jsepCreateKernel("Mul",r,void 0)},1638801:r=>{n.jsepCreateKernel("Div",r,void 0)},1638852:r=>{n.jsepCreateKernel("Pow",r,void 0)},1638903:r=>{n.jsepCreateKernel("Equal",r,void 0)},1638956:r=>{n.jsepCreateKernel("Greater",r,void 
0)},1639011:r=>{n.jsepCreateKernel("GreaterOrEqual",r,void 0)},1639073:r=>{n.jsepCreateKernel("Less",r,void 0)},1639125:r=>{n.jsepCreateKernel("LessOrEqual",r,void 0)},1639184:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceMean",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1639359:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceMax",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1639533:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceMin",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1639707:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceProd",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1639882:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceSum",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640056:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceL1",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640229:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceL2",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640402:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceLogSum",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640579:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceSumSquare",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640759:(r,i,a,s,l)=>{n.jsepCreateKernel("ReduceLogSumExp",r,{keepDims:!!i,noopWithEmptyAxes:!!a,axes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1640939:r=>{n.jsepCreateKernel("Where",r,void 
0)},1640992:(r,i,a)=>{n.jsepCreateKernel("Transpose",r,{perm:i?Array.from(R().subarray(Number(i)>>>0,Number(a)>>>0)):[]})},1641116:(r,i,a,s)=>{n.jsepCreateKernel("DepthToSpace",r,{blocksize:i,mode:Ue(a),format:s?"NHWC":"NCHW"})},1641249:(r,i,a,s)=>{n.jsepCreateKernel("DepthToSpace",r,{blocksize:i,mode:Ue(a),format:s?"NHWC":"NCHW"})},1641382:(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>{n.jsepCreateKernel("ConvTranspose",r,{format:y?"NHWC":"NCHW",autoPad:i,dilations:[a],group:s,kernelShape:[l],pads:[d,f],strides:[h],wIsConst:()=>!!me()[v>>>0],outputPadding:$?Array.from(R().subarray(Number($)>>>0,Number(I)>>>0)):[],outputShape:P?Array.from(R().subarray(Number(P)>>>0,Number(D)>>>0)):[],activation:Ue(H)})},1641815:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("ConvTranspose",r,{format:h?"NHWC":"NCHW",autoPad:i,dilations:Array.from(R().subarray(Number(a)>>>0,2+(Number(a)>>>0)>>>0)),group:s,kernelShape:Array.from(R().subarray(Number(l)>>>0,2+(Number(l)>>>0)>>>0)),pads:Array.from(R().subarray(Number(d)>>>0,4+(Number(d)>>>0)>>>0)),strides:Array.from(R().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),wIsConst:()=>!!me()[y>>>0],outputPadding:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],outputShape:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[],activation:Ue(D)})},1642476:(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>{n.jsepCreateKernel("ConvTranspose",r,{format:y?"NHWC":"NCHW",autoPad:i,dilations:[a],group:s,kernelShape:[l],pads:[d,f],strides:[h],wIsConst:()=>!!me()[v>>>0],outputPadding:$?Array.from(R().subarray(Number($)>>>0,Number(I)>>>0)):[],outputShape:P?Array.from(R().subarray(Number(P)>>>0,Number(D)>>>0)):[],activation:Ue(H)})},1642909:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("ConvTranspose",r,{format:h?"NHWC":"NCHW",autoPad:i,dilations:Array.from(R().subarray(Number(a)>>>0,2+(Number(a)>>>0)>>>0)),group:s,kernelShape:Array.from(R().subarray(Number(l)>>>0,2+(Number(l)>>>0)>>>0)),pads:Array.from(R().subarray(Number(d)>>>0,4+(Number(d)>>>0)>>>0)),strides:Ar
ray.from(R().subarray(Number(f)>>>0,2+(Number(f)>>>0)>>>0)),wIsConst:()=>!!me()[y>>>0],outputPadding:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],outputShape:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[],activation:Ue(D)})},1643570:(r,i)=>{n.jsepCreateKernel("GlobalAveragePool",r,{format:i?"NHWC":"NCHW"})},1643661:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("AveragePool",r,{format:D?"NHWC":"NCHW",auto_pad:i,ceil_mode:a,count_include_pad:s,storage_order:l,dilations:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[],kernel_shape:h?Array.from(R().subarray(Number(h)>>>0,Number(y)>>>0)):[],pads:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],strides:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[]})},1644140:(r,i)=>{n.jsepCreateKernel("GlobalAveragePool",r,{format:i?"NHWC":"NCHW"})},1644231:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("AveragePool",r,{format:D?"NHWC":"NCHW",auto_pad:i,ceil_mode:a,count_include_pad:s,storage_order:l,dilations:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[],kernel_shape:h?Array.from(R().subarray(Number(h)>>>0,Number(y)>>>0)):[],pads:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],strides:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[]})},1644710:(r,i)=>{n.jsepCreateKernel("GlobalMaxPool",r,{format:i?"NHWC":"NCHW"})},1644797:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("MaxPool",r,{format:D?"NHWC":"NCHW",auto_pad:i,ceil_mode:a,count_include_pad:s,storage_order:l,dilations:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[],kernel_shape:h?Array.from(R().subarray(Number(h)>>>0,Number(y)>>>0)):[],pads:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],strides:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[]})},1645272:(r,i)=>{n.jsepCreateKernel("GlobalMaxPool",r,{format:i?"NHWC":"NCHW"})},1645359:(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>{n.jsepCreateKernel("MaxPool",r,{format:D?"NHWC":"NCHW",auto_pad:i,ceil_mode:a,count_in
clude_pad:s,storage_order:l,dilations:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[],kernel_shape:h?Array.from(R().subarray(Number(h)>>>0,Number(y)>>>0)):[],pads:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],strides:I?Array.from(R().subarray(Number(I)>>>0,Number(P)>>>0)):[]})},1645834:(r,i,a,s,l)=>{n.jsepCreateKernel("Gemm",r,{alpha:i,beta:a,transA:s,transB:l})},1645938:r=>{n.jsepCreateKernel("MatMul",r,void 0)},1645992:(r,i,a,s)=>{n.jsepCreateKernel("ArgMax",r,{keepDims:!!i,selectLastIndex:!!a,axis:s})},1646100:(r,i,a,s)=>{n.jsepCreateKernel("ArgMin",r,{keepDims:!!i,selectLastIndex:!!a,axis:s})},1646208:(r,i)=>{n.jsepCreateKernel("Softmax",r,{axis:i})},1646271:(r,i)=>{n.jsepCreateKernel("Concat",r,{axis:i})},1646331:(r,i,a,s,l)=>{n.jsepCreateKernel("Split",r,{axis:i,numOutputs:a,splitSizes:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1646487:r=>{n.jsepCreateKernel("Expand",r,void 0)},1646541:(r,i)=>{n.jsepCreateKernel("Gather",r,{axis:Number(i)})},1646612:(r,i)=>{n.jsepCreateKernel("GatherElements",r,{axis:Number(i)})},1646691:(r,i)=>{n.jsepCreateKernel("GatherND",r,{batch_dims:Number(i)})},1646770:(r,i,a,s,l,d,f,h,y,v,$)=>{n.jsepCreateKernel("Resize",r,{antialias:i,axes:a?Array.from(R().subarray(Number(a)>>>0,Number(s)>>>0)):[],coordinateTransformMode:Ue(l),cubicCoeffA:d,excludeOutside:f,extrapolationValue:h,keepAspectRatioPolicy:Ue(y),mode:Ue(v),nearestMode:Ue($)})},1647132:(r,i,a,s,l,d,f)=>{n.jsepCreateKernel("Slice",r,{starts:i?Array.from(R().subarray(Number(i)>>>0,Number(a)>>>0)):[],ends:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[],axes:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[]})},1647396:r=>{n.jsepCreateKernel("Tile",r,void 0)},1647448:(r,i,a)=>{n.jsepCreateKernel("InstanceNormalization",r,{epsilon:i,format:a?"NHWC":"NCHW"})},1647562:(r,i,a)=>{n.jsepCreateKernel("InstanceNormalization",r,{epsilon:i,format:a?"NHWC":"NCHW"})},1647676:r=>{n.jsepCreateKernel("Range",r,void 
0)},1647729:(r,i)=>{n.jsepCreateKernel("Einsum",r,{equation:Ue(i)})},1647810:(r,i,a,s,l)=>{n.jsepCreateKernel("Pad",r,{mode:i,value:a,pads:s?Array.from(R().subarray(Number(s)>>>0,Number(l)>>>0)):[]})},1647953:(r,i,a,s,l,d)=>{n.jsepCreateKernel("BatchNormalization",r,{epsilon:i,momentum:a,spatial:!!l,trainingMode:!!s,format:d?"NHWC":"NCHW"})},1648122:(r,i,a,s,l,d)=>{n.jsepCreateKernel("BatchNormalization",r,{epsilon:i,momentum:a,spatial:!!l,trainingMode:!!s,format:d?"NHWC":"NCHW"})},1648291:(r,i,a)=>{n.jsepCreateKernel("CumSum",r,{exclusive:Number(i),reverse:Number(a)})},1648388:(r,i,a)=>{n.jsepCreateKernel("DequantizeLinear",r,{axis:i,blockSize:a})},1648478:(r,i,a,s,l)=>{n.jsepCreateKernel("GridSample",r,{align_corners:i,mode:Ue(a),padding_mode:Ue(s),format:l?"NHWC":"NCHW"})},1648648:(r,i,a,s,l)=>{n.jsepCreateKernel("GridSample",r,{align_corners:i,mode:Ue(a),padding_mode:Ue(s),format:l?"NHWC":"NCHW"})},1648818:(r,i)=>{n.jsepCreateKernel("ScatterND",r,{reduction:Ue(i)})},1648903:(r,i,a,s,l,d,f,h,y)=>{n.jsepCreateKernel("Attention",r,{numHeads:i,isUnidirectional:a,maskFilterValue:s,scale:l,doRotary:d,qkvHiddenSizes:f?Array.from(R().subarray(Number(h)>>>0,Number(h)+f>>>0)):[],pastPresentShareBuffer:!!y})},1649175:r=>{n.jsepCreateKernel("BiasAdd",r,void 0)},1649230:r=>{n.jsepCreateKernel("BiasSplitGelu",r,void 0)},1649291:r=>{n.jsepCreateKernel("FastGelu",r,void 0)},1649347:(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)=>{n.jsepCreateKernel("Conv",r,{format:I?"NHWC":"NCHW",auto_pad:i,dilations:a?Array.from(R().subarray(Number(a)>>>0,Number(s)>>>0)):[],group:l,kernel_shape:d?Array.from(R().subarray(Number(d)>>>0,Number(f)>>>0)):[],pads:h?Array.from(R().subarray(Number(h)>>>0,Number(y)>>>0)):[],strides:v?Array.from(R().subarray(Number(v)>>>0,Number($)>>>0)):[],w_is_const:()=>!!me()[Number(P)>>>0],activation:Ue(D),activation_params:H?Array.from(Ce().subarray(Number(H)>>>0,Number(Z)>>>0)):[]})},1649931:r=>{n.jsepCreateKernel("Gelu",r,void 
0)},1649983:(r,i,a,s,l,d,f,h,y)=>{n.jsepCreateKernel("GroupQueryAttention",r,{numHeads:i,kvNumHeads:a,scale:s,softcap:l,doRotary:d,rotaryInterleaved:f,smoothSoftmax:h,localWindowSize:y})},1650200:(r,i,a,s)=>{n.jsepCreateKernel("LayerNormalization",r,{axis:i,epsilon:a,simplified:!!s})},1650311:(r,i,a,s)=>{n.jsepCreateKernel("LayerNormalization",r,{axis:i,epsilon:a,simplified:!!s})},1650422:(r,i,a,s,l,d)=>{n.jsepCreateKernel("MatMulNBits",r,{k:i,n:a,accuracyLevel:s,bits:l,blockSize:d})},1650549:(r,i,a,s,l,d)=>{n.jsepCreateKernel("MultiHeadAttention",r,{numHeads:i,isUnidirectional:a,maskFilterValue:s,scale:l,doRotary:d})},1650708:(r,i)=>{n.jsepCreateKernel("QuickGelu",r,{alpha:i})},1650772:(r,i,a,s,l)=>{n.jsepCreateKernel("RotaryEmbedding",r,{interleaved:!!i,numHeads:a,rotaryEmbeddingDim:s,scale:l})},1650911:(r,i,a)=>{n.jsepCreateKernel("SkipLayerNormalization",r,{epsilon:i,simplified:!!a})},1651013:(r,i,a)=>{n.jsepCreateKernel("SkipLayerNormalization",r,{epsilon:i,simplified:!!a})},1651115:(r,i,a,s)=>{n.jsepCreateKernel("GatherBlockQuantized",r,{gatherAxis:i,quantizeAxis:a,blockSize:s})},1651236:r=>{n.jsepReleaseKernel(r)},1651270:(r,i)=>n.jsepRunKernel(Number(r),Number(i),n.jsepSessionState.sessionHandle,n.jsepSessionState.errors)};function sg(r,i,a){return de.handleAsync(async()=>{await n.jsepCopyAsync(Number(r),Number(i),Number(a))})}function ug(){return typeof wasmOffsetConverter<"u"}class go{name="ExitStatus";constructor(i){this.message=`Program terminated with exit(${i})`,this.status=i}}var yo=r=>{r.terminate(),r.onmessage=i=>{}},bo=r=>{var i=Te.pthreads[r];Te.returnWorkerToPool(i)},_o=[],wo=r=>{var i=Te.getNewWorker();if(!i)return 6;Te.runningWorkers.push(i),Te.pthreads[r.pthread_ptr]=i,i.pthread_ptr=r.pthread_ptr;var a={cmd:"run",start_routine:r.startRoutine,arg:r.arg,pthread_ptr:r.pthread_ptr};return 
i.postMessage(a,r.transferList),0},ar=0,On=()=>ar>0,j=()=>Ma(),M=r=>Da(r),zn=r=>ja(r),qe=r=>r<-9007199254740992||r>9007199254740992?NaN:Number(r),Re=(r,i,a,...s)=>{for(var l=2*s.length,d=j(),f=zn(8*l),h=f>>>3,y=0;y>>0]=v)}var $=ka(r,i,l,f,a);return M(d),$};function Bn(r){if(m)return Re(0,0,1,r);N=r,On()||(Te.terminateAllThreads(),ve=!0),C(0,new go(r))}var vo=r=>{if(r instanceof go||r=="unwind")return N;C(0,r)};function xo(r){if(m)return Re(1,0,0,r);Dn(r)}var Dn=(r,i)=>{if(N=r,m)throw xo(r),"unwind";Bn(r)},Te={unusedWorkers:[],runningWorkers:[],tlsInitFunctions:[],pthreads:{},init(){m||Te.initMainThread()},initMainThread(){for(var r,i=n.numThreads-1;i--;)Te.allocateUnusedWorker();r=()=>{po(),Te.loadWasmModuleToAllWorkers(()=>fo())},_o.unshift(r)},terminateAllThreads:()=>{for(var r of Te.runningWorkers)yo(r);for(var r of Te.unusedWorkers)yo(r);Te.unusedWorkers=[],Te.runningWorkers=[],Te.pthreads={}},returnWorkerToPool:r=>{var i=r.pthread_ptr;delete Te.pthreads[i],Te.unusedWorkers.push(r),Te.runningWorkers.splice(Te.runningWorkers.indexOf(r),1),r.pthread_ptr=0,Ea(i)},threadInitTLS(){Te.tlsInitFunctions.forEach(r=>r())},loadWasmModuleToWorker:r=>new Promise(i=>{r.onmessage=l=>{var d=l.data,f=d.cmd;if(d.targetThread&&d.targetThread!=Br()){var h=Te.pthreads[d.targetThread];h?h.postMessage(d,d.transferList):O(`Internal error! Worker sent a message "${f}" to target pthread ${d.targetThread}, but that thread no longer exists!`)}else f==="checkMailbox"?Ir():f==="spawnThread"?wo(d):f==="cleanupThread"?bo(d.thread):f==="loaded"?(r.loaded=!0,i(r)):f==="alert"?alert(`Thread ${d.threadId}: ${d.text}`):d.target==="setimmediate"?r.postMessage(d):f==="callHandler"?n[d.handler](...d.args):f&&O(`worker sent an unknown command ${f}`)},r.onerror=l=>{throw O(`worker sent an error! 
${l.filename}:${l.lineno}: ${l.message}`),l};var a=[];for(var s of[])n.propertyIsEnumerable(s)&&a.push(s);r.postMessage({cmd:"load",handlers:a,wasmMemory:B,wasmModule:W})}),loadWasmModuleToAllWorkers(r){if(m)return r();Promise.all(Te.unusedWorkers.map(Te.loadWasmModuleToWorker)).then(r)},allocateUnusedWorker(){var r;r=new Worker((()=>{let i=URL;return import.meta.url>"file:"&&import.meta.url<"file;"?new i("ort.webgpu.bundle.min.mjs",import.meta.url):new URL(import.meta.url)})(),{type:"module",workerData:"em-pthread",name:"em-pthread"}),Te.unusedWorkers.push(r)},getNewWorker:()=>(Te.unusedWorkers.length==0&&(Te.allocateUnusedWorker(),Te.loadWasmModuleToWorker(Te.unusedWorkers[0])),Te.unusedWorkers.pop())},lg=r=>{Ie();var i=V()[r+52>>>2>>>0],a=V()[r+56>>>2>>>0];Ba(i,i-a),M(i)},dg=(r,i)=>{var a;ar=0,a=di(r,i),On()?N=a:ui(a)},Cr=[],Sr=0;function cg(r){var i=new jn(r>>>=0);return i.get_caught()||(i.set_caught(!0),Sr--),i.set_rethrown(!1),Cr.push(i),Ua(r),Va(r)}var Rt=0,pg=()=>{U(0,0);var r=Cr.pop();Ra(r.excPtr),Rt=0};class jn{constructor(i){this.excPtr=i,this.ptr=i-24}set_type(i){V()[this.ptr+4>>>2>>>0]=i}get_type(){return V()[this.ptr+4>>>2>>>0]}set_destructor(i){V()[this.ptr+8>>>2>>>0]=i}get_destructor(){return V()[this.ptr+8>>>2>>>0]}set_caught(i){i=i?1:0,me()[this.ptr+12>>>0]=i}get_caught(){return me()[this.ptr+12>>>0]!=0}set_rethrown(i){i=i?1:0,me()[this.ptr+13>>>0]=i}get_rethrown(){return me()[this.ptr+13>>>0]!=0}init(i,a){this.set_adjusted_ptr(0),this.set_type(i),this.set_destructor(a)}set_adjusted_ptr(i){V()[this.ptr+16>>>2>>>0]=i}get_adjusted_ptr(){return V()[this.ptr+16>>>2>>>0]}}function fg(r){throw Rt||(Rt=r>>>=0),Rt}var Tr=r=>za(r),sr=r=>{var i=Rt;if(!i)return Tr(0),0;var a=new jn(i);a.set_adjusted_ptr(i);var s=a.get_type();if(!s)return Tr(0),i;for(var l of r){if(l===0||l===s)break;var d=a.ptr+16;if(Na(l,s,d))return Tr(l),i}return Tr(s),i};function mg(){return sr([])}function hg(r){return sr([r>>>=0])}function gg(r,i){return sr([r>>>=0,i>>>=0])}function 
yg(r,i,a){return sr([r>>>=0,i>>>=0,a>>>=0])}function bg(r,i,a,s){return sr([r>>>=0,i>>>=0,a>>>=0,s>>>=0])}var _g=()=>{var r=Cr.pop();r||pt("no exception to throw");var i=r.excPtr;throw r.get_rethrown()||(Cr.push(r),r.set_rethrown(!0),r.set_caught(!1),Sr++),Rt=i};function wg(r,i,a){throw i>>>=0,a>>>=0,new jn(r>>>=0).init(i,a),Sr++,Rt=r}var vg=()=>Sr;function $o(r,i,a,s){return m?Re(2,0,1,r,i,a,s):Co(r,i,a,s)}var xg=()=>g!==void 0;function Co(r,i,a,s){if(r>>>=0,i>>>=0,a>>>=0,s>>>=0,!xg())return 6;var l=[];if(m&&l.length===0)return $o(r,i,a,s);var d={startRoutine:a,pthread_ptr:r,arg:s,transferList:l};return m?(d.cmd="spawnThread",postMessage(d,l),0):wo(d)}var So=typeof TextDecoder<"u"?new TextDecoder:void 0,To=(r,i=0,a=NaN)=>{for(var s=(i>>>=0)+a,l=i;r[l]&&!(l>=s);)++l;if(l-i>16&&r.buffer&&So)return So.decode(r.buffer instanceof ArrayBuffer?r.subarray(i,l):r.slice(i,l));for(var d="";i>10,56320|1023&v)}}else d+=String.fromCharCode((31&f)<<6|h)}else d+=String.fromCharCode(f)}return d},Ue=(r,i)=>(r>>>=0)?To(ke(),r,i):"",Mn={varargs:void 0,getStr:r=>Ue(r)};function Io(r,i,a){return m?Re(3,0,1,r,i,a):(a>>>=0,Mn.varargs=a,0)}function Ao(r,i){if(m)return Re(4,0,1,r,i);i>>>=0}var kt=r=>{for(var i=0,a=0;a=55296&&s<=57343?(i+=4,++a):i+=3}return i},Et=(r,i,a)=>((s,l,d,f)=>{if(!(f>0))return 0;for(var h=d>>>=0,y=d+f-1,v=0;v=55296&&$<=57343&&($=65536+((1023&$)<<10)|1023&s.charCodeAt(++v)),$<=127){if(d>=y)break;l[d++>>>0]=$}else if($<=2047){if(d+1>=y)break;l[d++>>>0]=192|$>>6,l[d++>>>0]=128|63&$}else if($<=65535){if(d+2>=y)break;l[d++>>>0]=224|$>>12,l[d++>>>0]=128|$>>6&63,l[d++>>>0]=128|63&$}else{if(d+3>=y)break;l[d++>>>0]=240|$>>18,l[d++>>>0]=128|$>>12&63,l[d++>>>0]=128|$>>6&63,l[d++>>>0]=128|63&$}}return l[d>>>0]=0,d-h})(r,ke(),i,a);function ko(r,i){if(m)return Re(5,0,1,r,i);r>>>=0,i>>>=0}function Eo(r,i,a){if(m)return Re(6,0,1,r,i,a);i>>>=0,a>>>=0}function Po(r,i,a){return m?Re(7,0,1,r,i,a):(a>>>=0,Mn.varargs=a,0)}function Oo(r,i){if(m)return Re(8,0,1,r,i);r>>>=0,i>>>=0}function 
zo(r,i,a){if(m)return Re(9,0,1,r,i,a);i>>>=0}function Bo(r,i,a,s){if(m)return Re(10,0,1,r,i,a,s);i>>>=0,a>>>=0}function Do(r,i,a,s){if(m)return Re(11,0,1,r,i,a,s);i>>>=0,s>>>=0,Mn.varargs=s}function jo(r,i,a,s){if(m)return Re(12,0,1,r,i,a,s);i>>>=0,a>>>=0,s>>>=0}function Mo(r){if(m)return Re(13,0,1,r);r>>>=0}function Ro(r,i){if(m)return Re(14,0,1,r,i);r>>>=0,i>>>=0}function Uo(r,i,a){if(m)return Re(15,0,1,r,i,a);i>>>=0}var No,Vo,$g=()=>pt(""),Cg=r=>{if(r===null)return"null";var i=typeof r;return i==="object"||i==="array"||i==="function"?r.toString():""+r},ft=r=>{for(var i="",a=r;ke()[a>>>0];)i+=No[ke()[a++>>>0]];return i},Rn={},Un={},Sg={},Ut=r=>{throw new Vo(r)};function Ct(r,i,a={}){return function(s,l,d={}){var f=l.name;if(s||Ut(`type "${f}" must have a positive integer typeid pointer`),Un.hasOwnProperty(s)){if(d.ignoreDuplicateRegistrations)return;Ut(`Cannot register type '${f}' twice`)}if(Un[s]=l,delete Sg[s],Rn.hasOwnProperty(s)){var h=Rn[s];delete Rn[s],h.forEach(y=>y())}}(r,i,a)}var Wo=(r,i,a)=>{switch(i){case 1:return a?s=>me()[s>>>0]:s=>ke()[s>>>0];case 2:return a?s=>je()[s>>>1>>>0]:s=>he()[s>>>1>>>0];case 4:return a?s=>R()[s>>>2>>>0]:s=>V()[s>>>2>>>0];case 8:return a?s=>re[s>>>3]:s=>Se[s>>>3];default:throw new TypeError(`invalid integer width (${i}): ${r}`)}};function Tg(r,i,a,s,l){r>>>=0,a>>>=0;var d=(i=ft(i>>>=0)).indexOf("u")!=-1;Ct(r,{name:i,fromWireType:f=>f,toWireType:function(f,h){if(typeof h!="bigint"&&typeof h!="number")throw new TypeError(`Cannot convert "${Cg(h)}" to ${this.name}`);return typeof h=="number"&&(h=BigInt(h)),h},argPackAdvance:Pt,readValueFromPointer:Wo(i,a,!d),destructorFunction:null})}var Pt=8;function Ig(r,i,a,s){Ct(r>>>=0,{name:i=ft(i>>>=0),fromWireType:function(l){return!!l},toWireType:function(l,d){return d?a:s},argPackAdvance:Pt,readValueFromPointer:function(l){return this.fromWireType(ke()[l>>>0])},destructorFunction:null})}var Nn=[],St=[];function Vn(r){(r>>>=0)>9&&--St[r+1]==0&&(St[r]=void 0,Nn.push(r))}var 
Me={toValue:r=>(r||Ut("Cannot use deleted val. handle = "+r),St[r]),toHandle:r=>{switch(r){case void 0:return 2;case null:return 4;case!0:return 6;case!1:return 8;default:{let i=Nn.pop()||St.length;return St[i]=r,St[i+1]=1,i}}}};function Wn(r){return this.fromWireType(V()[r>>>2>>>0])}var Ag={name:"emscripten::val",fromWireType:r=>{var i=Me.toValue(r);return Vn(r),i},toWireType:(r,i)=>Me.toHandle(i),argPackAdvance:Pt,readValueFromPointer:Wn,destructorFunction:null};function kg(r){return Ct(r>>>=0,Ag)}var Eg=(r,i)=>{switch(i){case 4:return function(a){return this.fromWireType(Ce()[a>>>2>>>0])};case 8:return function(a){return this.fromWireType(Ne()[a>>>3>>>0])};default:throw new TypeError(`invalid float width (${i}): ${r}`)}},Pg=function(r,i,a){a>>>=0,Ct(r>>>=0,{name:i=ft(i>>>=0),fromWireType:s=>s,toWireType:(s,l)=>l,argPackAdvance:Pt,readValueFromPointer:Eg(i,a),destructorFunction:null})};function Og(r,i,a,s,l){r>>>=0,a>>>=0,i=ft(i>>>=0),l===-1&&(l=4294967295);var d=y=>y;if(s===0){var f=32-8*a;d=y=>y<>>f}var h=i.includes("unsigned");Ct(r,{name:i,fromWireType:d,toWireType:h?function(y,v){return this.name,v>>>0}:function(y,v){return this.name,v},argPackAdvance:Pt,readValueFromPointer:Wo(i,a,s!==0),destructorFunction:null})}function zg(r,i,a){a>>>=0;var s=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array][i];function l(d){var f=V()[d>>>2>>>0],h=V()[d+4>>>2>>>0];return new s(me().buffer,h,f)}Ct(r>>>=0,{name:a=ft(a),fromWireType:l,argPackAdvance:Pt,readValueFromPointer:l},{ignoreDuplicateRegistrations:!0})}function Bg(r,i){Ct(r>>>=0,{name:i=ft(i>>>=0),fromWireType(a){for(var s,l=V()[a>>>2>>>0],d=a+4,f=d,h=0;h<=l;++h){var y=d+h;if(h==l||ke()[y>>>0]==0){var v=Ue(f,y-f);s===void 0?s=v:(s+="\0",s+=v),f=y+1}}return rt(a),s},toWireType(a,s){var l;s instanceof ArrayBuffer&&(s=new Uint8Array(s));var d=typeof s=="string";d||s instanceof Uint8Array||s instanceof Uint8ClampedArray||s instanceof 
Int8Array||Ut("Cannot pass non-string to std::string"),l=d?kt(s):s.length;var f=Yt(4+l+1),h=f+4;if(V()[f>>>2>>>0]=l,d)Et(s,h,l+1);else if(d)for(var y=0;y255&&(rt(f),Ut("String has UTF-16 code units that do not fit in 8 bits")),ke()[h+y>>>0]=v}else for(y=0;y>>0]=s[y];return a!==null&&a.push(rt,f),f},argPackAdvance:Pt,readValueFromPointer:Wn,destructorFunction(a){rt(a)}})}var Lo=typeof TextDecoder<"u"?new TextDecoder("utf-16le"):void 0,Dg=(r,i)=>{for(var a=r,s=a>>1,l=s+i/2;!(s>=l)&&he()[s>>>0];)++s;if((a=s<<1)-r>32&&Lo)return Lo.decode(ke().slice(r,a));for(var d="",f=0;!(f>=i/2);++f){var h=je()[r+2*f>>>1>>>0];if(h==0)break;d+=String.fromCharCode(h)}return d},jg=(r,i,a)=>{if(a??=2147483647,a<2)return 0;for(var s=i,l=(a-=2)<2*r.length?a/2:r.length,d=0;d>>1>>>0]=f,i+=2}return je()[i>>>1>>>0]=0,i-s},Mg=r=>2*r.length,Rg=(r,i)=>{for(var a=0,s="";!(a>=i/4);){var l=R()[r+4*a>>>2>>>0];if(l==0)break;if(++a,l>=65536){var d=l-65536;s+=String.fromCharCode(55296|d>>10,56320|1023&d)}else s+=String.fromCharCode(l)}return s},Ug=(r,i,a)=>{if(i>>>=0,a??=2147483647,a<4)return 0;for(var s=i,l=s+a-4,d=0;d=55296&&f<=57343&&(f=65536+((1023&f)<<10)|1023&r.charCodeAt(++d)),R()[i>>>2>>>0]=f,(i+=4)+4>l)break}return R()[i>>>2>>>0]=0,i-s},Ng=r=>{for(var i=0,a=0;a=55296&&s<=57343&&++a,i+=4}return i},Vg=function(r,i,a){var s,l,d,f;r>>>=0,i>>>=0,a=ft(a>>>=0),i===2?(s=Dg,l=jg,f=Mg,d=h=>he()[h>>>1>>>0]):i===4&&(s=Rg,l=Ug,f=Ng,d=h=>V()[h>>>2>>>0]),Ct(r,{name:a,fromWireType:h=>{for(var y,v=V()[h>>>2>>>0],$=h+4,I=0;I<=v;++I){var P=h+4+I*i;if(I==v||d(P)==0){var D=s($,P-$);y===void 0?y=D:(y+="\0",y+=D),$=P+i}}return rt(h),y},toWireType:(h,y)=>{typeof y!="string"&&Ut(`Cannot pass non-string to C++ string type ${a}`);var v=f(y),$=Yt(4+v+i);return V()[$>>>2>>>0]=v/i,l(y,$+4,v+i),h!==null&&h.push(rt,$),$},argPackAdvance:Pt,readValueFromPointer:Wn,destructorFunction(h){rt(h)}})},Wg=function(r,i){Ct(r>>>=0,{isVoid:!0,name:i=ft(i>>>=0),argPackAdvance:0,fromWireType:()=>{},toWireType:(a,s)=>{}})};function 
Lg(r){si(r>>>=0,!p,1,!c,131072,!1),Te.threadInitTLS()}var Ln=r=>{if(!ve)try{r(),(()=>{if(!On())try{m?ui(N):Dn(N)}catch(i){vo(i)}})()}catch(i){vo(i)}};function Gn(r){if(r>>>=0,typeof Atomics.waitAsync=="function"){Atomics.waitAsync(R(),r>>>2,r).value.then(Ir);var i=r+128;Atomics.store(R(),i>>>2,1)}}var Ir=()=>{var r=Br();r&&(Gn(r),Ln(Oa))};function Gg(r,i){if((r>>>=0)==(i>>>=0))setTimeout(Ir);else if(m)postMessage({targetThread:r,cmd:"checkMailbox"});else{var a=Te.pthreads[r];if(!a)return;a.postMessage({cmd:"checkMailbox"})}}var Ar=[];function Hg(r,i,a,s,l){i>>>=0,a>>>=0,l>>>=0,s/=2,Ar.length=s;for(var d=l>>>3,f=0;f>>0];var h=i?ho[i]:Ib[r];Te.currentProxiedOperationCallerThread=a;var y=h(...Ar);return Te.currentProxiedOperationCallerThread=0,y}var Fg=()=>{ar=0};function qg(r){r>>>=0,m?postMessage({cmd:"cleanupThread",thread:r}):bo(r)}function Kg(r){}var kr=(r,i)=>{var a,s,l,d=Un[r];return d===void 0&&Ut(`${i} has unknown type ${a=r,s=da(a),l=ft(s),rt(s),l}`),d},Go=(r,i,a)=>{var s=[],l=r.toWireType(s,a);return s.length&&(V()[i>>>2>>>0]=Me.toHandle(s)),l};function Jg(r,i,a){return r>>>=0,i>>>=0,a>>>=0,r=Me.toValue(r),i=kr(i,"emval::as"),Go(i,a,r)}function Zg(r,i){return r>>>=0,i>>>=0,r=Me.toValue(r),(i=kr(i,"emval::as")).toWireType(null,r)}var Er=r=>{try{return r()}catch(i){pt(i)}},Nt=()=>{ar+=1},mt=()=>{ar-=1},de={instrumentWasmImports(r){var i=/^(invoke_.*|__asyncjs__.*)$/;for(let[a,s]of Object.entries(r))typeof s=="function"&&(s.isAsync||i.test(a))},instrumentWasmExports(r){var i={};for(let[a,s]of Object.entries(r))i[a]=typeof s=="function"?(...l)=>{de.exportCallStack.push(a);try{return s(...l)}finally{ve||(de.exportCallStack.pop(),de.maybeStopUnwind())}}:s;return i},State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:65536,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],getCallStackId(r){var i=de.callStackNameToId[r];return i===void 
0&&(i=de.callStackId++,de.callStackNameToId[r]=i,de.callStackIdToName[i]=r),i},maybeStopUnwind(){de.currData&&de.state===de.State.Unwinding&&de.exportCallStack.length===0&&(de.state=de.State.Normal,Nt(),Er(Pd),typeof Fibers<"u"&&Fibers.trampoline())},whenDone:()=>new Promise((r,i)=>{de.asyncPromiseHandlers={resolve:r,reject:i}}),allocateData(){var r=Yt(12+de.StackSize);return de.setDataHeader(r,r+12,de.StackSize),de.setDataRewindFunc(r),r},setDataHeader(r,i,a){V()[r>>>2>>>0]=i,V()[r+4>>>2>>>0]=i+a},setDataRewindFunc(r){var i=de.exportCallStack[0],a=de.getCallStackId(i);R()[r+8>>>2>>>0]=a},getDataRewindFuncName(r){var i=R()[r+8>>>2>>>0];return de.callStackIdToName[i]},getDataRewindFunc:r=>E[r],doRewind(r){var i=de.getDataRewindFuncName(r),a=de.getDataRewindFunc(i);return mt(),a()},handleSleep(r){if(!ve){if(de.state===de.State.Normal){var i=!1,a=!1;r((s=0)=>{if(!ve&&(de.handleSleepReturnValue=s,i=!0,a)){de.state=de.State.Rewinding,Er(()=>Od(de.currData)),typeof MainLoop<"u"&&MainLoop.func&&MainLoop.resume();var l,d=!1;try{l=de.doRewind(de.currData)}catch(y){l=y,d=!0}var f=!1;if(!de.currData){var h=de.asyncPromiseHandlers;h&&(de.asyncPromiseHandlers=null,(d?h.reject:h.resolve)(l),f=!0)}if(d&&!f)throw l}}),a=!0,i||(de.state=de.State.Unwinding,de.currData=de.allocateData(),typeof MainLoop<"u"&&MainLoop.func&&MainLoop.pause(),Er(()=>Ed(de.currData)))}else de.state===de.State.Rewinding?(de.state=de.State.Normal,Er(zd),rt(de.currData),de.currData=null,de.sleepCallbacks.forEach(Ln)):pt(`invalid state: ${de.state}`);return de.handleSleepReturnValue}},handleAsync:r=>de.handleSleep(i=>{r().then(i)})},Ho=function(r){return r>>>=0,de.handleAsync(async()=>{var i=await Me.toValue(r);return Me.toHandle(i)})};Ho.isAsync=!0;var Pr=[];function Qg(r,i,a,s){return i>>>=0,a>>>=0,s>>>=0,(r=Pr[r>>>=0])(null,i=Me.toValue(i),a,s)}var Yg={},Or=r=>{var i=Yg[r];return i===void 0?ft(r):i};function Xg(r,i,a,s,l){return 
i>>>=0,a>>>=0,s>>>=0,l>>>=0,(r=Pr[r>>>=0])(i=Me.toValue(i),i[a=Or(a)],s,l)}function ey(r,i){return r>>>=0,i>>>=0,(r=Me.toValue(r))==Me.toValue(i)}var Fo=()=>typeof globalThis=="object"?globalThis:Function("return this")();function ty(r){return(r>>>=0)==0?Me.toHandle(Fo()):(r=Or(r),Me.toHandle(Fo()[r]))}var ry=r=>{var i=Pr.length;return Pr.push(r),i},ny=(r,i)=>{for(var a=new Array(r),s=0;s>>2>>>0],"parameter "+s);return a},qo=(r,i)=>Object.defineProperty(i,"name",{value:r});function iy(r,i,a){var s=ny(r,i>>>=0),l=s.shift();r--;var d=`return function (obj, func, destructorsRef, args) { +`,f=0,h=[];a===0&&h.push("obj");for(var y=["retType"],v=[l],$=0;$D.name).join(", ")}) => ${l.name}>`;return ry(qo(P,I))}function oy(r){return r=Or(r>>>=0),Me.toHandle(n[r])}function ay(r,i){return r>>>=0,i>>>=0,r=Me.toValue(r),i=Me.toValue(i),Me.toHandle(r[i])}function sy(r){(r>>>=0)>9&&(St[r+1]+=1)}function uy(){return Me.toHandle([])}function ly(r){r>>>=0,r=Me.toValue(r);for(var i=new Array(r.length),a=0;a>>=0,Me.toHandle(Or(r))}function cy(){return Me.toHandle({})}var py=r=>{for(;r.length;){var i=r.pop();r.pop()(i)}};function fy(r){r>>>=0;var i=Me.toValue(r);py(i),Vn(r)}function my(r,i,a){r>>>=0,i>>>=0,a>>>=0,r=Me.toValue(r),i=Me.toValue(i),a=Me.toValue(a),r[i]=a}function hy(r,i){i>>>=0;var a=(r=kr(r>>>=0,"_emval_take_value")).readValueFromPointer(i);return Me.toHandle(a)}function gy(r,i){r=qe(r),i>>>=0;var a=new Date(1e3*r);R()[i>>>2>>>0]=a.getUTCSeconds(),R()[i+4>>>2>>>0]=a.getUTCMinutes(),R()[i+8>>>2>>>0]=a.getUTCHours(),R()[i+12>>>2>>>0]=a.getUTCDate(),R()[i+16>>>2>>>0]=a.getUTCMonth(),R()[i+20>>>2>>>0]=a.getUTCFullYear()-1900,R()[i+24>>>2>>>0]=a.getUTCDay();var s=Date.UTC(a.getUTCFullYear(),0,1,0,0,0,0),l=(a.getTime()-s)/864e5|0;R()[i+28>>>2>>>0]=l}var yy=[0,31,60,91,121,152,182,213,244,274,305,335],by=[0,31,59,90,120,151,181,212,243,273,304,334],Ko=r=>{var i;return((i=r.getFullYear())%4!=0||i%100==0&&i%400!=0?by:yy)[r.getMonth()]+r.getDate()-1};function 
_y(r,i){r=qe(r),i>>>=0;var a=new Date(1e3*r);R()[i>>>2>>>0]=a.getSeconds(),R()[i+4>>>2>>>0]=a.getMinutes(),R()[i+8>>>2>>>0]=a.getHours(),R()[i+12>>>2>>>0]=a.getDate(),R()[i+16>>>2>>>0]=a.getMonth(),R()[i+20>>>2>>>0]=a.getFullYear()-1900,R()[i+24>>>2>>>0]=a.getDay();var s=0|Ko(a);R()[i+28>>>2>>>0]=s,R()[i+36>>>2>>>0]=-60*a.getTimezoneOffset();var l=new Date(a.getFullYear(),0,1),d=new Date(a.getFullYear(),6,1).getTimezoneOffset(),f=l.getTimezoneOffset(),h=0|(d!=f&&a.getTimezoneOffset()==Math.min(f,d));R()[i+32>>>2>>>0]=h}var wy=function(r){r>>>=0;var i=(()=>{var a=new Date(R()[r+20>>>2>>>0]+1900,R()[r+16>>>2>>>0],R()[r+12>>>2>>>0],R()[r+8>>>2>>>0],R()[r+4>>>2>>>0],R()[r>>>2>>>0],0),s=R()[r+32>>>2>>>0],l=a.getTimezoneOffset(),d=new Date(a.getFullYear(),0,1),f=new Date(a.getFullYear(),6,1).getTimezoneOffset(),h=d.getTimezoneOffset(),y=Math.min(h,f);if(s<0)R()[r+32>>>2>>>0]=+(f!=h&&y==l);else if(s>0!=(y==l)){var v=Math.max(h,f),$=s>0?y:v;a.setTime(a.getTime()+6e4*($-l))}R()[r+24>>>2>>>0]=a.getDay();var I=0|Ko(a);R()[r+28>>>2>>>0]=I,R()[r>>>2>>>0]=a.getSeconds(),R()[r+4>>>2>>>0]=a.getMinutes(),R()[r+8>>>2>>>0]=a.getHours(),R()[r+12>>>2>>>0]=a.getDate(),R()[r+16>>>2>>>0]=a.getMonth(),R()[r+20>>>2>>>0]=a.getYear();var P=a.getTime();return isNaN(P)?-1:P/1e3})();return BigInt(i)};function Jo(r,i,a,s,l,d,f){return m?Re(16,0,1,r,i,a,s,l,d,f):(r>>>=0,l=qe(l),d>>>=0,f>>>=0,-52)}function Zo(r,i,a,s,l,d){if(m)return Re(17,0,1,r,i,a,s,l,d);r>>>=0,i>>>=0,d=qe(d)}var ur={},Hn=()=>performance.timeOrigin+performance.now();function Qo(r,i){if(m)return Re(18,0,1,r,i);if(ur[r]&&(clearTimeout(ur[r].id),delete ur[r]),!i)return 0;var a=setTimeout(()=>{delete ur[r],Ln(()=>Pa(r,Hn()))},i);return ur[r]={id:a,timeout_ms:i},0}var vy=function(r,i,a,s){r>>>=0,i>>>=0,a>>>=0,s>>>=0;var l=new Date().getFullYear(),d=new Date(l,0,1),f=new Date(l,6,1),h=d.getTimezoneOffset(),y=f.getTimezoneOffset(),v=Math.max(h,y);V()[r>>>2>>>0]=60*v,R()[i>>>2>>>0]=+(h!=y);var $=D=>{var 
H=D>=0?"-":"+",Z=Math.abs(D);return`UTC${H}${String(Math.floor(Z/60)).padStart(2,"0")}${String(Z%60).padStart(2,"0")}`},I=$(h),P=$(y);yDate.now(),xy=1,$y=r=>r>=0&&r<=3;function Cy(r,i,a){if(i=qe(i),a>>>=0,!$y(r))return 28;var s;if(r===0)s=Yo();else{if(!xy)return 52;s=Hn()}var l=Math.round(1e3*s*1e3);return re[a>>>3]=BigInt(l),0}var Fn=[],Xo=(r,i,a)=>{var s=((l,d)=>{var f;for(Fn.length=0;f=ke()[l++>>>0];){var h=f!=105;d+=(h&=f!=112)&&d%8?4:0,Fn.push(f==112?V()[d>>>2>>>0]:f==106?re[d>>>3]:f==105?R()[d>>>2>>>0]:Ne()[d>>>3>>>0]),d+=h?8:4}return Fn})(i,a);return ho[r](...s)};function Sy(r,i,a){return Xo(r>>>=0,i>>>=0,a>>>=0)}function Ty(r,i,a){return Xo(r>>>=0,i>>>=0,a>>>=0)}var lr=r=>{lr.shown||={},lr.shown[r]||(lr.shown[r]=1,O(r))},Iy=()=>{};function Ay(r,i){return O(Ue(r>>>=0,i>>>=0))}var ky=()=>{throw Nt(),"unwind"},ea=()=>4294901760;function Ey(){return ea()}var Py=()=>1,Oy=()=>navigator.hardwareConcurrency;function zy(r){return pt("Cannot use emscripten_pc_get_function without -sUSE_OFFSET_CONVERTER"),0}var By=(r,i)=>Math.ceil(r/i)*i,Dy=r=>{var i=(r-B.buffer.byteLength+65535)/65536|0;try{return B.grow(i),Ie(),1}catch{}};function jy(r){r>>>=0;var i=ke().length;if(r<=i)return!1;var a=ea();if(r>a)return!1;for(var s=1;s<=4;s*=2){var l=i*(1+.2/s);l=Math.min(l,r+100663296);var d=Math.min(a,By(Math.max(r,l),65536));if(Dy(d))return!0}return!1}var zr=r=>(pt("Cannot use convertFrameToPC (needed by __builtin_return_address) without -sUSE_OFFSET_CONVERTER"),0),Qt={},ta=r=>{r.forEach(i=>{var a=zr(i);a&&(Qt[a]=i)})},ra=()=>new Error().stack.toString();function My(){var r=ra().split(` +`);return r[0]=="Error"&&r.shift(),ta(r),Qt.last_addr=zr(r[3]),Qt.last_stack=r,Qt.last_addr}function Ry(r,i,a){var s;r>>>=0,i>>>=0,Qt.last_addr==r?s=Qt.last_stack:((s=ra().split(` +`))[0]=="Error"&&s.shift(),ta(s));for(var l=3;s[l]&&zr(s[l])!=r;)++l;for(var d=0;d>>2>>>0]=zr(s[d+l]);return d}var ht=r=>{var i=kt(r)+1,a=zn(i);return 
Et(r,a,i),a},G={Internals:{jsObjects:[],jsObjectInsert:(r,i)=>{G.Internals.jsObjects[r]=i},bufferOnUnmaps:[],futures:[],futureInsert:(r,i)=>{G.Internals.futures[r]=new Promise(a=>i.finally(()=>a(r)))}},getJsObject:r=>{if(r)return G.Internals.jsObjects[r]},importJsAdapter:(r,i=0)=>{var a=$a(i);return G.Internals.jsObjects[a]=r,a},importJsBindGroup:(r,i=0)=>{var a=Jn(i);return G.Internals.jsObjects[a]=r,a},importJsBindGroupLayout:(r,i=0)=>{var a=Zn(i);return G.Internals.jsObjects[a]=r,a},importJsBuffer:(r,i=0)=>{r.mapState!="pending"||pt(void 0);var a=r.mapState=="mapped"?3:1,s=Ca(i,a);return G.Internals.jsObjectInsert(s,r),r.mapState=="mapped"&&(G.Internals.bufferOnUnmaps[s]=[]),s},importJsCommandBuffer:(r,i=0)=>{var a=Qn(i);return G.Internals.jsObjects[a]=r,a},importJsCommandEncoder:(r,i=0)=>{var a=Yn(i);return G.Internals.jsObjects[a]=r,a},importJsComputePassEncoder:(r,i=0)=>{var a=Xn(i);return G.Internals.jsObjects[a]=r,a},importJsComputePipeline:(r,i=0)=>{var a=ei(i);return G.Internals.jsObjects[a]=r,a},importJsDevice:(r,i=0)=>{var a=ri(i),s=Sa(i,a);return G.Internals.jsObjectInsert(a,r.queue),G.Internals.jsObjectInsert(s,r),s},importJsPipelineLayout:(r,i=0)=>{var a=ma(i);return G.Internals.jsObjects[a]=r,a},importJsQuerySet:(r,i=0)=>{var a=ti(i);return G.Internals.jsObjects[a]=r,a},importJsQueue:(r,i=0)=>{var a=ri(i);return G.Internals.jsObjects[a]=r,a},importJsRenderBundle:(r,i=0)=>{var a=ha(i);return G.Internals.jsObjects[a]=r,a},importJsRenderBundleEncoder:(r,i=0)=>{var a=ga(i);return G.Internals.jsObjects[a]=r,a},importJsRenderPassEncoder:(r,i=0)=>{var a=ya(i);return G.Internals.jsObjects[a]=r,a},importJsRenderPipeline:(r,i=0)=>{var a=ba(i);return G.Internals.jsObjects[a]=r,a},importJsSampler:(r,i=0)=>{var a=_a(i);return G.Internals.jsObjects[a]=r,a},importJsShaderModule:(r,i=0)=>{var a=Ta(i);return G.Internals.jsObjects[a]=r,a},importJsSurface:(r,i=0)=>{var a=wa(i);return G.Internals.jsObjects[a]=r,a},importJsTexture:(r,i=0)=>{var a=va(i);return 
G.Internals.jsObjects[a]=r,a},importJsTextureView:(r,i=0)=>{var a=xa(i);return G.Internals.jsObjects[a]=r,a},errorCallback:(r,i,a,s)=>{var l=j(),d=ht(a);ci(r,i,d,s),M(l)},setStringView:(r,i,a)=>{V()[r>>>2>>>0]=i,V()[r+4>>>2>>>0]=a},makeStringFromStringView:r=>{var i=V()[r>>>2>>>0],a=V()[r+4>>>2>>>0];return Ue(i,a)},makeStringFromOptionalStringView:r=>{var i=V()[r>>>2>>>0],a=V()[r+4>>>2>>>0];return i?Ue(i,a):a===0?"":void 0},makeColor:r=>({r:Ne()[r>>>3>>>0],g:Ne()[r+8>>>3>>>0],b:Ne()[r+16>>>3>>>0],a:Ne()[r+24>>>3>>>0]}),makeExtent3D:r=>({width:V()[r>>>2>>>0],height:V()[r+4>>>2>>>0],depthOrArrayLayers:V()[r+8>>>2>>>0]}),makeOrigin3D:r=>({x:V()[r>>>2>>>0],y:V()[r+4>>>2>>>0],z:V()[r+8>>>2>>>0]}),makeTexelCopyTextureInfo:r=>({texture:G.getJsObject(V()[r>>>2>>>0]),mipLevel:V()[r+4>>>2>>>0],origin:G.makeOrigin3D(r+8),aspect:G.TextureAspect[V()[r+20>>>2>>>0]]}),makeTexelCopyBufferLayout:r=>{var i=V()[r+8>>>2>>>0],a=V()[r+12>>>2>>>0];return{offset:4294967296*V()[r+4>>>2>>>0]+V()[r>>>2>>>0],bytesPerRow:i===4294967295?void 0:i,rowsPerImage:a===4294967295?void 0:a}},makeTexelCopyBufferInfo:r=>{var i=r+0,a=G.makeTexelCopyBufferLayout(i);return a.buffer=G.getJsObject(V()[r+16>>>2>>>0]),a},makePassTimestampWrites:r=>{if(r!==0)return{querySet:G.getJsObject(V()[r+4>>>2>>>0]),beginningOfPassWriteIndex:V()[r+8>>>2>>>0],endOfPassWriteIndex:V()[r+12>>>2>>>0]}},makePipelineConstants:(r,i)=>{if(r){for(var a={},s=0;s>>3>>>0]}return a}},makePipelineLayout:r=>r?G.getJsObject(r):"auto",makeComputeState:r=>{if(r)return{module:G.getJsObject(V()[r+4>>>2>>>0]),constants:G.makePipelineConstants(V()[r+16>>>2>>>0],V()[r+20>>>2>>>0]),entryPoint:G.makeStringFromOptionalStringView(r+8)}},makeComputePipelineDesc:r=>({label:G.makeStringFromOptionalStringView(r+4),layout:G.makePipelineLayout(V()[r+12>>>2>>>0]),compute:G.makeComputeState(r+16)}),makeRenderPipelineDesc:r=>{function 
i(y){if(y)return{operation:G.BlendOperation[V()[y>>>2>>>0]],srcFactor:G.BlendFactor[V()[y+4>>>2>>>0]],dstFactor:G.BlendFactor[V()[y+8>>>2>>>0]]}}function a(y){if(y)return{alpha:i(y+12),color:i(y+0)}}function s(y,v){for(var $,I,P=[],D=0;D>>2>>>0])===0?void 0:{format:G.TextureFormat[I],blend:a(V()[$+8>>>2>>>0]),writeMask:V()[$+16>>>2>>>0]}));return P}function l(y){return{compare:G.CompareFunction[V()[y>>>2>>>0]],failOp:G.StencilOperation[V()[y+4>>>2>>>0]],depthFailOp:G.StencilOperation[V()[y+8>>>2>>>0]],passOp:G.StencilOperation[V()[y+12>>>2>>>0]]}}function d(y,v){for(var $,I=[],P=0;P>>2>>>0]],offset:4294967296*V()[$+4+8>>>2>>>0]+V()[$+8>>>2>>>0],shaderLocation:V()[$+16>>>2>>>0]}));return I}function f(y){if(y){var v=V()[y+4>>>2>>>0],$=V()[y+16>>>2>>>0];return v===0&&$===0?null:{arrayStride:4294967296*V()[y+4+8>>>2>>>0]+V()[y+8>>>2>>>0],stepMode:G.VertexStepMode[v],attributes:d($,V()[y+20>>>2>>>0])}}}function h(y,v){if(y){for(var $=[],I=0;I>>2>>>0]),vertex:function(y){if(y)return{module:G.getJsObject(V()[y+4>>>2>>>0]),constants:G.makePipelineConstants(V()[y+16>>>2>>>0],V()[y+20>>>2>>>0]),buffers:h(V()[y+24>>>2>>>0],V()[y+28>>>2>>>0]),entryPoint:G.makeStringFromOptionalStringView(y+8)}}(r+16),primitive:function(y){if(y)return{topology:G.PrimitiveTopology[V()[y+4>>>2>>>0]],stripIndexFormat:G.IndexFormat[V()[y+8>>>2>>>0]],frontFace:G.FrontFace[V()[y+12>>>2>>>0]],cullMode:G.CullMode[V()[y+16>>>2>>>0]],unclippedDepth:!!V()[y+20>>>2>>>0]}}(r+48),depthStencil:function(y){if(y)return{format:G.TextureFormat[V()[y+4>>>2>>>0]],depthWriteEnabled:!!V()[y+8>>>2>>>0],depthCompare:G.CompareFunction[V()[y+12>>>2>>>0]],stencilFront:l(y+16),stencilBack:l(y+32),stencilReadMask:V()[y+48>>>2>>>0],stencilWriteMask:V()[y+52>>>2>>>0],depthBias:R()[y+56>>>2>>>0],depthBiasSlopeScale:Ce()[y+60>>>2>>>0],depthBiasClamp:Ce()[y+64>>>2>>>0]}}(V()[r+72>>>2>>>0]),multisample:function(y){if(y)return{count:V()[y+4>>>2>>>0],mask:V()[y+8>>>2>>>0],alphaToCoverageEnabled:!!V()[y+12>>>2>>>0]}}(r+76),fragment:f
unction(y){if(y)return{module:G.getJsObject(V()[y+4>>>2>>>0]),constants:G.makePipelineConstants(V()[y+16>>>2>>>0],V()[y+20>>>2>>>0]),targets:s(V()[y+24>>>2>>>0],V()[y+28>>>2>>>0]),entryPoint:G.makeStringFromOptionalStringView(y+8)}}(V()[r+92>>>2>>>0])}},fillLimitStruct:(r,i)=>{function a(l,d){var f=r[l];R()[i+d>>>2>>>0]=f}function s(l,d){var f=r[l];re[i+d>>>3]=BigInt(f)}a("maxTextureDimension1D",4),a("maxTextureDimension2D",8),a("maxTextureDimension3D",12),a("maxTextureArrayLayers",16),a("maxBindGroups",20),a("maxBindGroupsPlusVertexBuffers",24),a("maxBindingsPerBindGroup",28),a("maxDynamicUniformBuffersPerPipelineLayout",32),a("maxDynamicStorageBuffersPerPipelineLayout",36),a("maxSampledTexturesPerShaderStage",40),a("maxSamplersPerShaderStage",44),a("maxStorageBuffersPerShaderStage",48),a("maxStorageTexturesPerShaderStage",52),a("maxUniformBuffersPerShaderStage",56),a("minUniformBufferOffsetAlignment",80),a("minStorageBufferOffsetAlignment",84),s("maxUniformBufferBindingSize",64),s("maxStorageBufferBindingSize",72),a("maxVertexBuffers",88),s("maxBufferSize",96),a("maxVertexAttributes",104),a("maxVertexBufferArrayStride",108),a("maxInterStageShaderVariables",112),a("maxColorAttachments",116),a("maxColorAttachmentBytesPerSample",120),a("maxComputeWorkgroupStorageSize",124),a("maxComputeInvocationsPerWorkgroup",128),a("maxComputeWorkgroupSizeX",132),a("maxComputeWorkgroupSizeY",136),a("maxComputeWorkgroupSizeZ",140),a("maxComputeWorkgroupsPerDimension",144)},Int_BufferMapState:{unmapped:1,pending:2,mapped:3},Int_CompilationMessageType:{error:1,warning:2,info:3},Int_DeviceLostReason:{undefined:1,unknown:1,destroyed:2},Int_PreferredFormat:{rgba8unorm:18,bgra8unorm:23},WGSLLanguageFeatureName:{1:"readonly_and_readwrite_storage_textures",2:"packed_4x8_integer_dot_product",3:"unrestricted_pointer_parameters",4:"pointer_composite_access",5:"sized_binding_array"},AddressMode:[,"clamp-to-edge","repeat","mirror-repeat"],BlendFactor:[,"zero","one","src","one-minus-src","src-alp
ha","one-minus-src-alpha","dst","one-minus-dst","dst-alpha","one-minus-dst-alpha","src-alpha-saturated","constant","one-minus-constant","src1","one-minus-src1","src1alpha","one-minus-src1alpha"],BlendOperation:[,"add","subtract","reverse-subtract","min","max"],BufferBindingType:["binding-not-used",,"uniform","storage","read-only-storage"],BufferMapState:{1:"unmapped",2:"pending",3:"mapped"},CompareFunction:[,"never","less","equal","less-equal","greater","not-equal","greater-equal","always"],CompilationInfoRequestStatus:{1:"success",2:"callback-cancelled"},CompositeAlphaMode:[,"opaque","premultiplied","unpremultiplied","inherit"],CullMode:[,"none","front","back"],ErrorFilter:{1:"validation",2:"out-of-memory",3:"internal"},FeatureLevel:[,"compatibility","core"],FeatureName:{1:"depth-clip-control",2:"depth32float-stencil8",3:"timestamp-query",4:"texture-compression-bc",5:"texture-compression-bc-sliced-3d",6:"texture-compression-etc2",7:"texture-compression-astc",8:"texture-compression-astc-sliced-3d",9:"indirect-first-instance",10:"shader-f16",11:"rg11b10ufloat-renderable",12:"bgra8unorm-storage",13:"float32-filterable",14:"float32-blendable",15:"clip-distances",16:"dual-source-blending",17:"subgroups",18:"core-features-and-limits",327688:"subgroups-f16",327693:"chromium-experimental-unorm16-texture-formats",327694:"chromium-experimental-snorm16-texture-formats",327733:"chromium-experimental-multi-draw-indirect"},FilterMode:[,"nearest","linear"],FrontFace:[,"ccw","cw"],IndexFormat:[,"uint16","uint32"],LoadOp:[,"load","clear"],MipmapFilterMode:[,"nearest","linear"],OptionalBool:["false","true"],PowerPreference:[,"low-power","high-performance"],PredefinedColorSpace:{1:"srgb",2:"display-p3"},PrimitiveTopology:[,"point-list","line-list","line-strip","triangle-list","triangle-strip"],QueryType:{1:"occlusion",2:"timestamp"},SamplerBindingType:["binding-not-used",,"filtering","non-filtering","comparison"],Status:{1:"success",2:"error"},StencilOperation:[,"keep","zero","replac
e","invert","increment-clamp","decrement-clamp","increment-wrap","decrement-wrap"],StorageTextureAccess:["binding-not-used",,"write-only","read-only","read-write"],StoreOp:[,"store","discard"],SurfaceGetCurrentTextureStatus:{1:"success-optimal",2:"success-suboptimal",3:"timeout",4:"outdated",5:"lost",6:"error"},TextureAspect:[,"all","stencil-only","depth-only"],TextureDimension:[,"1d","2d","3d"],TextureFormat:[,"r8unorm","r8snorm","r8uint","r8sint","r16uint","r16sint","r16float","rg8unorm","rg8snorm","rg8uint","rg8sint","r32float","r32uint","r32sint","rg16uint","rg16sint","rg16float","rgba8unorm","rgba8unorm-srgb","rgba8snorm","rgba8uint","rgba8sint","bgra8unorm","bgra8unorm-srgb","rgb10a2uint","rgb10a2unorm","rg11b10ufloat","rgb9e5ufloat","rg32float","rg32uint","rg32sint","rgba16uint","rgba16sint","rgba16float","rgba32float","rgba32uint","rgba32sint","stencil8","depth16unorm","depth24plus","depth24plus-stencil8","depth32float","depth32float-stencil8","bc1-rgba-unorm","bc1-rgba-unorm-srgb","bc2-rgba-unorm","bc2-rgba-unorm-srgb","bc3-rgba-unorm","bc3-rgba-unorm-srgb","bc4-r-unorm","bc4-r-snorm","bc5-rg-unorm","bc5-rg-snorm","bc6h-rgb-ufloat","bc6h-rgb-float","bc7-rgba-unorm","bc7-rgba-unorm-srgb","etc2-rgb8unorm","etc2-rgb8unorm-srgb","etc2-rgb8a1unorm","etc2-rgb8a1unorm-srgb","etc2-rgba8unorm","etc2-rgba8unorm-srgb","eac-r11unorm","eac-r11snorm","eac-rg11unorm","eac-rg11snorm","astc-4x4-unorm","astc-4x4-unorm-srgb","astc-5x4-unorm","astc-5x4-unorm-srgb","astc-5x5-unorm","astc-5x5-unorm-srgb","astc-6x5-unorm","astc-6x5-unorm-srgb","astc-6x6-unorm","astc-6x6-unorm-srgb","astc-8x5-unorm","astc-8x5-unorm-srgb","astc-8x6-unorm","astc-8x6-unorm-srgb","astc-8x8-unorm","astc-8x8-unorm-srgb","astc-10x5-unorm","astc-10x5-unorm-srgb","astc-10x6-unorm","astc-10x6-unorm-srgb","astc-10x8-unorm","astc-10x8-unorm-srgb","astc-10x10-unorm","astc-10x10-unorm-srgb","astc-12x10-unorm","astc-12x10-unorm-srgb","astc-12x12-unorm","astc-12x12-unorm-srgb"],TextureSampleType:["binding-not-use
d",,"float","unfilterable-float","depth","sint","uint"],TextureViewDimension:[,"1d","2d","2d-array","cube","cube-array","3d"],ToneMappingMode:{1:"standard",2:"extended"},VertexFormat:{1:"uint8",2:"uint8x2",3:"uint8x4",4:"sint8",5:"sint8x2",6:"sint8x4",7:"unorm8",8:"unorm8x2",9:"unorm8x4",10:"snorm8",11:"snorm8x2",12:"snorm8x4",13:"uint16",14:"uint16x2",15:"uint16x4",16:"sint16",17:"sint16x2",18:"sint16x4",19:"unorm16",20:"unorm16x2",21:"unorm16x4",22:"snorm16",23:"snorm16x2",24:"snorm16x4",25:"float16",26:"float16x2",27:"float16x4",28:"float32",29:"float32x2",30:"float32x3",31:"float32x4",32:"uint32",33:"uint32x2",34:"uint32x3",35:"uint32x4",36:"sint32",37:"sint32x2",38:"sint32x3",39:"sint32x4",40:"unorm10-10-10-2",41:"unorm8x4-bgra"},VertexStepMode:[,"vertex","instance"],FeatureNameString2Enum:{"depth-clip-control":"1","depth32float-stencil8":"2","timestamp-query":"3","texture-compression-bc":"4","texture-compression-bc-sliced-3d":"5","texture-compression-etc2":"6","texture-compression-astc":"7","texture-compression-astc-sliced-3d":"8","indirect-first-instance":"9","shader-f16":"10","rg11b10ufloat-renderable":"11","bgra8unorm-storage":"12","float32-filterable":"13","float32-blendable":"14","clip-distances":"15","dual-source-blending":"16",subgroups:"17","core-features-and-limits":"18","subgroups-f16":"327688","chromium-experimental-unorm16-texture-formats":"327693","chromium-experimental-snorm16-texture-formats":"327694","chromium-experimental-multi-draw-indirect":"327733"},WGSLLanguageFeatureNameString2Enum:{readonly_and_readwrite_storage_textures:"1",packed_4x8_integer_dot_product:"2",unrestricted_pointer_parameters:"3",pointer_composite_access:"4",sized_binding_array:"5"}};function Uy(r,i,a,s,l,d){r>>>=0,i=qe(i),a=qe(a),s>>>=0,l>>>=0,d>>>=0;var f=G.getJsObject(r),h={};if(d){var y=V()[d+12>>>2>>>0];if(y){var v=V()[d+16>>>2>>>0];h.requiredFeatures=Array.from(V().subarray(v>>>2>>>0,v+4*y>>>2>>>0),J=>G.FeatureName[J])}var $=V()[d+20>>>2>>>0];if($){let 
J=function(oe,ye){var xe=$+ye,De=V()[xe>>>2>>>0];De!=4294967295&&(I[oe]=De)},ee=function(oe,ye){var xe=$+ye,De=V()[xe>>>2>>>0],Le=V()[xe+4>>>2>>>0];De==4294967295&&Le==4294967295||(I[oe]=4294967296*V()[xe+4>>>2>>>0]+V()[xe>>>2>>>0])};var H=J,Z=ee,I={};J("maxTextureDimension1D",4),J("maxTextureDimension2D",8),J("maxTextureDimension3D",12),J("maxTextureArrayLayers",16),J("maxBindGroups",20),J("maxBindGroupsPlusVertexBuffers",24),J("maxDynamicUniformBuffersPerPipelineLayout",32),J("maxDynamicStorageBuffersPerPipelineLayout",36),J("maxSampledTexturesPerShaderStage",40),J("maxSamplersPerShaderStage",44),J("maxStorageBuffersPerShaderStage",48),J("maxStorageTexturesPerShaderStage",52),J("maxUniformBuffersPerShaderStage",56),J("minUniformBufferOffsetAlignment",80),J("minStorageBufferOffsetAlignment",84),ee("maxUniformBufferBindingSize",64),ee("maxStorageBufferBindingSize",72),J("maxVertexBuffers",88),ee("maxBufferSize",96),J("maxVertexAttributes",104),J("maxVertexBufferArrayStride",108),J("maxInterStageShaderVariables",112),J("maxColorAttachments",116),J("maxColorAttachmentBytesPerSample",120),J("maxComputeWorkgroupStorageSize",124),J("maxComputeInvocationsPerWorkgroup",128),J("maxComputeWorkgroupSizeX",132),J("maxComputeWorkgroupSizeY",136),J("maxComputeWorkgroupSizeZ",140),J("maxComputeWorkgroupsPerDimension",144),h.requiredLimits=I}var P=V()[d+24>>>2>>>0];if(P){var D={label:G.makeStringFromOptionalStringView(P+4)};h.defaultQueue=D}h.label=G.makeStringFromOptionalStringView(d+4)}Nt(),G.Internals.futureInsert(i,f.requestDevice(h).then(J=>{mt(),G.Internals.jsObjectInsert(l,J.queue),G.Internals.jsObjectInsert(s,J),a&&(Nt(),G.Internals.futureInsert(a,J.lost.then(ee=>{mt(),J.onuncapturederror=xe=>{};var oe=j(),ye=ht(ee.message);ni(a,G.Int_DeviceLostReason[ee.reason],ye),M(oe)}))),J.onuncapturederror=ee=>{var oe=5;ee.error instanceof GPUValidationError?oe=2:ee.error instanceof GPUOutOfMemoryError?oe=3:ee.error instanceof GPUInternalError&&(oe=4);var 
ye=j(),xe=ht(ee.error.message);Ia(s,oe,xe),M(ye)},ai(i,1,s,0)},J=>{mt();var ee=j(),oe=ht(J.message);ai(i,3,s,oe),a&&ni(a,4,oe),M(ee)}))}function Ny(r){r>>>=0;var i=G.getJsObject(r),a=G.Internals.bufferOnUnmaps[r];if(a){for(var s=0;s>>=0,i>>>=0,a>>>=0;var s,l=G.getJsObject(r);a===0&&lr("getMappedRange size=0 no longer means WGPU_WHOLE_MAP_SIZE"),a==4294967295&&(a=void 0);try{s=l.getMappedRange(i,a)}catch{return 0}var d=li(16,s.byteLength);return ke().set(new Uint8Array(s),d>>>0),G.Internals.bufferOnUnmaps[r].push(()=>rt(d)),d}function Wy(r,i,a){r>>>=0,i>>>=0,a>>>=0;var s,l=G.getJsObject(r);a===0&&lr("getMappedRange size=0 no longer means WGPU_WHOLE_MAP_SIZE"),a==4294967295&&(a=void 0);try{s=l.getMappedRange(i,a)}catch{return 0}var d=li(16,s.byteLength);return ke().fill(0,d,s.byteLength),G.Internals.bufferOnUnmaps[r].push(()=>{new Uint8Array(s).set(ke().subarray(d>>>0,d+s.byteLength>>>0)),rt(d)}),d}var Ly=function(r,i,a,s,l){r>>>=0,i=qe(i),a=qe(a),s>>>=0,l>>>=0;var d=G.getJsObject(r);G.Internals.bufferOnUnmaps[r]=[],l==4294967295&&(l=void 0),Nt(),G.Internals.futureInsert(i,d.mapAsync(a,s,l).then(()=>{mt(),ii(i,1,0)},f=>{mt(),j();var h=ht(f.message),y=f.name==="AbortError"?4:f.name==="OperationError"?3:0;ii(i,y,h),delete G.Internals.bufferOnUnmaps[r]}))};function Gy(r){r>>>=0;var i=G.getJsObject(r),a=G.Internals.bufferOnUnmaps[r];if(a){for(var s=0;s>>=0,delete G.Internals.jsObjects[r]}function Fy(r,i,a){r>>>=0,i>>>=0,a>>>=0;var s,l=!!V()[i+32>>>2>>>0],d={label:G.makeStringFromOptionalStringView(i+4),usage:V()[i+16>>>2>>>0],size:4294967296*V()[i+4+24>>>2>>>0]+V()[i+24>>>2>>>0],mappedAtCreation:l},f=G.getJsObject(r);try{s=f.createBuffer(d)}catch{return!1}return G.Internals.jsObjectInsert(a,s),l&&(G.Internals.bufferOnUnmaps[a]=[]),!0}function qy(r,i,a){r>>>=0,i>>>=0,a>>>=0;var s=V()[i>>>2>>>0],l=V()[s+4>>>2>>>0],d={label:G.makeStringFromOptionalStringView(i+4),code:""};l===2&&(d.code=G.makeStringFromStringView(s+8));var 
f=G.getJsObject(r);G.Internals.jsObjectInsert(a,f.createShaderModule(d))}var Ky=r=>{G.getJsObject(r).destroy()},Jy=function(r,i){r>>>=0,i=qe(i);var a=G.getJsObject(r);Nt(),G.Internals.futureInsert(i,a.popErrorScope().then(s=>{mt();var l=5;s?s instanceof GPUValidationError?l=2:s instanceof GPUOutOfMemoryError?l=3:s instanceof GPUInternalError&&(l=4):l=1;var d=j(),f=s?ht(s.message):0;oi(i,1,l,f),M(d)},s=>{mt();var l=j(),d=ht(s.message);oi(i,1,5,d),M(l)}))};function Zy(r,i,a,s){var l;if(i=qe(i),s>>>=0,a>>>=0){var d=V()[a+4>>>2>>>0];l={featureLevel:G.FeatureLevel[d],powerPreference:G.PowerPreference[V()[a+8>>>2>>>0]],forceFallbackAdapter:!!V()[a+12>>>2>>>0]};var f=V()[a>>>2>>>0];if(f!==0){V()[f+4>>>2>>>0];var h=f;l.xrCompatible=!!V()[h+8>>>2>>>0]}}if(!("gpu"in navigator)){var y=j(),v=ht("WebGPU not available on this browser (navigator.gpu is not available)");return cr(i,3,s,v),void M(y)}Nt(),G.Internals.futureInsert(i,navigator.gpu.requestAdapter(l).then($=>{if(mt(),$)G.Internals.jsObjectInsert(s,$),cr(i,1,s,0);else{var I=j(),P=ht("WebGPU not available on this browser (requestAdapter returned null)");cr(i,3,s,P),M(I)}},$=>{mt();var I=j(),P=ht($.message);cr(i,4,s,P),M(I)}))}var na=function(r,i,a){return r>>>=0,i>>>=0,a>>>=0,de.handleAsync(async()=>{var s=[];if(a){var l=(4294967296*V()[a+4>>>2>>>0]+V()[a>>>2>>>0])/1e6;s.length=i+1,s[i]=new Promise(y=>setTimeout(y,l,0))}else s.length=i;for(var d=0;d>>2>>>0]+V()[r+8*d>>>2>>>0];if(!(f in G.Internals.futures))return f;s[d]=G.Internals.futures[f]}let h=await Promise.race(s);return delete G.Internals.futures[h],h})};na.isAsync=!0;var qn={},dr=()=>{if(!dr.strings){var r={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8",_:"./this.program"};for(var i in qn)qn[i]===void 0?delete r[i]:r[i]=qn[i];var a=[];for(var i in r)a.push(`${i}=${r[i]}`);dr.strings=a}return 
dr.strings},ia=function(r,i){if(m)return Re(19,0,1,r,i);r>>>=0,i>>>=0;var a=0;return dr().forEach((s,l)=>{var d=i+a;V()[r+4*l>>>2>>>0]=d,((f,h)=>{for(var y=0;y>>0]=f.charCodeAt(y);me()[h>>>0]=0})(s,d),a+=s.length+1}),0},oa=function(r,i){if(m)return Re(20,0,1,r,i);r>>>=0,i>>>=0;var a=dr();V()[r>>>2>>>0]=a.length;var s=0;return a.forEach(l=>s+=l.length+1),V()[i>>>2>>>0]=s,0};function aa(r){return m?Re(21,0,1,r):52}function sa(r,i,a,s){return m?Re(22,0,1,r,i,a,s):(i>>>=0,a>>>=0,s>>>=0,52)}function ua(r,i,a,s){return m?Re(23,0,1,r,i,a,s):(i=qe(i),s>>>=0,70)}var Qy=[null,[],[]],Yy=(r,i)=>{var a=Qy[r];i===0||i===10?((r===1?A:O)(To(a)),a.length=0):a.push(i)};function la(r,i,a,s){if(m)return Re(24,0,1,r,i,a,s);i>>>=0,a>>>=0,s>>>=0;for(var l=0,d=0;d>>2>>>0],h=V()[i+4>>>2>>>0];i+=8;for(var y=0;y>>0]);l+=h}return V()[s>>>2>>>0]=l,0}function Xy(r){return r>>>0}function eb(r,i){r>>>=0,i>>>=0;var a=G.getJsObject(r);return G.fillLimitStruct(a.limits,i),1}function tb(r,i){return r>>>=0,G.getJsObject(r).features.has(G.FeatureName[i])}var rb=function(r){r>>>=0;var i=G.getJsObject(r).size;return BigInt(i)},nb=function(r){r>>>=0;var i=G.getJsObject(r).usage;return BigInt(i)};function ib(r,i){var a;r>>>=0,(i>>>=0)&&(a={label:G.makeStringFromOptionalStringView(i+4),timestampWrites:G.makePassTimestampWrites(V()[i+12>>>2>>>0])});var s=G.getJsObject(r),l=Xn(0);return G.Internals.jsObjectInsert(l,s.beginComputePass(a)),l}function ob(r,i,a,s,l,d){r>>>=0,i>>>=0,a=qe(a),s>>>=0,l=qe(l),d=qe(d);var f=G.getJsObject(r),h=G.getJsObject(i),y=G.getJsObject(s);f.copyBufferToBuffer(h,a,y,l,d)}function ab(r,i){r>>>=0;var a=G.getJsObject(r),s=Qn(0);return G.Internals.jsObjectInsert(s,a.finish()),s}function sb(r,i,a,s,l,d){r>>>=0,i>>>=0,l>>>=0,d=qe(d);var f=G.getJsObject(r),h=G.getJsObject(i),y=G.getJsObject(l);f.resolveQuerySet(h,a,s,y,d)}function ub(r,i,a,s){r>>>=0,G.getJsObject(r).dispatchWorkgroups(i,a,s)}function lb(r){r>>>=0,G.getJsObject(r).end()}function 
db(r,i,a,s,l){r>>>=0,a>>>=0,s>>>=0,l>>>=0;var d=G.getJsObject(r),f=G.getJsObject(a);if(s==0)d.setBindGroup(i,f);else{for(var h=[],y=0;y>>2>>>0]);d.setBindGroup(i,f,h)}}function cb(r,i){r>>>=0,i>>>=0;var a=G.getJsObject(r),s=G.getJsObject(i);a.setPipeline(s)}function pb(r,i,a){r>>>=0,i>>>=0;var s=G.getJsObject(r),l=G.getJsObject(i);s.writeTimestamp(l,a)}function fb(r,i){r>>>=0;var a=G.getJsObject(r),s=Zn(0);return G.Internals.jsObjectInsert(s,a.getBindGroupLayout(i)),s}var mb=r=>V()[r>>>2>>>0]+4294967296*R()[r+4>>>2>>>0];function hb(r,i){function a(f){var h=V()[f+8>>>2>>>0],y=V()[f+32>>>2>>>0],v=V()[f+36>>>2>>>0],$=V()[f+4>>>2>>>0];if(h){var I=mb(f+24);return I==-1&&(I=void 0),{binding:$,resource:{buffer:G.getJsObject(h),offset:4294967296*V()[f+4+16>>>2>>>0]+V()[f+16>>>2>>>0],size:I}}}return y?{binding:$,resource:G.getJsObject(y)}:{binding:$,resource:G.getJsObject(v)}}r>>>=0,i>>>=0;var s={label:G.makeStringFromOptionalStringView(i+4),layout:G.getJsObject(V()[i+12>>>2>>>0]),entries:function(f,h){for(var y=[],v=0;v>>2>>>0],V()[i+20>>>2>>>0])},l=G.getJsObject(r),d=Jn(0);return G.Internals.jsObjectInsert(d,l.createBindGroup(s)),d}function gb(r,i){var a;r>>>=0,(i>>>=0)&&(a={label:G.makeStringFromOptionalStringView(i+4)});var s=G.getJsObject(r),l=Yn(0);return G.Internals.jsObjectInsert(l,s.createCommandEncoder(a)),l}function yb(r,i){r>>>=0,i>>>=0;var a=G.makeComputePipelineDesc(i),s=G.getJsObject(r),l=ei(0);return G.Internals.jsObjectInsert(l,s.createComputePipeline(a)),l}function bb(r,i){r>>>=0,i>>>=0;var a={type:G.QueryType[V()[i+12>>>2>>>0]],count:V()[i+16>>>2>>>0]},s=G.getJsObject(r),l=ti(0);return G.Internals.jsObjectInsert(l,s.createQuerySet(a)),l}var _b=r=>{var i=kt(r)+1,a=Yt(i);return a&&Et(r,a,i),a};function wb(r,i){r>>>=0,i>>>=0;var a=G.getJsObject(r),s=a.adapterInfo.vendor+a.adapterInfo.architecture+a.adapterInfo.device+a.adapterInfo.description,l=_b(s),d=kt(a.adapterInfo.vendor);G.setStringView(i+4,l,d),l+=d;var 
f=kt(a.adapterInfo.architecture);G.setStringView(i+12,l,f),l+=f;var h=kt(a.adapterInfo.device);G.setStringView(i+20,l,h),l+=h;var y=kt(a.adapterInfo.description);G.setStringView(i+28,l,y),l+=y,R()[i+36>>>2>>>0]=2;var v=a.adapterInfo.isFallbackAdapter?3:4;return R()[i+40>>>2>>>0]=v,R()[i+44>>>2>>>0]=0,R()[i+48>>>2>>>0]=0,1}var vb=function(r,i){r>>>=0,i>>>=0;var a=G.getJsObject(r),s=Yt(4*a.features.size),l=0,d=0;a.features.forEach(f=>{var h=G.FeatureNameString2Enum[f];h!==void 0&&(R()[s+l>>>2>>>0]=h,l+=4,d++)}),V()[i+4>>>2>>>0]=s,V()[i>>>2>>>0]=d};function xb(r,i){r>>>=0,i>>>=0;var a=G.getJsObject(r);return G.fillLimitStruct(a.limits,i),1}function $b(r,i){return r>>>=0,G.getJsObject(r).features.has(G.FeatureName[i])}function Cb(r,i){r>>>=0,G.getJsObject(r).pushErrorScope(G.ErrorFilter[i])}var Sb=function(r,i,a){r>>>=0,i>>>=0,a>>>=0;var s=G.getJsObject(r),l=Array.from(R().subarray(a>>>2>>>0,a+4*i>>>2>>>0),d=>G.getJsObject(d));s.submit(l)};function Tb(r,i,a,s,l){r>>>=0,i>>>=0,a=qe(a),s>>>=0,l>>>=0;var d=G.getJsObject(r),f=G.getJsObject(i),h=ke().subarray(s>>>0,s+l>>>0);d.writeBuffer(f,a,h,0,l)}Te.init(),(()=>{for(var r=new Array(256),i=0;i<256;++i)r[i]=String.fromCharCode(i);No=r})(),Vo=n.BindingError=class extends Error{constructor(r){super(r),this.name="BindingError"}},n.InternalError=class extends Error{constructor(r){super(r),this.name="InternalError"}},St.push(0,1,void 0,1,null,1,!0,1,!1,1),n.count_emval_handles=()=>St.length/2-5-Nn.length;var Ib=[Bn,xo,$o,Io,Ao,ko,Eo,Po,Oo,zo,Bo,Do,jo,Mo,Ro,Uo,Jo,Zo,Qo,ia,oa,aa,sa,ua,la],E=await async function(){function r(l,d){return E=l.exports,E=function(h){var y,v=I=>P=>I(P)>>>0,$=I=>()=>I()>>>0;return(h=Object.assign({},h)).rf=v(h.rf),h.Wf=$(h.Wf),h.Yf=v(h.Yf),h.Jg=(y=h.Jg,(I,P)=>y(I,P)>>>0),h.Og=v(h.Og),h.Pg=$(h.Pg),h.Tg=v(h.Tg),h}(E=de.instrumentWasmExports(E)),f=E.Ag,Te.tlsInitFunctions.push(f),E.Bg,W=d,fo(),E;var f}po();var i,a,s=mo();if(n.instantiateWasm)return new 
Promise((l,d)=>{n.instantiateWasm(s,(f,h)=>{r(f,h),l(f.exports)})});if(m)return new Promise(l=>{Je=d=>{var f=new WebAssembly.Instance(d,mo());l(r(f,d))}});At??=n.locateFile?(i="ort-wasm-simd-threaded.jsep.wasm",n.locateFile?n.locateFile(i,T):T+i):new URL("ort-wasm-simd-threaded.jsep.wasm",import.meta.url).href;try{return r((a=await ag(ie,At,s)).instance,a.module)}catch(l){return o(l),Promise.reject(l)}}(),da=r=>(da=E.rf)(r),ca=()=>(ca=E.sf)(),Kn=(n._OrtInit=(r,i)=>(n._OrtInit=E.tf)(r,i),n._OrtGetLastError=(r,i)=>(n._OrtGetLastError=E.uf)(r,i),n._OrtCreateSessionOptions=(r,i,a,s,l,d,f,h,y,v)=>(n._OrtCreateSessionOptions=E.vf)(r,i,a,s,l,d,f,h,y,v),n._OrtAppendExecutionProvider=(r,i,a,s,l)=>(n._OrtAppendExecutionProvider=E.wf)(r,i,a,s,l),n._OrtAddFreeDimensionOverride=(r,i,a)=>(n._OrtAddFreeDimensionOverride=E.xf)(r,i,a),n._OrtAddSessionConfigEntry=(r,i,a)=>(n._OrtAddSessionConfigEntry=E.yf)(r,i,a),n._OrtReleaseSessionOptions=r=>(n._OrtReleaseSessionOptions=E.zf)(r),n._OrtCreateSession=(r,i,a)=>(n._OrtCreateSession=E.Af)(r,i,a),n._OrtReleaseSession=r=>(n._OrtReleaseSession=E.Bf)(r),n._OrtGetInputOutputCount=(r,i,a)=>(n._OrtGetInputOutputCount=E.Cf)(r,i,a),n._OrtGetInputOutputMetadata=(r,i,a,s)=>(n._OrtGetInputOutputMetadata=E.Df)(r,i,a,s),n._OrtFree=r=>(n._OrtFree=E.Ef)(r),n._OrtCreateTensor=(r,i,a,s,l,d)=>(n._OrtCreateTensor=E.Ff)(r,i,a,s,l,d),n._OrtGetTensorData=(r,i,a,s,l)=>(n._OrtGetTensorData=E.Gf)(r,i,a,s,l),n._OrtReleaseTensor=r=>(n._OrtReleaseTensor=E.Hf)(r),n._OrtCreateRunOptions=(r,i,a,s)=>(n._OrtCreateRunOptions=E.If)(r,i,a,s),n._OrtAddRunConfigEntry=(r,i,a)=>(n._OrtAddRunConfigEntry=E.Jf)(r,i,a),n._OrtReleaseRunOptions=r=>(n._OrtReleaseRunOptions=E.Kf)(r),n._OrtCreateBinding=r=>(n._OrtCreateBinding=E.Lf)(r),n._OrtBindInput=(r,i,a)=>(n._OrtBindInput=E.Mf)(r,i,a),n._OrtBindOutput=(r,i,a,s)=>(n._OrtBindOutput=E.Nf)(r,i,a,s),n._OrtClearBoundOutputs=r=>(n._OrtClearBoundOutputs=E.Of)(r),n._OrtReleaseBinding=r=>(n._OrtReleaseBinding=E.Pf)(r),n._OrtRunWithBinding=(
r,i,a,s,l)=>(n._OrtRunWithBinding=E.Qf)(r,i,a,s,l),n._OrtRun=(r,i,a,s,l,d,f,h)=>(n._OrtRun=E.Rf)(r,i,a,s,l,d,f,h),n._OrtEndProfiling=r=>(n._OrtEndProfiling=E.Sf)(r),n._OrtGetWebGpuDevice=r=>(Kn=n._OrtGetWebGpuDevice=E.Tf)(r)),Br=(n._JsepOutput=(r,i,a)=>(n._JsepOutput=E.Uf)(r,i,a),n._JsepGetNodeName=r=>(n._JsepGetNodeName=E.Vf)(r),()=>(Br=E.Wf)()),rt=n._free=r=>(rt=n._free=E.Xf)(r),Yt=n._malloc=r=>(Yt=n._malloc=E.Yf)(r),pa=n._wgpuBufferRelease=r=>(pa=n._wgpuBufferRelease=E.Zf)(r),fa=n._wgpuCreateInstance=r=>(fa=n._wgpuCreateInstance=E._f)(r),Jn=r=>(Jn=E.$f)(r),Zn=r=>(Zn=E.ag)(r),Qn=r=>(Qn=E.bg)(r),Yn=r=>(Yn=E.cg)(r),Xn=r=>(Xn=E.dg)(r),ei=r=>(ei=E.eg)(r),ma=r=>(ma=E.fg)(r),ti=r=>(ti=E.gg)(r),ha=r=>(ha=E.hg)(r),ga=r=>(ga=E.ig)(r),ya=r=>(ya=E.jg)(r),ba=r=>(ba=E.kg)(r),_a=r=>(_a=E.lg)(r),wa=r=>(wa=E.mg)(r),va=r=>(va=E.ng)(r),xa=r=>(xa=E.og)(r),$a=r=>($a=E.pg)(r),Ca=(r,i)=>(Ca=E.qg)(r,i),Sa=(r,i)=>(Sa=E.rg)(r,i),ri=r=>(ri=E.sg)(r),Ta=r=>(Ta=E.tg)(r),ni=(r,i,a)=>(ni=E.ug)(r,i,a),ii=(r,i,a)=>(ii=E.vg)(r,i,a),oi=(r,i,a,s)=>(oi=E.wg)(r,i,a,s),cr=(r,i,a,s)=>(cr=E.xg)(r,i,a,s),ai=(r,i,a,s)=>(ai=E.yg)(r,i,a,s),Ia=(r,i,a)=>(Ia=E.zg)(r,i,a),si=(r,i,a,s,l,d)=>(si=E.Cg)(r,i,a,s,l,d),Aa=()=>(Aa=E.Dg)(),ka=(r,i,a,s,l)=>(ka=E.Eg)(r,i,a,s,l),Ea=r=>(Ea=E.Fg)(r),ui=r=>(ui=E.Gg)(r),Pa=(r,i)=>(Pa=E.Hg)(r,i),Oa=()=>(Oa=E.Ig)(),li=(r,i)=>(li=E.Jg)(r,i),U=(r,i)=>(U=E.Kg)(r,i),za=r=>(za=E.Lg)(r),Ba=(r,i)=>(Ba=E.Mg)(r,i),Da=r=>(Da=E.Ng)(r),ja=r=>(ja=E.Og)(r),Ma=()=>(Ma=E.Pg)(),Ra=r=>(Ra=E.Qg)(r),Ua=r=>(Ua=E.Rg)(r),Na=(r,i,a)=>(Na=E.Sg)(r,i,a),Va=r=>(Va=E.Tg)(r),Wa=n.dynCall_vii=(r,i,a)=>(Wa=n.dynCall_vii=E.Ug)(r,i,a),La=n.dynCall_iiii=(r,i,a,s)=>(La=n.dynCall_iiii=E.Vg)(r,i,a,s),Ga=n.dynCall_iii=(r,i,a)=>(Ga=n.dynCall_iii=E.Wg)(r,i,a),di=n.dynCall_ii=(r,i)=>(di=n.dynCall_ii=E.Xg)(r,i),Ha=n.dynCall_iiiiiii=(r,i,a,s,l,d,f)=>(Ha=n.dynCall_iiiiiii=E.Yg)(r,i,a,s,l,d,f),Fa=n.dynCall_vi=(r,i)=>(Fa=n.dynCall_vi=E.Zg)(r,i),qa=n.dynCall_v=r=>(qa=n.dynCall_v=E._g)(r),Ka=n.dynCall_iiiiii=(r,i,a,s,l,d)=>(Ka=
n.dynCall_iiiiii=E.$g)(r,i,a,s,l,d),ci=n.dynCall_viii=(r,i,a,s)=>(ci=n.dynCall_viii=E.ah)(r,i,a,s),Ja=n.dynCall_i=r=>(Ja=n.dynCall_i=E.bh)(r),Za=n.dynCall_iiiii=(r,i,a,s,l)=>(Za=n.dynCall_iiiii=E.ch)(r,i,a,s,l),Qa=n.dynCall_viiii=(r,i,a,s,l)=>(Qa=n.dynCall_viiii=E.dh)(r,i,a,s,l),Ya=n.dynCall_viiiii=(r,i,a,s,l,d)=>(Ya=n.dynCall_viiiii=E.eh)(r,i,a,s,l,d),Xa=n.dynCall_vijii=(r,i,a,s,l)=>(Xa=n.dynCall_vijii=E.fh)(r,i,a,s,l),es=n.dynCall_vijjiii=(r,i,a,s,l,d,f)=>(es=n.dynCall_vijjiii=E.gh)(r,i,a,s,l,d,f),ts=n.dynCall_vijj=(r,i,a,s)=>(ts=n.dynCall_vijj=E.hh)(r,i,a,s),rs=n.dynCall_vijji=(r,i,a,s,l)=>(rs=n.dynCall_vijji=E.ih)(r,i,a,s,l),ns=n.dynCall_diii=(r,i,a,s)=>(ns=n.dynCall_diii=E.jh)(r,i,a,s),is=n.dynCall_viiij=(r,i,a,s,l)=>(is=n.dynCall_viiij=E.kh)(r,i,a,s,l),os=n.dynCall_iiij=(r,i,a,s)=>(os=n.dynCall_iiij=E.lh)(r,i,a,s),as=n.dynCall_viijii=(r,i,a,s,l,d)=>(as=n.dynCall_viijii=E.mh)(r,i,a,s,l,d),ss=n.dynCall_iif=(r,i,a)=>(ss=n.dynCall_iif=E.nh)(r,i,a),us=n.dynCall_jiii=(r,i,a,s)=>(us=n.dynCall_jiii=E.oh)(r,i,a,s),ls=n.dynCall_viijijj=(r,i,a,s,l,d,f)=>(ls=n.dynCall_viijijj=E.ph)(r,i,a,s,l,d,f),ds=n.dynCall_vij=(r,i,a)=>(ds=n.dynCall_vij=E.qh)(r,i,a),cs=n.dynCall_viij=(r,i,a,s)=>(cs=n.dynCall_viij=E.rh)(r,i,a,s),ps=n.dynCall_iiiiid=(r,i,a,s,l,d)=>(ps=n.dynCall_iiiiid=E.sh)(r,i,a,s,l,d),fs=n.dynCall_viijj=(r,i,a,s,l)=>(fs=n.dynCall_viijj=E.th)(r,i,a,s,l),ms=n.dynCall_iij=(r,i,a)=>(ms=n.dynCall_iij=E.uh)(r,i,a),hs=n.dynCall_iiiiij=(r,i,a,s,l,d)=>(hs=n.dynCall_iiiiij=E.vh)(r,i,a,s,l,d),gs=n.dynCall_j=r=>(gs=n.dynCall_j=E.wh)(r),ys=n.dynCall_viiiiiii=(r,i,a,s,l,d,f,h)=>(ys=n.dynCall_viiiiiii=E.xh)(r,i,a,s,l,d,f,h),bs=n.dynCall_iiiiiiiii=(r,i,a,s,l,d,f,h,y)=>(bs=n.dynCall_iiiiiiiii=E.yh)(r,i,a,s,l,d,f,h,y),_s=n.dynCall_iiiiijiiiii=(r,i,a,s,l,d,f,h,y,v,$)=>(_s=n.dynCall_iiiiijiiiii=E.zh)(r,i,a,s,l,d,f,h,y,v,$),ws=n.dynCall_iiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$)=>(ws=n.dynCall_iiiiiiiiiii=E.Ah)(r,i,a,s,l,d,f,h,y,v,$),vs=n.dynCall_vijjjiiiiij=(r,i,a,s,l,d,f,h,y,v,$)=>(vs=n.dynCall
_vijjjiiiiij=E.Bh)(r,i,a,s,l,d,f,h,y,v,$),xs=n.dynCall_viiiiii=(r,i,a,s,l,d,f)=>(xs=n.dynCall_viiiiii=E.Ch)(r,i,a,s,l,d,f),$s=n.dynCall_viji=(r,i,a,s)=>($s=n.dynCall_viji=E.Dh)(r,i,a,s),Cs=n.dynCall_viiiiiiiii=(r,i,a,s,l,d,f,h,y,v)=>(Cs=n.dynCall_viiiiiiiii=E.Eh)(r,i,a,s,l,d,f,h,y,v),Ss=n.dynCall_fi=(r,i)=>(Ss=n.dynCall_fi=E.Fh)(r,i),Ts=n.dynCall_fii=(r,i,a)=>(Ts=n.dynCall_fii=E.Gh)(r,i,a),Is=n.dynCall_ji=(r,i)=>(Is=n.dynCall_ji=E.Hh)(r,i),As=n.dynCall_di=(r,i)=>(As=n.dynCall_di=E.Ih)(r,i),ks=n.dynCall_jii=(r,i,a)=>(ks=n.dynCall_jii=E.Jh)(r,i,a),Es=n.dynCall_dii=(r,i,a)=>(Es=n.dynCall_dii=E.Kh)(r,i,a),Ps=n.dynCall_viijiii=(r,i,a,s,l,d,f)=>(Ps=n.dynCall_viijiii=E.Lh)(r,i,a,s,l,d,f),Os=n.dynCall_viiiiiiii=(r,i,a,s,l,d,f,h,y)=>(Os=n.dynCall_viiiiiiii=E.Mh)(r,i,a,s,l,d,f,h,y),zs=n.dynCall_iiiiiiiij=(r,i,a,s,l,d,f,h,y)=>(zs=n.dynCall_iiiiiiiij=E.Nh)(r,i,a,s,l,d,f,h,y),Bs=n.dynCall_iiiiiiii=(r,i,a,s,l,d,f,h)=>(Bs=n.dynCall_iiiiiiii=E.Oh)(r,i,a,s,l,d,f,h),Ds=n.dynCall_viiji=(r,i,a,s,l)=>(Ds=n.dynCall_viiji=E.Ph)(r,i,a,s,l),js=n.dynCall_viiiiij=(r,i,a,s,l,d,f)=>(js=n.dynCall_viiiiij=E.Qh)(r,i,a,s,l,d,f),Ms=n.dynCall_viijjjiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(Ms=n.dynCall_viijjjiiiiii=E.Rh)(r,i,a,s,l,d,f,h,y,v,$,I),Rs=n.dynCall_viiijiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(Rs=n.dynCall_viiijiiiiiii=E.Sh)(r,i,a,s,l,d,f,h,y,v,$,I),Us=n.dynCall_iiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(Us=n.dynCall_iiiiiiiiiiiii=E.Th)(r,i,a,s,l,d,f,h,y,v,$,I,P),Ns=n.dynCall_viiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$)=>(Ns=n.dynCall_viiiiiiiiii=E.Uh)(r,i,a,s,l,d,f,h,y,v,$),Vs=n.dynCall_viiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)=>(Vs=n.dynCall_viiiiiiiiiiiiiiii=E.Vh)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J),Ws=n.dynCall_viid=(r,i,a,s)=>(Ws=n.dynCall_viid=E.Wh)(r,i,a,s),Ls=n.dynCall_vid=(r,i,a)=>(Ls=n.dynCall_vid=E.Xh)(r,i,a),Gs=n.dynCall_viiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(Gs=n.dynCall_viiiiiiiiiii=E.Yh)(r,i,a,s,l,d,f,h,y,v,$,I),Hs=n.dynCall_viiijjjii=(r,i,a,s,l,d,f,h,y)=>(Hs=n.dynCall_viiijjjii=E.Z
h)(r,i,a,s,l,d,f,h,y),Fs=n.dynCall_iid=(r,i,a)=>(Fs=n.dynCall_iid=E._h)(r,i,a),qs=n.dynCall_viiiij=(r,i,a,s,l,d)=>(qs=n.dynCall_viiiij=E.$h)(r,i,a,s,l,d),Ks=n.dynCall_viiijiiiii=(r,i,a,s,l,d,f,h,y,v)=>(Ks=n.dynCall_viiijiiiii=E.ai)(r,i,a,s,l,d,f,h,y,v),Js=n.dynCall_jj=(r,i)=>(Js=n.dynCall_jj=E.bi)(r,i),Zs=n.dynCall_iiiijii=(r,i,a,s,l,d,f)=>(Zs=n.dynCall_iiiijii=E.ci)(r,i,a,s,l,d,f),Qs=n.dynCall_iiijii=(r,i,a,s,l,d)=>(Qs=n.dynCall_iiijii=E.di)(r,i,a,s,l,d),Ys=n.dynCall_viiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)=>(Ys=n.dynCall_viiiiiiiiiiiiiii=E.ei)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z),Xs=n.dynCall_iiijjj=(r,i,a,s,l,d)=>(Xs=n.dynCall_iiijjj=E.fi)(r,i,a,s,l,d),eu=n.dynCall_ij=(r,i)=>(eu=n.dynCall_ij=E.gi)(r,i),tu=n.dynCall_viiiiji=(r,i,a,s,l,d,f)=>(tu=n.dynCall_viiiiji=E.hi)(r,i,a,s,l,d,f),ru=n.dynCall_iijjji=(r,i,a,s,l,d)=>(ru=n.dynCall_iijjji=E.ii)(r,i,a,s,l,d),nu=n.dynCall_vjiiiiii=(r,i,a,s,l,d,f,h)=>(nu=n.dynCall_vjiiiiii=E.ji)(r,i,a,s,l,d,f,h),iu=n.dynCall_vijjiiiii=(r,i,a,s,l,d,f,h,y)=>(iu=n.dynCall_vijjiiiii=E.ki)(r,i,a,s,l,d,f,h,y),ou=n.dynCall_jiij=(r,i,a,s)=>(ou=n.dynCall_jiij=E.li)(r,i,a,s),au=n.dynCall_iijijjijiji=(r,i,a,s,l,d,f,h,y,v,$)=>(au=n.dynCall_iijijjijiji=E.mi)(r,i,a,s,l,d,f,h,y,v,$),su=n.dynCall_iijijji=(r,i,a,s,l,d,f)=>(su=n.dynCall_iijijji=E.ni)(r,i,a,s,l,d,f),uu=n.dynCall_ijijji=(r,i,a,s,l,d)=>(uu=n.dynCall_ijijji=E.oi)(r,i,a,s,l,d),lu=n.dynCall_iiiiiiij=(r,i,a,s,l,d,f,h)=>(lu=n.dynCall_iiiiiiij=E.pi)(r,i,a,s,l,d,f,h),du=n.dynCall_viiijjiii=(r,i,a,s,l,d,f,h,y)=>(du=n.dynCall_viiijjiii=E.qi)(r,i,a,s,l,d,f,h,y),cu=n.dynCall_vif=(r,i,a)=>(cu=n.dynCall_vif=E.ri)(r,i,a),pu=n.dynCall_viif=(r,i,a,s)=>(pu=n.dynCall_viif=E.si)(r,i,a,s),fu=n.dynCall_iiiiijji=(r,i,a,s,l,d,f,h)=>(fu=n.dynCall_iiiiijji=E.ti)(r,i,a,s,l,d,f,h),mu=n.dynCall_iiiiji=(r,i,a,s,l,d)=>(mu=n.dynCall_iiiiji=E.ui)(r,i,a,s,l,d),hu=n.dynCall_iiiifi=(r,i,a,s,l,d)=>(hu=n.dynCall_iiiifi=E.vi)(r,i,a,s,l,d),gu=n.dynCall_iiiiiiiiijii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(gu=n.dynCall_iiiiiiiiijii=E.
wi)(r,i,a,s,l,d,f,h,y,v,$,I),yu=n.dynCall_iiiijjii=(r,i,a,s,l,d,f,h)=>(yu=n.dynCall_iiiijjii=E.xi)(r,i,a,s,l,d,f,h),bu=n.dynCall_iiiiiijjjii=(r,i,a,s,l,d,f,h,y,v,$)=>(bu=n.dynCall_iiiiiijjjii=E.yi)(r,i,a,s,l,d,f,h,y,v,$),_u=n.dynCall_iiijiii=(r,i,a,s,l,d,f)=>(_u=n.dynCall_iiijiii=E.zi)(r,i,a,s,l,d,f),wu=n.dynCall_iiiiiiiijjjfi=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(wu=n.dynCall_iiiiiiiijjjfi=E.Ai)(r,i,a,s,l,d,f,h,y,v,$,I,P),vu=n.dynCall_iijiiii=(r,i,a,s,l,d,f)=>(vu=n.dynCall_iijiiii=E.Bi)(r,i,a,s,l,d,f),xu=n.dynCall_viiiijj=(r,i,a,s,l,d,f)=>(xu=n.dynCall_viiiijj=E.Ci)(r,i,a,s,l,d,f),$u=n.dynCall_iijjjii=(r,i,a,s,l,d,f)=>($u=n.dynCall_iijjjii=E.Di)(r,i,a,s,l,d,f),Cu=n.dynCall_jij=(r,i,a)=>(Cu=n.dynCall_jij=E.Ei)(r,i,a),Su=n.dynCall_jjj=(r,i,a)=>(Su=n.dynCall_jjj=E.Fi)(r,i,a),Tu=n.dynCall_iiji=(r,i,a,s)=>(Tu=n.dynCall_iiji=E.Gi)(r,i,a,s),Iu=n.dynCall_viffiii=(r,i,a,s,l,d,f)=>(Iu=n.dynCall_viffiii=E.Hi)(r,i,a,s,l,d,f),Au=n.dynCall_viifiii=(r,i,a,s,l,d,f)=>(Au=n.dynCall_viifiii=E.Ii)(r,i,a,s,l,d,f),ku=n.dynCall_viiiiidiidi=(r,i,a,s,l,d,f,h,y,v,$)=>(ku=n.dynCall_viiiiidiidi=E.Ji)(r,i,a,s,l,d,f,h,y,v,$),Eu=n.dynCall_viiiiiiiiidi=(r,i,a,s,l,d,f,h,y,v,$,I)=>(Eu=n.dynCall_viiiiiiiiidi=E.Ki)(r,i,a,s,l,d,f,h,y,v,$,I),Pu=n.dynCall_viiiiiiiiiiiiiifi=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)=>(Pu=n.dynCall_viiiiiiiiiiiiiifi=E.Li)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J),Ou=n.dynCall_ijii=(r,i,a,s)=>(Ou=n.dynCall_ijii=E.Mi)(r,i,a,s),zu=n.dynCall_viijiiiijiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(zu=n.dynCall_viijiiiijiii=E.Ni)(r,i,a,s,l,d,f,h,y,v,$,I),Bu=n.dynCall_vijjjjjjjjjjjjji=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)=>(Bu=n.dynCall_vijjjjjjjjjjjjji=E.Oi)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z),Du=n.dynCall_viiijii=(r,i,a,s,l,d,f)=>(Du=n.dynCall_viiijii=E.Pi)(r,i,a,s,l,d,f),ju=n.dynCall_vijjjiiji=(r,i,a,s,l,d,f,h,y)=>(ju=n.dynCall_vijjjiiji=E.Qi)(r,i,a,s,l,d,f,h,y),Mu=n.dynCall_iiiijiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(Mu=n.dynCall_iiiijiiiiiiiiii=E.Ri)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),Ru=n.dynCall_iiiiiiiiii=(r,i,a
,s,l,d,f,h,y,v)=>(Ru=n.dynCall_iiiiiiiiii=E.Si)(r,i,a,s,l,d,f,h,y,v),Uu=n.dynCall_vj=(r,i)=>(Uu=n.dynCall_vj=E.Ti)(r,i),Nu=n.dynCall_vfiii=(r,i,a,s,l)=>(Nu=n.dynCall_vfiii=E.Ui)(r,i,a,s,l),Vu=n.dynCall_viiiiff=(r,i,a,s,l,d,f)=>(Vu=n.dynCall_viiiiff=E.Vi)(r,i,a,s,l,d,f),Wu=n.dynCall_viiiiiff=(r,i,a,s,l,d,f,h)=>(Wu=n.dynCall_viiiiiff=E.Wi)(r,i,a,s,l,d,f,h),Lu=n.dynCall_viiff=(r,i,a,s,l)=>(Lu=n.dynCall_viiff=E.Xi)(r,i,a,s,l),Gu=n.dynCall_viiiiiiiiifiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(Gu=n.dynCall_viiiiiiiiifiiii=E.Yi)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),Hu=n.dynCall_viiiiiiiijj=(r,i,a,s,l,d,f,h,y,v,$)=>(Hu=n.dynCall_viiiiiiiijj=E.Zi)(r,i,a,s,l,d,f,h,y,v,$),Fu=n.dynCall_iiiiiiiiiiiiiifii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)=>(Fu=n.dynCall_iiiiiiiiiiiiiifii=E._i)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J),qu=n.dynCall_viiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(qu=n.dynCall_viiiiiiiiiiiii=E.$i)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),Ku=n.dynCall_iiiiiiiiiiiiiiiiiiifii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De)=>(Ku=n.dynCall_iiiiiiiiiiiiiiiiiiifii=E.aj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De),Ju=n.dynCall_vijjiiiiiii=(r,i,a,s,l,d,f,h,y,v,$)=>(Ju=n.dynCall_vijjiiiiiii=E.bj)(r,i,a,s,l,d,f,h,y,v,$),Zu=n.dynCall_iiiijjj=(r,i,a,s,l,d,f)=>(Zu=n.dynCall_iiiijjj=E.cj)(r,i,a,s,l,d,f),Qu=n.dynCall_fffffff=(r,i,a,s,l,d,f)=>(Qu=n.dynCall_fffffff=E.dj)(r,i,a,s,l,d,f),Yu=n.dynCall_viiiiiijiifiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(Yu=n.dynCall_viiiiiijiifiii=E.ej)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),Xu=n.dynCall_vjjjjjjffjifiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee)=>(Xu=n.dynCall_vjjjjjjffjifiiiiii=E.fj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee),el=n.dynCall_viiiiiiffjifiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)=>(el=n.dynCall_viiiiiiffjifiiiii=E.gj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J),tl=n.dynCall_viiiiiiffjfiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)=>(tl=n.dynCall_viiiiiiffjfiiiii=E.hj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z),rl=n.dynCall_viiiiiiffjiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(rl=n.dynCall_viii
iiiffjiiiii=E.ij)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),nl=n.dynCall_vjjjjjjjjfffjifiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)=>(nl=n.dynCall_vjjjjjjjjfffjifiiiiii=E.jj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe),il=n.dynCall_vjjjjjjfffifijiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe)=>(il=n.dynCall_vjjjjjjfffifijiiiii=E.kj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe),ol=n.dynCall_vjjjjjjfffifiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee)=>(ol=n.dynCall_vjjjjjjfffifiiiiii=E.lj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee),al=n.dynCall_vjjjjjjjjfffiiifiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)=>(al=n.dynCall_vjjjjjjjjfffiiifiiiii=E.mj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe),sl=n.dynCall_vijiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(sl=n.dynCall_vijiiiiiiiiii=E.nj)(r,i,a,s,l,d,f,h,y,v,$,I,P),ul=n.dynCall_vijjfffiii=(r,i,a,s,l,d,f,h,y,v)=>(ul=n.dynCall_vijjfffiii=E.oj)(r,i,a,s,l,d,f,h,y,v),ll=n.dynCall_jiijjiif=(r,i,a,s,l,d,f,h)=>(ll=n.dynCall_jiijjiif=E.pj)(r,i,a,s,l,d,f,h),dl=n.dynCall_vijjjjjjifiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(dl=n.dynCall_vijjjjjjifiiiii=E.qj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),cl=n.dynCall_vjjjjjiiii=(r,i,a,s,l,d,f,h,y,v)=>(cl=n.dynCall_vjjjjjiiii=E.rj)(r,i,a,s,l,d,f,h,y,v),pl=n.dynCall_vjjjjfiii=(r,i,a,s,l,d,f,h,y)=>(pl=n.dynCall_vjjjjfiii=E.sj)(r,i,a,s,l,d,f,h,y),fl=n.dynCall_viiiiiijiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(fl=n.dynCall_viiiiiijiiiiii=E.tj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),ml=n.dynCall_vijjii=(r,i,a,s,l,d)=>(ml=n.dynCall_vijjii=E.uj)(r,i,a,s,l,d),hl=n.dynCall_viiiiijjiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(hl=n.dynCall_viiiiijjiiiii=E.vj)(r,i,a,s,l,d,f,h,y,v,$,I,P),gl=n.dynCall_iiiiiji=(r,i,a,s,l,d,f)=>(gl=n.dynCall_iiiiiji=E.wj)(r,i,a,s,l,d,f),yl=n.dynCall_viiiiijiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(yl=n.dynCall_viiiiijiiiiii=E.xj)(r,i,a,s,l,d,f,h,y,v,$,I,P),bl=n.dynCall_iiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(bl=n.dynCall_iiiiiiiiiiii=E.yj)(r,i,a,s,l,d,f,h,y,v,$,I),_l=n.dynCall_viiijiiiiii=(r,i,a,s,l,d,f,h,y,v,$)=>(
_l=n.dynCall_viiijiiiiii=E.zj)(r,i,a,s,l,d,f,h,y,v,$),wl=n.dynCall_viiiijii=(r,i,a,s,l,d,f,h)=>(wl=n.dynCall_viiiijii=E.Aj)(r,i,a,s,l,d,f,h),vl=n.dynCall_viijjiii=(r,i,a,s,l,d,f,h)=>(vl=n.dynCall_viijjiii=E.Bj)(r,i,a,s,l,d,f,h),xl=n.dynCall_viiiji=(r,i,a,s,l,d)=>(xl=n.dynCall_viiiji=E.Cj)(r,i,a,s,l,d),$l=n.dynCall_viiiiiijii=(r,i,a,s,l,d,f,h,y,v)=>($l=n.dynCall_viiiiiijii=E.Dj)(r,i,a,s,l,d,f,h,y,v),Cl=n.dynCall_viiiiijjji=(r,i,a,s,l,d,f,h,y,v)=>(Cl=n.dynCall_viiiiijjji=E.Ej)(r,i,a,s,l,d,f,h,y,v),Sl=n.dynCall_vijiii=(r,i,a,s,l,d)=>(Sl=n.dynCall_vijiii=E.Fj)(r,i,a,s,l,d),Tl=n.dynCall_iiijiiii=(r,i,a,s,l,d,f,h)=>(Tl=n.dynCall_iiijiiii=E.Gj)(r,i,a,s,l,d,f,h),Il=n.dynCall_viiiiiijjiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(Il=n.dynCall_viiiiiijjiiiii=E.Hj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),Al=n.dynCall_viiiiiiijiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(Al=n.dynCall_viiiiiiijiiiiii=E.Ij)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),kl=n.dynCall_viiiiiji=(r,i,a,s,l,d,f,h)=>(kl=n.dynCall_viiiiiji=E.Jj)(r,i,a,s,l,d,f,h),El=n.dynCall_fiif=(r,i,a,s)=>(El=n.dynCall_fiif=E.Kj)(r,i,a,s),Pl=n.dynCall_viijjjiii=(r,i,a,s,l,d,f,h,y)=>(Pl=n.dynCall_viijjjiii=E.Lj)(r,i,a,s,l,d,f,h,y),Ol=n.dynCall_viiiiiifiii=(r,i,a,s,l,d,f,h,y,v,$)=>(Ol=n.dynCall_viiiiiifiii=E.Mj)(r,i,a,s,l,d,f,h,y,v,$),zl=n.dynCall_viijji=(r,i,a,s,l,d)=>(zl=n.dynCall_viijji=E.Nj)(r,i,a,s,l,d),Bl=n.dynCall_iiiiiiiiiiijijji=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)=>(Bl=n.dynCall_iiiiiiiiiiijijji=E.Oj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z),Dl=n.dynCall_jiijjiii=(r,i,a,s,l,d,f,h)=>(Dl=n.dynCall_jiijjiii=E.Pj)(r,i,a,s,l,d,f,h),jl=n.dynCall_viifiifijjjii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(jl=n.dynCall_viifiifijjjii=E.Qj)(r,i,a,s,l,d,f,h,y,v,$,I,P),Ml=n.dynCall_viiiiiiiiiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De,Le,at)=>(Ml=n.dynCall_viiiiiiiiiiiiiiiiiiiiiii=E.Rj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De,Le,at),Rl=n.dynCall_viiiiifiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(Rl=n.dynCall_viiiiifiiiiii=E.Sj)(r,i,a,s,l,d,f,h,y,v,$,I,P),Ul=n.dynC
all_vijjiiiiii=(r,i,a,s,l,d,f,h,y,v)=>(Ul=n.dynCall_vijjiiiiii=E.Tj)(r,i,a,s,l,d,f,h,y,v),Nl=n.dynCall_vijiiiiiiijjii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(Nl=n.dynCall_vijiiiiiiijjii=E.Uj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),Vl=n.dynCall_viiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(Vl=n.dynCall_viiiiiiiiiiii=E.Vj)(r,i,a,s,l,d,f,h,y,v,$,I,P),Wl=n.dynCall_viiiiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe)=>(Wl=n.dynCall_viiiiiiiiiiiiiiiiii=E.Wj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe),Ll=n.dynCall_viiiiiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye)=>(Ll=n.dynCall_viiiiiiiiiiiiiiiiiii=E.Xj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye),Gl=n.dynCall_viiijiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye)=>(Gl=n.dynCall_viiijiiiiiiiiiiiiiii=E.Yj)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye),Hl=n.dynCall_viiiijjj=(r,i,a,s,l,d,f,h)=>(Hl=n.dynCall_viiiijjj=E.Zj)(r,i,a,s,l,d,f,h),Fl=n.dynCall_viiiiiiijjj=(r,i,a,s,l,d,f,h,y,v,$)=>(Fl=n.dynCall_viiiiiiijjj=E._j)(r,i,a,s,l,d,f,h,y,v,$),ql=n.dynCall_iiiiiiiiiiiiiiiiiiiifi=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De)=>(ql=n.dynCall_iiiiiiiiiiiiiiiiiiiifi=E.$j)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De),Kl=n.dynCall_viiijiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(Kl=n.dynCall_viiijiiiiiiiiii=E.ak)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),Jl=n.dynCall_viiiiif=(r,i,a,s,l,d,f)=>(Jl=n.dynCall_viiiiif=E.bk)(r,i,a,s,l,d,f),Zl=n.dynCall_viiif=(r,i,a,s,l)=>(Zl=n.dynCall_viiif=E.ck)(r,i,a,s,l),Ql=n.dynCall_viiiiiiiiifi=(r,i,a,s,l,d,f,h,y,v,$,I)=>(Ql=n.dynCall_viiiiiiiiifi=E.dk)(r,i,a,s,l,d,f,h,y,v,$,I),Yl=n.dynCall_viiiiid=(r,i,a,s,l,d,f)=>(Yl=n.dynCall_viiiiid=E.ek)(r,i,a,s,l,d,f),Xl=n.dynCall_viiid=(r,i,a,s,l)=>(Xl=n.dynCall_viiid=E.fk)(r,i,a,s,l),ed=n.dynCall_iiif=(r,i,a,s)=>(ed=n.dynCall_iiif=E.gk)(r,i,a,s),td=n.dynCall_iiiij=(r,i,a,s,l)=>(td=n.dynCall_iiiij=E.hk)(r,i,a,s,l),rd=n.dynCall_iiiiiif=(r,i,a,s,l,d,f)=>(rd=n.dynCall_iiiiiif=E.ik)(r,i,a,s,l,d,f),nd=n.dynCall_vijij=(r,i,a,s,l)=>(nd=n.dynCall_vijij=E.jk)
(r,i,a,s,l),id=n.dynCall_viiijji=(r,i,a,s,l,d,f)=>(id=n.dynCall_viiijji=E.kk)(r,i,a,s,l,d,f),od=n.dynCall_viiiiiiffii=(r,i,a,s,l,d,f,h,y,v,$)=>(od=n.dynCall_viiiiiiffii=E.lk)(r,i,a,s,l,d,f,h,y,v,$),ad=n.dynCall_iifii=(r,i,a,s,l)=>(ad=n.dynCall_iifii=E.mk)(r,i,a,s,l),sd=n.dynCall_viiiiifi=(r,i,a,s,l,d,f,h)=>(sd=n.dynCall_viiiiifi=E.nk)(r,i,a,s,l,d,f,h),ud=n.dynCall_iifjii=(r,i,a,s,l,d)=>(ud=n.dynCall_iifjii=E.ok)(r,i,a,s,l,d),ld=n.dynCall_vidi=(r,i,a,s)=>(ld=n.dynCall_vidi=E.pk)(r,i,a,s),dd=n.dynCall_viiijiji=(r,i,a,s,l,d,f,h)=>(dd=n.dynCall_viiijiji=E.qk)(r,i,a,s,l,d,f,h),cd=n.dynCall_viiijij=(r,i,a,s,l,d,f)=>(cd=n.dynCall_viiijij=E.rk)(r,i,a,s,l,d,f),pd=n.dynCall_vijjj=(r,i,a,s,l)=>(pd=n.dynCall_vijjj=E.sk)(r,i,a,s,l),fd=n.dynCall_vjiij=(r,i,a,s,l)=>(fd=n.dynCall_vjiij=E.tk)(r,i,a,s,l),md=n.dynCall_diiiii=(r,i,a,s,l,d)=>(md=n.dynCall_diiiii=E.uk)(r,i,a,s,l,d),hd=n.dynCall_diiii=(r,i,a,s,l)=>(hd=n.dynCall_diiii=E.vk)(r,i,a,s,l),gd=n.dynCall_iiiji=(r,i,a,s,l)=>(gd=n.dynCall_iiiji=E.wk)(r,i,a,s,l),yd=n.dynCall_ijiijji=(r,i,a,s,l,d,f)=>(yd=n.dynCall_ijiijji=E.xk)(r,i,a,s,l,d,f),bd=n.dynCall_viiijjiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I)=>(bd=n.dynCall_viiijjiiiiii=E.yk)(r,i,a,s,l,d,f,h,y,v,$,I),_d=n.dynCall_viijjijjjjiii=(r,i,a,s,l,d,f,h,y,v,$,I,P)=>(_d=n.dynCall_viijjijjjjiii=E.zk)(r,i,a,s,l,d,f,h,y,v,$,I,P),wd=n.dynCall_ijiii=(r,i,a,s,l)=>(wd=n.dynCall_ijiii=E.Ak)(r,i,a,s,l),vd=n.dynCall_ijiiiiji=(r,i,a,s,l,d,f,h)=>(vd=n.dynCall_ijiiiiji=E.Bk)(r,i,a,s,l,d,f,h),xd=n.dynCall_iiifii=(r,i,a,s,l,d)=>(xd=n.dynCall_iiifii=E.Ck)(r,i,a,s,l,d),$d=n.dynCall_ijiij=(r,i,a,s,l)=>($d=n.dynCall_ijiij=E.Dk)(r,i,a,s,l),Cd=n.dynCall_viijiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D)=>(Cd=n.dynCall_viijiiiiiiiiii=E.Ek)(r,i,a,s,l,d,f,h,y,v,$,I,P,D),Sd=n.dynCall_fiiii=(r,i,a,s,l)=>(Sd=n.dynCall_fiiii=E.Fk)(r,i,a,s,l),Td=n.dynCall_jfi=(r,i,a)=>(Td=n.dynCall_jfi=E.Gk)(r,i,a),Id=n.dynCall_viiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)=>(Id=n.dynCall_viiiiiiiiiiiiii=E.Hk)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H),Ad
=n.dynCall_viiiiiiiiiiiiiiiiiiii=(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)=>(Ad=n.dynCall_viiiiiiiiiiiiiiiiiiii=E.Ik)(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe),/* Lazy self-replacing bindings: on its first call each arrow overwrites its own variable (and, where present, the matching n.dynCall_XXX property) with the E.XX entry and forwards its arguments, so later calls go directly to that entry. The last four entries (Ed, Pd, Od, zd) rebind only the variable. NOTE(review): E is presumably the wasm exports object - confirm. */kd=n.dynCall_fiii=(r,i,a,s)=>(kd=n.dynCall_fiii=E.Jk)(r,i,a,s),Ed=r=>(Ed=E.Kk)(r),Pd=()=>(Pd=E.Lk)(),Od=r=>(Od=E.Mk)(r),zd=()=>(zd=E.Nk)();/* Generated try/catch wrappers (Ab ... yv). Each one records j() (exported further down as n.stackSave), calls the wrapped binding with its arguments unchanged, and on an exception first calls M(saved) (exported as n.stackRestore). A caught value e with e !== e+0 (anything other than a plain number) is rethrown; otherwise U(1,0) is called and the wrapper falls through returning undefined (some variants, e.g. Fb, return 0n instead). NOTE(review): U is presumably Emscripten's setThrew - confirm. */function Ab(r,i,a,s){var l=j();try{return La(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function kb(r,i,a){var s=j();try{return Ga(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function Eb(r,i,a){var s=j();try{Wa(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function Pb(r,i){var a=j();try{return di(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function Ob(r,i){var a=j();try{Fa(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function zb(r){var i=j();try{qa(r)}catch(a){if(M(i),a!==a+0)throw a;U(1,0)}}function Bb(r,i,a,s,l,d,f){var h=j();try{return Ha(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Db(r,i,a,s,l){var d=j();try{Xa(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function jb(r,i,a,s,l,d){var f=j();try{return Ka(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function Mb(r,i,a,s){var l=j();try{ci(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function Rb(r,i,a,s,l,d,f){var h=j();try{es(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Ub(r,i,a,s,l){var d=j();try{Qa(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Nb(r,i,a,s,l){var d=j();try{return Za(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Vb(r,i,a,s,l,d,f){var h=j();try{xs(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Wb(r,i,a,s,l,d){var f=j();try{Ya(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function Lb(r){var i=j();try{return Ja(r)}catch(a){if(M(i),a!==a+0)throw a;U(1,0)}}function Gb(r,i,a,s){var l=j();try{return os(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function Hb(r,i,a,s,l,d){var f=j();try{as(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function Fb(r,i,a,s){var 
l=j();try{return us(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;return U(1,0),0n}}function qb(r,i,a,s,l,d,f){var h=j();try{ls(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Kb(r,i,a){var s=j();try{ds(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function Jb(r,i,a,s){var l=j();try{cs(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function Zb(r,i,a){var s=j();try{return ss(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function Qb(r,i,a){var s=j();try{return ms(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function Yb(r,i,a,s,l){var d=j();try{fs(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Xb(r,i,a,s,l,d,f,h){var y=j();try{ys(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function e_(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{return _s(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function t_(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{return ws(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function r_(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{vs(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function n_(r,i,a,s,l,d,f,h,y,v){var $=j();try{Cs(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function i_(r,i,a,s,l,d,f,h,y){var v=j();try{return bs(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function o_(r,i){var a=j();try{return Ss(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function a_(r,i){var a=j();try{return Is(r,i)}catch(s){if(M(a),s!==s+0)throw s;return U(1,0),0n}}function s_(r,i){var a=j();try{return As(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function u_(r,i,a,s){var l=j();try{ts(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function l_(r,i,a,s,l,d,f){var h=j();try{Du(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function d_(r,i,a,s,l,d,f){var h=j();try{Ps(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function c_(r,i,a,s,l,d,f,h){var y=j();try{wl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function 
p_(r,i,a,s,l,d,f,h,y){var v=j();try{Os(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function f_(r,i,a,s){var l=j();try{$s(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function m_(r,i,a,s,l,d,f,h,y){var v=j();try{return zs(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function h_(r,i,a,s,l,d,f,h){var y=j();try{return Bs(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function g_(r,i,a,s,l){var d=j();try{Ds(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function y_(r,i,a){var s=j();try{return ks(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;return U(1,0),0n}}function b_(r,i,a,s,l,d,f){var h=j();try{js(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function __(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{Ms(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function w_(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{Rs(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function v_(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{return Us(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function x_(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{Ns(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function $_(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J){var ee=j();try{Vs(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)}catch(oe){if(M(ee),oe!==oe+0)throw oe;U(1,0)}}function C_(r,i,a,s){var l=j();try{Ws(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function S_(r,i,a){var s=j();try{Ls(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function T_(r,i,a,s,l){var d=j();try{is(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function I_(r,i,a,s,l){var d=j();try{rs(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function A_(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{Gs(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function k_(r,i,a){var s=j();try{return Fs(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function E_(r,i,a,s,l,d){var f=j();try{qs(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw 
h;U(1,0)}}function P_(r,i,a,s,l,d,f,h,y,v){var $=j();try{Ks(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function O_(r,i,a,s,l,d,f,h,y){var v=j();try{Hs(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function z_(r,i){var a=j();try{return eu(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function B_(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{Cd(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function D_(r,i,a,s,l,d){var f=j();try{return Qs(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function j_(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z){var J=j();try{Ys(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)}catch(ee){if(M(J),ee!==ee+0)throw ee;U(1,0)}}function M_(r,i,a,s){var l=j();try{return ou(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;return U(1,0),0n}}function R_(r,i,a,s,l,d){var f=j();try{return Xs(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function U_(r,i,a,s,l,d,f,h){var y=j();try{nu(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function N_(r,i,a,s,l,d,f,h,y){var v=j();try{iu(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function V_(r,i,a,s,l,d,f){var h=j();try{tu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function W_(r,i,a,s,l,d){var f=j();try{return ru(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function L_(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{return au(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function G_(r,i,a,s,l,d,f){var h=j();try{return su(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function H_(r,i,a,s,l,d){var f=j();try{return uu(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function F_(r,i){var a=j();try{return Js(r,i)}catch(s){if(M(a),s!==s+0)throw s;return U(1,0),0n}}function q_(r,i,a,s,l,d,f){var h=j();try{return Zs(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function K_(r,i,a){var s=j();try{cu(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function J_(r,i,a,s,l,d,f,h){var y=j();try{return 
lu(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function Z_(r,i,a,s,l,d,f,h,y){var v=j();try{du(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function Q_(r,i,a,s){var l=j();try{pu(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function Y_(r,i,a,s,l,d,f,h){var y=j();try{return fu(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function X_(r,i,a,s,l,d){var f=j();try{return mu(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function e0(r,i,a,s,l,d){var f=j();try{return hu(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function t0(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{return gu(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function r0(r,i,a,s,l,d,f,h){var y=j();try{return yu(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function n0(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{return bu(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function i0(r,i,a,s,l,d,f){var h=j();try{return _u(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function o0(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{return wu(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function a0(r,i,a,s,l,d,f){var h=j();try{return vu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function s0(r,i,a,s,l,d,f){var h=j();try{xu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function u0(r,i,a,s,l,d,f){var h=j();try{return $u(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function l0(r,i,a){var s=j();try{return Cu(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;return U(1,0),0n}}function d0(r,i,a){var s=j();try{return Su(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;return U(1,0),0n}}function c0(r,i,a,s){var l=j();try{return Tu(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function p0(r,i,a,s,l,d,f){var h=j();try{Iu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function f0(r,i,a,s,l,d,f){var h=j();try{Au(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function 
m0(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{ku(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function h0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J){var ee=j();try{Pu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)}catch(oe){if(M(ee),oe!==oe+0)throw oe;U(1,0)}}function g0(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{zu(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function y0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z){var J=j();try{Bu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)}catch(ee){if(M(J),ee!==ee+0)throw ee;U(1,0)}}function b0(r,i,a,s,l,d,f,h,y){var v=j();try{ju(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function _0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{return Mu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function w0(r,i,a,s,l,d,f,h,y,v){var $=j();try{return Ru(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function v0(r,i){var a=j();try{Uu(r,i)}catch(s){if(M(a),s!==s+0)throw s;U(1,0)}}function x0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{Gu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function $0(r,i,a,s,l){var d=j();try{Nu(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function C0(r,i,a,s,l,d,f){var h=j();try{Vu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function S0(r,i,a,s,l){var d=j();try{Lu(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function T0(r,i,a,s,l,d,f,h){var y=j();try{Wu(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function I0(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{Hu(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function A0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J){var ee=j();try{return Fu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)}catch(oe){if(M(ee),oe!==oe+0)throw oe;U(1,0)}}function k0(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{qu(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function E0(r,i,a,s,l){var d=j();try{return Sd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function 
P0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De){var Le=j();try{return Ku(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De)}catch(at){if(M(Le),at!==at+0)throw at;U(1,0)}}function O0(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{Ju(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function z0(r,i,a,s,l,d,f){var h=j();try{return Zu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function B0(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{Yu(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function D0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee){var oe=j();try{Xu(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee)}catch(ye){if(M(oe),ye!==ye+0)throw ye;U(1,0)}}function j0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J){var ee=j();try{el(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J)}catch(oe){if(M(ee),oe!==oe+0)throw oe;U(1,0)}}function M0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z){var J=j();try{tl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)}catch(ee){if(M(J),ee!==ee+0)throw ee;U(1,0)}}function R0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{rl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function U0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe){var De=j();try{nl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)}catch(Le){if(M(De),Le!==Le+0)throw Le;U(1,0)}}function N0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe){var ye=j();try{il(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe)}catch(xe){if(M(ye),xe!==xe+0)throw xe;U(1,0)}}function V0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee){var oe=j();try{ol(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee)}catch(ye){if(M(oe),ye!==ye+0)throw ye;U(1,0)}}function W0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe){var De=j();try{al(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)}catch(Le){if(M(De),Le!==Le+0)throw Le;U(1,0)}}function L0(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{sl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function G0(r,i,a,s,l,d,f,h,y,v){var $=j();try{ul(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function 
H0(r,i,a,s,l,d,f,h){var y=j();try{return ll(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;return U(1,0),0n}}function F0(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{dl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function q0(r,i,a,s,l,d,f,h,y,v){var $=j();try{cl(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function K0(r,i,a,s,l,d,f,h,y){var v=j();try{pl(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function J0(r,i,a,s,l,d,f){var h=j();try{return Qu(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Z0(r,i,a){var s=j();try{return Td(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;return U(1,0),0n}}function Q0(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{fl(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function Y0(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{hl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function X0(r,i,a,s,l,d,f){var h=j();try{return gl(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function ew(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{yl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function tw(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{return bl(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function rw(r,i,a,s,l,d){var f=j();try{ml(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function nw(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{_l(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function iw(r,i,a,s,l,d,f,h){var y=j();try{vl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function ow(r,i,a,s,l,d){var f=j();try{xl(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function aw(r,i,a,s,l,d,f,h,y,v){var $=j();try{$l(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function sw(r,i,a,s){var l=j();try{return Ou(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function uw(r,i,a,s,l,d,f,h,y,v){var 
$=j();try{Cl(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function lw(r,i,a,s,l,d){var f=j();try{Sl(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function dw(r,i,a){var s=j();try{return Ts(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function cw(r,i,a,s,l,d,f,h){var y=j();try{return Tl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function pw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{Id(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function fw(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{Il(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function mw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{Al(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function hw(r,i,a,s,l,d,f,h){var y=j();try{kl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function gw(r,i,a,s){var l=j();try{return El(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function yw(r,i,a){var s=j();try{return Es(r,i,a)}catch(l){if(M(s),l!==l+0)throw l;U(1,0)}}function bw(r,i,a,s,l,d,f,h,y){var v=j();try{Pl(r,i,a,s,l,d,f,h,y)}catch($){if(M(v),$!==$+0)throw $;U(1,0)}}function _w(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{Ol(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function ww(r,i,a,s,l,d){var f=j();try{zl(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function vw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z){var J=j();try{return Bl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z)}catch(ee){if(M(J),ee!==ee+0)throw ee;U(1,0)}}function xw(r,i,a,s,l,d,f,h){var y=j();try{return Dl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;return U(1,0),0n}}function $w(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{jl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function Cw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De,Le,at){var bv=j();try{Ml(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De,Le,at)}catch(pi){if(M(bv),pi!==pi+0)throw pi;U(1,0)}}function Sw(r,i,a,s,l,d,f,h,y,v,$,I,P){var 
D=j();try{Rl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function Tw(r,i,a,s,l,d,f,h,y,v){var $=j();try{Ul(r,i,a,s,l,d,f,h,y,v)}catch(I){if(M($),I!==I+0)throw I;U(1,0)}}function Iw(r,i,a,s,l,d,f,h,y,v,$,I,P,D){var H=j();try{Nl(r,i,a,s,l,d,f,h,y,v,$,I,P,D)}catch(Z){if(M(H),Z!==Z+0)throw Z;U(1,0)}}function Aw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye){var xe=j();try{Gl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye)}catch(De){if(M(xe),De!==De+0)throw De;U(1,0)}}function kw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe){var De=j();try{Ad(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe)}catch(Le){if(M(De),Le!==Le+0)throw Le;U(1,0)}}function Ew(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye){var xe=j();try{Ll(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye)}catch(De){if(M(xe),De!==De+0)throw De;U(1,0)}}function Pw(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{Vl(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function Ow(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe){var ye=j();try{Wl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe)}catch(xe){if(M(ye),xe!==xe+0)throw xe;U(1,0)}}function zw(r,i,a,s,l,d,f,h){var y=j();try{Hl(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function Bw(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{Fl(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function Dw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De){var Le=j();try{return ql(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H,Z,J,ee,oe,ye,xe,De)}catch(at){if(M(Le),at!==at+0)throw at;U(1,0)}}function jw(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H){var Z=j();try{Kl(r,i,a,s,l,d,f,h,y,v,$,I,P,D,H)}catch(J){if(M(Z),J!==J+0)throw J;U(1,0)}}function Mw(r,i,a,s,l,d,f){var h=j();try{Jl(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Rw(r,i,a,s,l){var d=j();try{Zl(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Uw(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{Ql(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function Nw(r,i,a,s,l,d,f){var 
h=j();try{Yl(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Vw(r,i,a,s,l){var d=j();try{Xl(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Ww(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{Eu(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function Lw(r,i,a,s,l,d,f){var h=j();try{return rd(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Gw(r,i,a,s,l){var d=j();try{nd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Hw(r,i,a,s,l,d,f){var h=j();try{id(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Fw(r,i,a,s,l,d,f,h,y,v,$){var I=j();try{od(r,i,a,s,l,d,f,h,y,v,$)}catch(P){if(M(I),P!==P+0)throw P;U(1,0)}}function qw(r,i,a,s,l){var d=j();try{return ad(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function Kw(r,i,a,s,l,d,f,h){var y=j();try{sd(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function Jw(r,i,a,s,l,d){var f=j();try{return ud(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function Zw(r,i,a,s){var l=j();try{ld(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function Qw(r,i,a,s,l,d,f,h){var y=j();try{dd(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function Yw(r,i,a,s,l,d,f){var h=j();try{cd(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function Xw(r,i,a,s,l){var d=j();try{pd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function ev(r,i,a,s,l){var d=j();try{return td(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function tv(r,i,a,s){var l=j();try{return ed(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function rv(r,i,a,s){var l=j();try{return kd(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function nv(r,i,a,s,l){var d=j();try{fd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function iv(r,i,a,s){var l=j();try{return ns(r,i,a,s)}catch(d){if(M(l),d!==d+0)throw d;U(1,0)}}function ov(r,i,a,s,l,d){var f=j();try{return md(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function av(r,i,a,s,l){var 
d=j();try{return hd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function sv(r,i,a,s,l){var d=j();try{return gd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function uv(r,i,a,s,l,d,f){var h=j();try{return yd(r,i,a,s,l,d,f)}catch(y){if(M(h),y!==y+0)throw y;U(1,0)}}function lv(r,i,a,s,l,d,f,h,y,v,$,I){var P=j();try{bd(r,i,a,s,l,d,f,h,y,v,$,I)}catch(D){if(M(P),D!==D+0)throw D;U(1,0)}}function dv(r,i,a,s,l){var d=j();try{return wd(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function cv(r,i,a,s,l,d,f,h,y,v,$,I,P){var D=j();try{_d(r,i,a,s,l,d,f,h,y,v,$,I,P)}catch(H){if(M(D),H!==H+0)throw H;U(1,0)}}function pv(r,i,a,s,l,d,f,h){var y=j();try{return vd(r,i,a,s,l,d,f,h)}catch(v){if(M(y),v!==v+0)throw v;U(1,0)}}function fv(r,i,a,s,l,d){var f=j();try{return xd(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function mv(r,i,a,s,l){var d=j();try{return $d(r,i,a,s,l)}catch(f){if(M(d),f!==f+0)throw f;U(1,0)}}function hv(r){var i=j();try{return gs(r)}catch(a){if(M(i),a!==a+0)throw a;return U(1,0),0n}}function gv(r,i,a,s,l,d){var f=j();try{return hs(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}function yv(r,i,a,s,l,d){var f=j();try{return ps(r,i,a,s,l,d)}catch(h){if(M(f),h!==h+0)throw h;U(1,0)}}return n.stackSave=j,n.stackRestore=M,n.stackAlloc=zn,n.setValue=function(r,i,a="i8"){switch(a.endsWith("*")&&(a="*"),a){case"i1":case"i8":me()[r>>>0]=i;break;case"i16":je()[r>>>1>>>0]=i;break;case"i32":R()[r>>>2>>>0]=i;break;case"i64":re[r>>>3]=BigInt(i);break;case"float":Ce()[r>>>2>>>0]=i;break;case"double":Ne()[r>>>3>>>0]=i;break;case"*":V()[r>>>2>>>0]=i;break;default:pt(`invalid type for setValue: ${a}`)}},n.getValue=function(r,i="i8"){switch(i.endsWith("*")&&(i="*"),i){case"i1":case"i8":return me()[r>>>0];case"i16":return je()[r>>>1>>>0];case"i32":return R()[r>>>2>>>0];case"i64":return re[r>>>3];case"float":return Ce()[r>>>2>>>0];case"double":return Ne()[r>>>3>>>0];case"*":return V()[r>>>2>>>0];default:pt(`invalid type for getValue: 
${i}`)}},n.UTF8ToString=Ue,n.stringToUTF8=Et,n.lengthBytesUTF8=kt,function r(){if(Mt>0)Zt=r;else{if(m)return t(n),void ot();(i=>{for(;i.length>0;)i.shift()(n)})(_o),Mt>0?Zt=r:(n.calledRun=!0,ve||(ot(),t(n)))}}(),n.PTR_SIZE=4,n.webgpuInit=r=>{let i=new WeakMap,a=1,s=r,l,d;n.webgpuRegisterDevice=y=>{if(d!==void 0)throw new Error("another WebGPU EP inference session is being created.");if(y){let v=i.get(y);if(!v){let $=fa(0),I=G.importJsDevice(y,$);v=[a++,$,I],i.set(y,v)}return l=y,d=v[0],v}return l=void 0,void(d=0)};let f=new Map;n.webgpuOnCreateSession=y=>{if(d===void 0)return;let v=d;if(d=void 0,y){let $=Kn(v);if(f.set(y,$),v===0){let I=l??G.getJsObject($);s(I)}}l=void 0},n.webgpuOnReleaseSession=y=>{f.delete(y)};let h=Symbol("gpuBufferMetadata");n.webgpuRegisterBuffer=(y,v,$)=>{if($)return y[h]=[$,NaN],$;{let I=y[h];if(I)return I[1]++,I[0];let P=f.get(v);if(P===void 0)throw new Error("Invalid session handle passed to webgpuRegisterBuffer");let D=G.importJsBuffer(y,P);return y[h]=[D,1],D}},n.webgpuUnregisterBuffer=y=>{let v=y[h];if(!v)throw new Error("Buffer is not registered");v[1]--,v[1]===0&&(pa(v[0]),delete y[h])},n.webgpuGetBuffer=y=>G.getJsObject(y),n.webgpuCreateDownloader=(y,v,$)=>{let I=f.get($);if(I===void 0)throw new Error("Invalid session handle passed to webgpuRegisterBuffer");let P=y,D=G.getJsObject(I),H=v,Z=16*Math.ceil(Number(H)/16);return async()=>{let J={size:Z,usage:9},ee=D.createBuffer(J);try{let oe=D.createCommandEncoder();return oe.copyBufferToBuffer(P,0,ee,0,Z),D.queue.submit([oe.finish()]),await ee.mapAsync(GPUMapMode.READ),ee.getMappedRange().slice(0,H)}finally{ee.destroy()}}},n.webgpuUploadExternalBuffer=(y,v)=>{let $=v.buffer,I=v.byteOffset,P=v.byteLength,D=16*Math.ceil(Number(P)/16),H=G.getJsObject(y);if(!l){let ye=Kn(d);l=G.getJsObject(ye)}let Z={mappedAtCreation:!0,size:D,usage:6},J=l.createBuffer(Z),ee=J.getMappedRange();new Uint8Array(ee).set(new Uint8Array($,I,P)),J.unmap();let 
oe=l.createCommandEncoder();oe.copyBufferToBuffer(J,0,H,0,D),l.queue.submit([oe.finish()]),J.destroy()}},u}),Tv=fc,Iv=globalThis.self?.name?.startsWith("em-pthread");Iv&&fc()});/* Nr: lazy module (X wrapper) with helpers that resolve the script URL and load the wasm glue module and the proxy worker. */var bc,Ci,Av,et,_c,$i,kv,Ev,wc,Pv,gc,vc,yc,xc,Nr=X(()=>{"use strict";Ur();bc=typeof location>"u"?void 0:location.origin,/* Ci: true when import.meta.url sorts between the two file-scheme bounds, i.e. it starts with the file: scheme. */Ci=import.meta.url>"file:"&&import.meta.url<"file;",Av=()=>{if(!!1){if(Ci){let e=URL;return new URL(new e("ort.webgpu.bundle.min.mjs",import.meta.url).href,bc).href}return import.meta.url}},et=Av(),/* _c: directory prefix of the script URL et, or undefined when et is unset or a blob: URL. */_c=()=>{if(et&&!et.startsWith("blob:"))return et.substring(0,et.lastIndexOf("/")+1)},/* $i: true when e, resolved against t (or et), has the same origin as the current location; false when URL parsing throws. */$i=(e,t)=>{try{let o=t??et;return(o?new URL(e,o):new URL(e)).origin===bc}catch{return!1}},/* kv: absolute href of e resolved against t (or et); undefined when URL parsing throws. */kv=(e,t)=>{let o=t??et;try{return(o?new URL(e,o):new URL(e)).href}catch{return}},Ev=(e,t)=>`${t??"./"}${e}`,/* wc: fetches e with same-origin credentials and returns an object URL for the response blob. */wc=async e=>{let o=await(await fetch(e,{credentials:"same-origin"})).blob();return URL.createObjectURL(o)},Pv=async e=>(await import(/*webpackIgnore:true*/e)).default,gc=(pc(),pr(cc)).default,/* vc: returns [objectUrlOrUndefined, workerInstance]; the script is first fetched into a blob URL when it is not same-origin. */vc=async()=>{if(!et)throw new Error("Failed to load proxy worker: cannot determine the script source URL.");if($i(et))return[void 0,gc()];let e=await wc(et);return[e,gc(e)]},yc=(hc(),pr(mc)).default,/* xc: returns [objectUrlOrUndefined, moduleFactory]. Uses the embedded factory yc when no override URL e or prefix t is given and the script is same-origin; otherwise dynamically imports the glue module, via a blob URL when o is set and the URL is cross-origin. */xc=async(e,t,o)=>{if(!e&&!t&&yc&&et&&$i(et))return[void 0,yc];{let n="ort-wasm-simd-threaded.jsep.mjs",u=e??kv(n,t),c=!!1&&o&&u&&!$i(u,t),p=c?await wc(u):u??Ev(n,t);return[c?p:void 0,await Pv(p)]}}});/* Ot: lazy module with WebAssembly feature probes and one-time initialization of the wasm module. */var Si,Ti,Jr,$c,Ov,zv,Bv,Vr,Oe,Ot=X(()=>{"use strict";Nr();Ti=!1,Jr=!1,$c=!1,/* Ov: false when SharedArrayBuffer is unavailable; otherwise posts a SharedArrayBuffer through a MessageChannel (when available) and validates a small wasm binary - presumably a threads probe, confirm. Any exception yields false. */Ov=()=>{if(typeof SharedArrayBuffer>"u")return!1;try{return typeof MessageChannel<"u"&&new MessageChannel().port1.postMessage(new SharedArrayBuffer(1)),WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,65,0,254,16,2,0,26,11]))}catch{return!1}},/* zv: validates a small wasm binary; used by Vr as the SIMD support check. */zv=()=>{try{return WebAssembly.validate(new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,10,30,1,28,0,65,0,253,15,253,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,253,186,1,26,11]))}catch{return!1}},/* Bv: validates a second wasm binary; used by Vr as the relaxed SIMD support check. */Bv=()=>{try{return WebAssembly.validate(new 
Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,19,1,17,0,65,1,253,15,65,2,253,15,65,3,253,15,253,147,2,11]))}catch{return!1}},/* Vr: initializes the WebAssembly backend once from env flags e. Resolves immediately when already initialized; throws on a concurrent call, after a previous failure, when the requested SIMD level is unsupported, or when e.initTimeout elapses first. Downgrades e.numThreads to 1 (with console warnings) when the probe Ov fails. */Vr=async e=>{if(Ti)return Promise.resolve();if(Jr)throw new Error("multiple calls to 'initializeWebAssembly()' detected.");if($c)throw new Error("previous call to 'initializeWebAssembly()' failed.");Jr=!0;let t=e.initTimeout,o=e.numThreads;if(e.simd!==!1){if(e.simd==="relaxed"){if(!Bv())throw new Error("Relaxed WebAssembly SIMD is not supported in the current environment.")}else if(!zv())throw new Error("WebAssembly SIMD is not supported in the current environment.")}let n=Ov();o>1&&!n&&(typeof self<"u"&&!self.crossOriginIsolated&&console.warn("env.wasm.numThreads is set to "+o+", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."),console.warn("WebAssembly multi-threading is not supported in the current environment. Falling back to single-threading."),e.numThreads=o=1);let u=e.wasmPaths,c=typeof u=="string"?u:void 0,p=u?.mjs,m=p?.href??p,g=u?.wasm,b=g?.href??g,_=e.wasmBinary,[w,x]=await xc(m,c,o>1),S=!1,C=[];/* C races an optional timeout promise (sets S) against the module factory promise. */if(t>0&&C.push(new Promise(T=>{setTimeout(()=>{S=!0,T()},t)})),C.push(new Promise((T,z)=>{let k={numThreads:o};if(_)k.wasmBinary=_;else if(b||c)k.locateFile=A=>b??c+A;else if(m&&m.indexOf("blob:")!==0)k.locateFile=A=>new URL(A,m).href;else if(w){let A=_c();A&&(k.locateFile=O=>A+O)}x(k).then(A=>{Jr=!1,Ti=!0,Si=A,T(),w&&URL.revokeObjectURL(w)},A=>{Jr=!1,$c=!0,z(A)})})),await Promise.race(C),S)throw new Error(`WebAssembly backend initializing failed due to timeout: ${t}ms`)},/* Oe: returns the initialized wasm module; throws when Vr has not completed successfully. */Oe=()=>{if(Ti&&Si)return Si;throw new Error("WebAssembly is not initialized yet.")}});/* Zr: lazy module with helpers for passing strings and nested options to wasm and for raising wasm-side errors. */var tt,hr,Ee,Zr=X(()=>{"use strict";Ot();/* tt: copies string e into wasm memory as UTF-8 (buffer sized lengthBytesUTF8 + 1), records the pointer in list t so the caller can free it, and returns the pointer. */tt=(e,t)=>{let o=Oe(),n=o.lengthBytesUTF8(e)+1,u=o._malloc(n);return o.stringToUTF8(e,u,n),t.push(u),u},/* hr: walks nested options object e and calls n(key, value) for each leaf, with dotted key paths built from prefix t; strings and numbers are stringified, booleans become 1 or 0, other leaf types throw. o is the set of visited objects used to detect circular references. NOTE(review): a null value has typeof object and is recursed into, where Object.entries(null) throws a TypeError - confirm whether null entries can occur. */hr=(e,t,o,n)=>{if(typeof e=="object"&&e!==null){if(o.has(e))throw new Error("Circular reference in 
options");o.add(e)}Object.entries(e).forEach(([u,c])=>{let p=t?t+u:u;if(typeof c=="object")hr(c,p+".",o,n);else if(typeof c=="string"||typeof c=="number")n(p,c.toString());else if(typeof c=="boolean")n(p,c?"1":"0");else throw new Error(`Can't handle extra config type: ${typeof c}`)})},/* Ee: always throws an Error built from message e plus the code and message read back through _OrtGetLastError; the wasm stack pointer is restored in finally. */Ee=e=>{let t=Oe(),o=t.stackSave();try{let n=t.PTR_SIZE,u=t.stackAlloc(2*n);t._OrtGetLastError(u,u+n);let c=Number(t.getValue(u,n===4?"i32":"i64")),p=t.getValue(u+n,"*"),m=p?t.UTF8ToString(p):"";throw new Error(`${e} ERROR_CODE: ${c}, ERROR_MESSAGE: ${m}`)}finally{t.stackRestore(o)}}});/* Sc: lazy module defining Cc, the run options builder. */var Cc,Sc=X(()=>{"use strict";Ot();Zr();/* Cc: validates run options e, creates a native run options handle and returns [handle, allocatedPointers]. On any error the handle is released, the allocations are freed and the error is rethrown. When e is provided, u aliases it, so defaults are written onto the object passed by the caller. */Cc=e=>{let t=Oe(),o=0,n=[],u=e||{};try{if(e?.logSeverityLevel===void 0)u.logSeverityLevel=2;else if(typeof e.logSeverityLevel!="number"||!Number.isInteger(e.logSeverityLevel)||e.logSeverityLevel<0||e.logSeverityLevel>4)throw new Error(`log serverity level is not valid: ${e.logSeverityLevel}`);if(e?.logVerbosityLevel===void 0)u.logVerbosityLevel=0;else if(typeof e.logVerbosityLevel!="number"||!Number.isInteger(e.logVerbosityLevel))throw new Error(`log verbosity level is not valid: ${e.logVerbosityLevel}`);e?.terminate===void 0&&(u.terminate=!1);let c=0;return e?.tag!==void 0&&(c=tt(e.tag,n)),o=t._OrtCreateRunOptions(u.logSeverityLevel,u.logVerbosityLevel,!!u.terminate,c),o===0&&Ee("Can't create run options."),e?.extra!==void 0&&hr(e.extra,"",new WeakSet,(p,m)=>{let g=tt(p,n),b=tt(m,n);t._OrtAddRunConfigEntry(o,g,b)!==0&&Ee(`Can't set a run config entry: ${p} - ${m}.`)}),[o,n]}catch(c){throw o!==0&&t._OrtReleaseRunOptions(o),n.forEach(p=>t._free(p)),c}}});/* Ic: lazy module for building native session options. */var Dv,jv,Mv,Qr,Rv,Tc,Ic=X(()=>{"use strict";Ot();Zr();/* Dv: maps a graph optimization level name to its numeric code; throws for unknown names. */Dv=e=>{switch(e){case"disabled":return 0;case"basic":return 1;case"extended":return 2;case"all":return 99;default:throw new Error(`unsupported graph optimization level: ${e}`)}},/* jv: maps an execution mode name to its numeric code; throws for unknown names. */jv=e=>{switch(e){case"sequential":return 0;case"parallel":return 1;default:throw new Error(`unsupported execution mode: 
${e}`)}},/* Mv: fills in session defaults in place: ensures e.extra.session exists, defaults use_ort_model_bytes_directly to the string 1, and forces enableMemPattern off when a webgpu execution provider is requested. */Mv=e=>{e.extra||(e.extra={}),e.extra.session||(e.extra.session={});let t=e.extra.session;t.use_ort_model_bytes_directly||(t.use_ort_model_bytes_directly="1"),e.executionProviders&&e.executionProviders.some(o=>(typeof o=="string"?o:o.name)==="webgpu")&&(e.enableMemPattern=!1)},/* Qr: adds config entry t = o to native session options handle e; both strings are allocated through tt and recorded in n; throws via Ee when the native call reports failure. */Qr=(e,t,o,n)=>{let u=tt(t,n),c=tt(o,n);Oe()._OrtAddSessionConfigEntry(e,u,c)!==0&&Ee(`Can't set a session config entry: ${t} - ${o}.`)},/* Rv: applies the execution provider list t to session options handle e; wasm and cpu entries are skipped and unknown names throw. NOTE(review): the end of this function and the head of the next definition are missing from this copy of the text (the inner loop below is truncated). */Rv=async(e,t,o)=>{for(let n of t){let u=typeof n=="string"?n:n.name,c=[];switch(u){case"webnn":if(u="WEBNN",typeof n!="string"){let w=n?.deviceType;w&&Qr(e,"deviceType",w,o)}break;case"webgpu":if(u="JS",typeof n!="string"){let _=n;if(_?.preferredLayout){if(_.preferredLayout!=="NCHW"&&_.preferredLayout!=="NHWC")throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${_.preferredLayout}`);Qr(e,"preferredLayout",_.preferredLayout,o)}}break;case"wasm":case"cpu":continue;default:throw new Error(`not supported execution provider: ${u}`)}let p=tt(u,o),m=c.length,g=0,b=0;if(m>0){g=Oe()._malloc(m*Oe().PTR_SIZE),o.push(g),b=Oe()._malloc(m*Oe().PTR_SIZE),o.push(b);for(let _=0;_{let t=Oe(),o=0,n=[],u=e||{};Mv(u);try{let c=Dv(u.graphOptimizationLevel??"all"),p=jv(u.executionMode??"sequential"),m=typeof u.logId=="string"?tt(u.logId,n):0,g=u.logSeverityLevel??2;if(!Number.isInteger(g)||g<0||g>4)throw new Error(`log serverity level is not valid: ${g}`);let b=u.logVerbosityLevel??0;if(!Number.isInteger(b)||b<0||b>4)throw new Error(`log verbosity level is not valid: ${b}`);let _=typeof u.optimizedModelFilePath=="string"?tt(u.optimizedModelFilePath,n):0;if(o=t._OrtCreateSessionOptions(c,!!u.enableCpuMemArena,!!u.enableMemPattern,p,!!u.enableProfiling,0,m,g,b,_),o===0&&Ee("Can't create session options."),u.executionProviders&&await Rv(o,u.executionProviders,n),u.enableGraphCapture!==void 0){if(typeof u.enableGraphCapture!="boolean")throw new Error(`enableGraphCapture must be a boolean value: 
${u.enableGraphCapture}`);Qr(o,"enableGraphCapture",u.enableGraphCapture.toString(),n)}if(u.freeDimensionOverrides)for(let[w,x]of Object.entries(u.freeDimensionOverrides)){if(typeof w!="string")throw new Error(`free dimension override name must be a string: ${w}`);if(typeof x!="number"||!Number.isInteger(x)||x<0)throw new Error(`free dimension override value must be a non-negative integer: ${x}`);let S=tt(w,n);t._OrtAddFreeDimensionOverride(o,S,x)!==0&&Ee(`Can't set a free dimension override: ${w} - ${x}.`)}return u.extra!==void 0&&hr(u.extra,"",new WeakSet,(w,x)=>{Qr(o,w,x,n)}),[o,n]}catch(c){throw o!==0&&t._OrtReleaseSessionOptions(o)!==0&&Ee("Can't release session options."),n.forEach(p=>t._free(p)),c}}});/* ce: lazy module with tensor data type and data location lookup helpers. */var zt,gt,Bt,er,gr,Yr,Xr,Ii,ce=X(()=>{"use strict";/* zt: maps a data type name to its numeric type code; throws for unknown names. */zt=e=>{switch(e){case"int8":return 3;case"uint8":return 2;case"bool":return 9;case"int16":return 5;case"uint16":return 4;case"int32":return 6;case"uint32":return 12;case"float16":return 10;case"float32":return 1;case"float64":return 11;case"string":return 8;case"int64":return 7;case"uint64":return 13;case"int4":return 22;case"uint4":return 21;default:throw new Error(`unsupported data type: ${e}`)}},/* gt: inverse of zt - maps a numeric type code to its data type name; throws for unknown codes. */gt=e=>{switch(e){case 3:return"int8";case 2:return"uint8";case 9:return"bool";case 5:return"int16";case 4:return"uint16";case 6:return"int32";case 12:return"uint32";case 10:return"float16";case 1:return"float32";case 11:return"float64";case 8:return"string";case 7:return"int64";case 13:return"uint64";case 22:return"int4";case 21:return"uint4";default:throw new Error(`unsupported data type: ${e}`)}},/* Bt: byte size for type code e and element count t (a number, or a dims array whose product is used), looked up in a bytes-per-element table indexed by type code (0.5 for codes 21 and 22) and rounded up; returns undefined when the table entry is missing or not positive. */Bt=(e,t)=>{let o=[-1,4,1,1,2,2,4,8,-1,1,2,8,4,8,-1,-1,-1,-1,-1,-1,-1,.5,.5][e],n=typeof t=="number"?t:t.reduce((u,c)=>u*c,1);return o>0?Math.ceil(n*o):void 0},/* er: typed array constructor for a data type name; float16 uses Float16Array when available and Uint16Array otherwise; bool uses Uint8Array; unknown names throw. */er=e=>{switch(e){case"float16":return typeof Float16Array<"u"&&Float16Array.from?Float16Array:Uint16Array;case"float32":return Float32Array;case"uint8":return Uint8Array;case"int8":return Int8Array;case"uint16":return Uint16Array;case"int16":return 
Int16Array;case"int32":return Int32Array;case"bool":return Uint8Array;case"float64":return Float64Array;case"uint32":return Uint32Array;case"int64":return BigInt64Array;case"uint64":return BigUint64Array;default:throw new Error(`unsupported type: ${e}`)}},/* gr: maps a logging level name to its numeric severity 0..4; throws for unknown names. */gr=e=>{switch(e){case"verbose":return 0;case"info":return 1;case"warning":return 2;case"error":return 3;case"fatal":return 4;default:throw new Error(`unsupported logging level: ${e}`)}},/* Yr: true when e is one of the listed data type names - presumably the types usable with GPU buffers, confirm against callers. */Yr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",/* Xr: same list as Yr plus uint64 and int8 - presumably the types usable with ML tensors, confirm against callers. */Xr=e=>e==="float32"||e==="float16"||e==="int32"||e==="int64"||e==="uint32"||e==="uint64"||e==="int8"||e==="uint8"||e==="bool"||e==="uint4"||e==="int4",/* Ii: maps a data location name to its numeric code; throws for unknown names. */Ii=e=>{switch(e){case"none":return 0;case"cpu":return 1;case"cpu-pinned":return 2;case"texture":return 3;case"gpu-buffer":return 4;case"ml-tensor":return 5;default:throw new Error(`unsupported data location: ${e}`)}}});/* Ai: lazy module defining yr, the file and data loader. */var yr,Ai=X(()=>{"use strict";Ur();/* yr: loads data as a Uint8Array. A string is fetched as a URL (the file system branch is compiled out by the constant false condition); a Blob is read through arrayBuffer; a Uint8Array is returned as is; anything else is passed to the Uint8Array constructor. Responses whose Content-Length is at least 1073741824 bytes (1 GiB) are streamed chunk by chunk into a preallocated buffer, falling back to a WebAssembly.Memory buffer when the ArrayBuffer allocation throws RangeError. */yr=async e=>{if(typeof e=="string")if(!1)try{let{readFile:t}=mi("node:fs/promises");return new Uint8Array(await t(e))}catch(t){if(t.code==="ERR_FS_FILE_TOO_LARGE"){let{createReadStream:o}=mi("node:fs"),n=o(e),u=[];for await(let c of n)u.push(c);return new Uint8Array(Buffer.concat(u))}throw t}else{let t=await fetch(e);if(!t.ok)throw new Error(`failed to load external data file: ${e}`);let o=t.headers.get("Content-Length"),n=o?parseInt(o,10):0;if(n<1073741824)return new Uint8Array(await t.arrayBuffer());{if(!t.body)throw new Error(`failed to load external data file: ${e}, no response body.`);let u=t.body.getReader(),c;try{c=new ArrayBuffer(n)}catch(m){if(m instanceof RangeError){/* WebAssembly.Memory is sized in 65536-byte pages. */let g=Math.ceil(n/65536);c=new WebAssembly.Memory({initial:g,maximum:g}).buffer}else throw m}let p=0;for(;;){let{done:m,value:g}=await u.read();if(m)break;let b=g.byteLength;new Uint8Array(c,p,b).set(g),p+=b}return new Uint8Array(c,0,n)}}else return e instanceof Blob?new Uint8Array(await e.arrayBuffer()):e instanceof Uint8Array?e:new 
Uint8Array(e)}});var Uv,Nv,Ac,kc,en,Vv,$e,yt=X(()=>{"use strict";ce();Uv=["V","I","W","E","F"],Nv=(e,t)=>{console.log(`[${Uv[e]},${new Date().toISOString()}]${t}`)},en=(e,t)=>{Ac=e,kc=t},Vv=(e,t)=>{let o=gr(e),n=gr(Ac);o>=n&&Nv(o,typeof t=="function"?t():t)},$e=(...e)=>{kc&&Vv(...e)}});var ki,bt,L,Ht,tn,Ec,Pc,be=X(()=>{"use strict";ki=class{static calcMatMulShape(t,o){return t[1]!==o[0]?void 0:[t[0],o[1]]}},bt=class{static calcShape(t,o,n=!1){let u=t.length,c=o.length;if(u===0)return o;if(c===0)return t;let p=Math.max(t.length,o.length),m=new Array(p);if(n){if(u<2||c<2)return;let g=ki.calcMatMulShape([t[u-2],t[u-1]],[o[c-2],o[c-1]]);if(g===void 0)return;[m[p-2],m[p-1]]=g}for(let g=n?3:1;g<=p;g++){let b=u-g<0?1:t[u-g],_=c-g<0?1:o[c-g];if(b!==_&&b>1&&_>1)return;let w=Math.max(b,_);if(b&&_)m[p-g]=Math.max(b,_);else{if(w>1)return;m[p-g]=0}}return m}static isValidBroadcast(t,o){let n=t.length,u=o.length;if(n>u)return!1;for(let c=1;c<=n;c++)if(t[n-c]!==1&&t[n-c]!==o[u-c])return!1;return!0}},L=class e{static size(t){return e.getSizeFromDimensionRange(t,0,t.length)}static convertShape(t,o=4){let n=t.length;if(n===0)return[];let u=new Array(n),c=n-1;for(;c>=0;){if(t[c]%o===0){u[c]=t[c]/o;break}if(o%t[c]!==0)throw new Error("cannot convert shape");u[c]=1,o/=t[c],c--}for(c--;c>=0;c--)u[c]=t[c];return u}static sizeFromDimension(t,o){if(o<0||o>t.length)throw new Error(`invalid dimension of ${o} for sizeFromDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,o,t.length)}static sizeToDimension(t,o){if(o<0||o>t.length)throw new Error(`invalid dimension of ${o} for sizeToDimension as Tensor has ${t.length} dimensions.`);return e.getSizeFromDimensionRange(t,0,o)}static getSizeFromDimensionRange(t,o,n){let u=1;for(let c=o;c=0;--u)n[u]=n[u+1]*t[u+1];return n}static normalizeAxis(t,o){if(t<-o&&t>=o)throw new Error("unsupported axis for this operation.");return t<0?t+o:t}static normalizeAxes(t,o){return 
t.map(n=>this.normalizeAxis(n,o??t.length))}/* sortBasedOnPerm: reorders t by permutation o, or returns a reversed copy when o is not given. */static sortBasedOnPerm(t,o){return o?o.map(n=>t[n]):t.slice().reverse()}/* padShape: adds the leading pad o[i] and trailing pad o[i + rank] to each dim of t. */static padShape(t,o){let n=t.length;return t.map((u,c)=>u+o[c]+o[c+n])}static areEqual(t,o){return t.length!==o.length?!1:t.every((n,u)=>n===o[u])}},/* Ht: pooling attribute helpers. NOTE(review): several loop bodies in this class, and the definitions between it and rn, are truncated in this copy of the text. */Ht=class e{static adjustPoolAttributes(t,o,n,u,c,p){if(!t&&n.length!==o.length-2)throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");if(t)for(let m=0;m=n.length?n.push(o[m+2]):n[m]=o[m+2];for(let m=0;m=n[m]||p[m+n.length]>=n[m])throw new Error("pads should be smaller than kernel")}}static adjustPadsBasedOnAutoPad(t,o,n,u,c,p,m){if(m){if(c.length!==2*(t.length-2))throw new Error("length of pads should be twice the length of data dimensions");if(o.length!==t.length-2)throw new Error("length of strides should be the length of data dimensions");if(u.length!==t.length-2)throw new Error("length of kernel shapes should be the length of data dimensions");for(let g=0;g{"use strict";ce();/* rn: wraps buffer e in the typed array constructor that er selects for data type name t. */rn=(e,t)=>new(er(t))(e)});/* Mc: lazy module with WebNN tensor data conversion and tensor management. */var zc,Oi,Bc,Wv,Oc,Lv,Dc,nn,on,Pi,jc,Mc=X(()=>{"use strict";ce();yt();/* zc: bit width per supported data type name. */zc=new Map([["float32",32],["float16",16],["int32",32],["uint32",32],["int64",64],["uint64",64],["int8",8],["uint8",8],["int4",4],["uint4",4]]),/* Oi: converts bytes e holding values of type t into bytes holding the same values as int32. Returns e unchanged for int32; throws when t is unknown to zc, when the byte length is not a multiple of the element size, when a value does not fit in int32, or when no conversion exists for t. NOTE(review): the 64-bit loop is truncated in this copy of the text. */Oi=(e,t)=>{if(t==="int32")return e;let o=zc.get(t);if(!o)throw new Error(`WebNN backend does not support data type: ${t}`);let n=o/8;if(e.byteLength%n!==0)throw new Error(`Invalid Uint8Array length - must be a multiple of ${n}.`);let u=e.byteLength/n,c=new(er(t))(e.buffer,e.byteOffset,u);switch(t){case"int64":case"uint64":{let p=new Int32Array(u);for(let m=0;m2147483647n||g<-2147483648n)throw new Error("Can not convert int64 data to int32 - value out of range.");p[m]=Number(g)}return new Uint8Array(p.buffer)}case"int8":case"uint8":case"uint32":{if(t==="uint32"&&c.some(m=>m>2147483647))throw new Error("Can not convert uint32 data to int32 - value out of range.");let p=Int32Array.from(c,Number);return new Uint8Array(p.buffer)}default:throw new 
Error(`Unsupported data conversion from ${t} to 'int32'`)}},/* Bc: reverse of Oi - converts bytes e holding int32 values into bytes holding the same values as type t. Returns e unchanged for int32; throws when the byte length is not a multiple of 4, when a value is negative for an unsigned target or out of range for an 8-bit target, or when no conversion exists for t. */Bc=(e,t)=>{if(t==="int32")return e;if(e.byteLength%4!==0)throw new Error("Invalid Uint8Array length - must be a multiple of 4 (int32).");let o=e.byteLength/4,n=new Int32Array(e.buffer,e.byteOffset,o);switch(t){case"int64":{let u=BigInt64Array.from(n,BigInt);return new Uint8Array(u.buffer)}case"uint64":{if(n.some(c=>c<0))throw new Error("Can not convert int32 data to uin64 - negative value found.");let u=BigUint64Array.from(n,BigInt);return new Uint8Array(u.buffer)}case"int8":{if(n.some(c=>c<-128||c>127))throw new Error("Can not convert int32 data to int8 - value out of range.");let u=Int8Array.from(n,Number);return new Uint8Array(u.buffer)}case"uint8":{if(n.some(u=>u<0||u>255))throw new Error("Can not convert int32 data to uint8 - value out of range.");return Uint8Array.from(n,Number)}case"uint32":{if(n.some(c=>c<0))throw new Error("Can not convert int32 data to uint32 - negative value found.");let u=Uint32Array.from(n,Number);return new Uint8Array(u.buffer)}default:throw new Error(`Unsupported data conversion from 'int32' to ${t}`)}},Wv=1,/* Oc: returns the next id from the module-level counter Wv, which starts at 1. */Oc=()=>Wv++,/* Lv: fallback data type to use when the original type is not supported; every entry maps to int32. */Lv=new Map([["int8","int32"],["uint8","int32"],["uint32","int32"],["int64","int32"]]),/* Dc: byte length of a tensor of data type e and shape t, rounded up to whole bytes; returns 0 when t is an empty array; throws when e is unknown to zc. */Dc=(e,t)=>{let o=zc.get(e);if(!o)throw new Error(`WebNN backend does not support data type: ${e}`);return t.length>0?Math.ceil(t.reduce((n,u)=>n*u)*o/8):0},/* nn: wrapper that pairs an MLTensor with its session id, MLContext, data type, shape and optional fallback data type. */nn=class{constructor(t){this.isDataConverted=!1;let{sessionId:o,context:n,tensor:u,dataType:c,shape:p,fallbackDataType:m}=t;this.sessionId=o,this.mlContext=n,this.mlTensor=u,this.dataType=c,this.tensorShape=p,this.fallbackDataType=m}get tensor(){return this.mlTensor}get type(){return this.dataType}get fallbackType(){return this.fallbackDataType}get shape(){return this.tensorShape}/* byteLength is computed from the declared data type, not from the fallback type. */get byteLength(){return Dc(this.dataType,this.tensorShape)}destroy(){$e("verbose",()=>"[WebNN] TensorWrapper.destroy"),this.mlTensor.destroy()}write(t){this.mlContext.writeTensor(this.mlTensor,t)}/* read: reads the tensor contents, into t when given. With a fallback data type the raw data is converted back to the declared type through Bc first. */async read(t){if(this.fallbackDataType){let 
o=await this.mlContext.readTensor(this.mlTensor),n=Bc(new Uint8Array(o),this.dataType);if(t){(t instanceof ArrayBuffer?new Uint8Array(t):new Uint8Array(t.buffer,t.byteOffset,t.byteLength)).set(n);return}else return n.buffer}else return t?this.mlContext.readTensor(this.mlTensor,t):this.mlContext.readTensor(this.mlTensor)}/* canReuseTensor: true when the context t, data type o and shape n all match this wrapper. */canReuseTensor(t,o,n){return this.mlContext===t&&this.dataType===o&&this.tensorShape.length===n.length&&this.tensorShape.every((u,c)=>u===n[c])}setIsDataConverted(t){this.isDataConverted=t}},/* on: tracks the tensor wrapper, and any pending upload bytes (activeUpload), for a single tensor id. */on=class{constructor(t,o){this.tensorManager=t;this.wrapper=o}get tensorWrapper(){return this.wrapper}/* releaseTensor: hands the current wrapper back to the tensor manager and forgets it. */releaseTensor(){this.tensorWrapper&&(this.tensorManager.releaseTensor(this.tensorWrapper),this.wrapper=void 0)}/* ensureTensor: returns an MLTensor for session t with data type o and shape n. Reuses the current wrapper when it is compatible; otherwise releases it and obtains one from the tensor manager. When the context does not list o as a supported input type, a fallback type from Lv is used, and an error is thrown if none is supported. With u set, the old contents are read before release and written to the new tensor; the byte lengths must match. */async ensureTensor(t,o,n,u){let c=this.tensorManager.getMLContext(t),p;if(!c.opSupportLimits().input.dataTypes.includes(o)){if(p=Lv.get(o),!p||!c.opSupportLimits().input.dataTypes.includes(p))throw new Error(`WebNN backend does not support data type: ${o}`);$e("verbose",()=>`[WebNN] TensorIdTracker.ensureTensor: fallback dataType from ${o} to ${p}`)}if(this.wrapper){if(this.wrapper.canReuseTensor(c,o,n))return this.wrapper.tensor;if(u){if(this.wrapper.byteLength!==Dc(o,n))throw new Error("Unable to copy data to tensor with different size.");this.activeUpload=new Uint8Array(await this.wrapper.read())}this.tensorManager.releaseTensor(this.wrapper)}let m=typeof MLTensorUsage>"u"?void 0:MLTensorUsage.READ|MLTensorUsage.WRITE;return this.wrapper=await this.tensorManager.getCachedTensor(t,o,n,m,!0,!0,p),u&&this.activeUpload&&(this.wrapper.write(this.activeUpload),this.activeUpload=void 0),this.wrapper.tensor}/* upload: writes bytes t to the tensor, converted to int32 through Oi when the wrapper uses a fallback type. When the size of t does not match the tensor, the tensor is released and the bytes are kept in activeUpload instead. */upload(t){let o=t;if(this.wrapper){if(this.wrapper.fallbackType)if(this.wrapper.fallbackType==="int32")o=Oi(t,this.wrapper.type),this.wrapper.setIsDataConverted(!0);else throw new Error(`Unsupported fallback data type: ${this.wrapper.fallbackType}`);if(t.byteLength===this.wrapper.byteLength){this.wrapper.write(o);return}else $e("verbose",()=>"Data size 
does not match tensor size. Releasing tensor."),this.releaseTensor()}this.activeUpload?this.activeUpload.set(o):this.activeUpload=new Uint8Array(o)}/* download: returns the pending upload bytes when present (converted back through Bc when the wrapper marks them as converted), otherwise reads from the tensor; copies into t when a destination is given; throws when no tensor exists. */async download(t){if(this.activeUpload){let o=this.wrapper?.isDataConverted?Bc(this.activeUpload,this.wrapper?.type):this.activeUpload;if(t){t instanceof ArrayBuffer?new Uint8Array(t).set(o):new Uint8Array(t.buffer,t.byteOffset,t.byteLength).set(o);return}else return o.buffer}if(!this.wrapper)throw new Error("Tensor has not been created.");return t?this.wrapper.read(t):this.wrapper.read()}},/* Pi: tensor manager - maps tensor ids to trackers, keeps released tensors in freeTensors for reuse, and records externally registered tensors. */Pi=class{constructor(t){this.backend=t;this.tensorTrackersById=new Map;this.freeTensors=[];this.externalTensors=new Set}getMLContext(t){let o=this.backend.getMLContext(t);if(!o)throw new Error("MLContext not found for session.");return o}/* reserveTensorId: allocates a new id with an empty tracker and returns it. */reserveTensorId(){let t=Oc();return this.tensorTrackersById.set(t,new on(this)),t}/* releaseTensorId: forgets id t and releases its tensor wrapper, if any. */releaseTensorId(t){let o=this.tensorTrackersById.get(t);o&&(this.tensorTrackersById.delete(t),o.tensorWrapper&&this.releaseTensor(o.tensorWrapper))}async ensureTensor(t,o,n,u,c){$e("verbose",()=>`[WebNN] TensorManager.ensureTensor {tensorId: ${o}, dataType: ${n}, shape: ${u}, copyOld: ${c}}`);let p=this.tensorTrackersById.get(o);if(!p)throw new Error("Tensor not found.");return p.ensureTensor(t,n,u,c)}upload(t,o){let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");n.upload(o)}async download(t,o){$e("verbose",()=>`[WebNN] TensorManager.download {tensorId: ${t}, dstBuffer: ${o?.byteLength}}`);let n=this.tensorTrackersById.get(t);if(!n)throw new Error("Tensor not found.");return n.download(o)}/* releaseTensorsForSession: destroys the cached free tensors owned by session t and removes them from the free list. */releaseTensorsForSession(t){for(let o of this.freeTensors)o.sessionId===t&&o.destroy();this.freeTensors=this.freeTensors.filter(o=>o.sessionId!==t)}/* registerTensor: wraps an externally created tensor o, assigns it a new id, records the wrapper in externalTensors and returns the id. */registerTensor(t,o,n,u){let c=this.getMLContext(t),p=Oc(),m=new nn({sessionId:t,context:c,tensor:o,dataType:n,shape:u});return this.tensorTrackersById.set(p,new on(this,m)),this.externalTensors.add(m),p}/* getCachedTensor: returns a compatible wrapper taken from the free list (reassigned to session t), or creates a new tensor on the session context using the fallback type m when given. */async getCachedTensor(t,o,n,u,c,p,m){let 
g=this.getMLContext(t);for(let[_,w]of this.freeTensors.entries())if(w.canReuseTensor(g,o,n)){$e("verbose",()=>`[WebNN] Reusing tensor {dataType: ${o}, ${m?`fallbackDataType: ${m},`:""} shape: ${n}`);let x=this.freeTensors.splice(_,1)[0];return x.sessionId=t,x}$e("verbose",()=>`[WebNN] MLContext.createTensor {dataType: ${o}, ${m?`fallbackDataType: ${m},`:""} shape: ${n}}`);let b=await g.createTensor({dataType:m??o,shape:n,dimensions:n,usage:u,writable:c,readable:p});return new nn({sessionId:t,context:g,tensor:b,dataType:o,shape:n,fallbackDataType:m})}/* releaseTensor: removes wrapper t from externalTensors when present, then appends it to the free list for reuse. */releaseTensor(t){this.externalTensors.has(t)&&this.externalTensors.delete(t),this.freeTensors.push(t)}},/* jc: factory that constructs a Pi tensor manager. */jc=(...e)=>new Pi(...e)});/* Rc: lazy module defining the WebNN backend class sn. */var an,Gv,sn,Rc=X(()=>{"use strict";ce();Ot();Ei();Mc();yt();/* an: numeric type code to WebNN data type name; code 9 maps to uint8. */an=new Map([[1,"float32"],[10,"float16"],[6,"int32"],[12,"uint32"],[7,"int64"],[13,"uint64"],[22,"int4"],[21,"uint4"],[3,"int8"],[2,"uint8"],[9,"uint8"]]),/* Gv: shallow equality of two option objects (same key set, strictly equal values); true for identical references, false when exactly one side is undefined. */Gv=(e,t)=>{if(e===t)return!0;if(e===void 0||t===void 0)return!1;let o=Object.keys(e).sort(),n=Object.keys(t).sort();return o.length===n.length&&o.every((u,c)=>u===n[c]&&e[u]===t[u])},/* sn: WebNN backend state - MLContext bookkeeping per session, graph input and output names, temporary tensors, and a tensor manager. */sn=class{constructor(t){this.tensorManager=jc(this);this.mlContextBySessionId=new Map;this.sessionIdsByMLContext=new Map;this.mlContextCache=[];this.sessionGraphInputs=new Map;this.sessionGraphOutputs=new Map;this.temporaryGraphInputs=[];this.temporaryGraphOutputs=[];this.temporarySessionTensorIds=new Map;en(t.logLevel,!!t.debug)}/* currentSessionId: the session set by onRunStart; throws when no run is active. */get currentSessionId(){if(this.activeSessionId===void 0)throw new Error("No active session");return this.activeSessionId}onRunStart(t){$e("verbose",()=>`[WebNN] onRunStart {sessionId: ${t}}`),this.activeSessionId=t}/* onRunEnd: releases the temporary tensors recorded for session t. NOTE(review): activeSessionId is cleared only when the session had temporary tensors - confirm this is intended. */onRunEnd(t){$e("verbose",()=>`[WebNN] onRunEnd {sessionId: ${t}}`);let o=this.temporarySessionTensorIds.get(t);if(o){for(let n of o)$e("verbose",()=>`[WebNN] releasing temporary tensor {tensorId: ${n}}`),this.tensorManager.releaseTensorId(n);this.temporarySessionTensorIds.delete(t),this.activeSessionId=void 0}}/* createMLContext: returns a cached MLContext matching t - a GPUDevice (matched by identity), undefined, or an options object (matched with Gv) - and otherwise creates one through navigator.ml.createContext and caches it. */async createMLContext(t){if(t instanceof GPUDevice){let 
n=this.mlContextCache.findIndex(u=>u.gpuDevice===t);if(n!==-1)return this.mlContextCache[n].mlContext;{let u=await navigator.ml.createContext(t);return this.mlContextCache.push({gpuDevice:t,mlContext:u}),u}}else if(t===void 0){let n=this.mlContextCache.findIndex(u=>u.options===void 0&&u.gpuDevice===void 0);if(n!==-1)return this.mlContextCache[n].mlContext;{let u=await navigator.ml.createContext();return this.mlContextCache.push({mlContext:u}),u}}let o=this.mlContextCache.findIndex(n=>Gv(n.options,t));if(o!==-1)return this.mlContextCache[o].mlContext;{let n=await navigator.ml.createContext(t);return this.mlContextCache.push({options:t,mlContext:n}),n}}/* registerMLContext: associates session t with context o and moves the graph input and output names collected so far into the per-session maps. */registerMLContext(t,o){this.mlContextBySessionId.set(t,o);let n=this.sessionIdsByMLContext.get(o);n||(n=new Set,this.sessionIdsByMLContext.set(o,n)),n.add(t),this.temporaryGraphInputs.length>0&&(this.sessionGraphInputs.set(t,this.temporaryGraphInputs),this.temporaryGraphInputs=[]),this.temporaryGraphOutputs.length>0&&(this.sessionGraphOutputs.set(t,this.temporaryGraphOutputs),this.temporaryGraphOutputs=[])}/* onReleaseSession: drops the state kept for session t; when no other session references its context, the context is also removed from mlContextCache. */onReleaseSession(t){this.sessionGraphInputs.delete(t),this.sessionGraphOutputs.delete(t);let o=this.mlContextBySessionId.get(t);if(!o)return;this.tensorManager.releaseTensorsForSession(t),this.mlContextBySessionId.delete(t);let n=this.sessionIdsByMLContext.get(o);if(n.delete(t),n.size===0){this.sessionIdsByMLContext.delete(o);let u=this.mlContextCache.findIndex(c=>c.mlContext===o);u!==-1&&this.mlContextCache.splice(u,1)}}getMLContext(t){return this.mlContextBySessionId.get(t)}reserveTensorId(){return this.tensorManager.reserveTensorId()}releaseTensorId(t){$e("verbose",()=>`[WebNN] releaseTensorId {tensorId: ${t}}`),this.tensorManager.releaseTensorId(t)}/* ensureTensor: translates numeric type code n through an and forwards to the tensor manager, using the current session when t is nullish. */async ensureTensor(t,o,n,u,c){let p=an.get(n);if(!p)throw new Error(`Unsupported ONNX data type: ${n}`);return this.tensorManager.ensureTensor(t??this.currentSessionId,o,p,u,c)}/* createTemporaryTensor: reserves a tensor id for session t, ensures a tensor of type code o and shape n, records the id for release in onRunEnd, and returns it. */async createTemporaryTensor(t,o,n){$e("verbose",()=>`[WebNN] createTemporaryTensor 
{onnxDataType: ${o}, shape: ${n}}`);let u=an.get(o);if(!u)throw new Error(`Unsupported ONNX data type: ${o}`);let c=this.tensorManager.reserveTensorId();await this.tensorManager.ensureTensor(t,c,u,n,!1);let p=this.temporarySessionTensorIds.get(t);return p?p.push(c):this.temporarySessionTensorIds.set(t,[c]),c}/* uploadTensor: forwards to the tensor manager; throws unless the wasm module has shouldTransferToMLTensor set. */uploadTensor(t,o){if(!Oe().shouldTransferToMLTensor)throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");$e("verbose",()=>`[WebNN] uploadTensor {tensorId: ${t}, data: ${o.byteLength}}`),this.tensorManager.upload(t,o)}async downloadTensor(t,o){return this.tensorManager.download(t,o)}/* createMLTensorDownloader: returns an async function that downloads tensor t and wraps the bytes as a typed array for data type name o. */createMLTensorDownloader(t,o){return async()=>{let n=await this.tensorManager.download(t);return rn(n,o)}}/* registerMLTensor: registers an externally created tensor o for session t with type code n and dims u; returns the new tensor id. */registerMLTensor(t,o,n,u){let c=an.get(n);if(!c)throw new Error(`Unsupported ONNX data type: ${n}`);let p=this.tensorManager.registerTensor(t,o,c,u);return $e("verbose",()=>`[WebNN] registerMLTensor {tensor: ${o}, dataType: ${c}, dimensions: ${u}} -> {tensorId: ${p}}`),p}/* registerMLConstant: creates a constant through u.constant from n bytes at offset o of preloaded file t, looked up in map p after removing a leading dot-slash prefix. Throws when p is missing, the file is not found, the range exceeds the file, or the data type is unsupported. With m set, int64 data is converted to int32 and c.dataType is rewritten to int32. */registerMLConstant(t,o,n,u,c,p,m=!1){if(!p)throw new Error("External mounted files are not available.");let g=t;t.startsWith("./")&&(g=t.substring(2));let b=p.get(g);if(!b)throw new Error(`File with name ${g} not found in preloaded files.`);if(o+n>b.byteLength)throw new Error("Out of bounds: data offset and length exceed the external file data size.");let _=b.slice(o,o+n).buffer,w;switch(c.dataType){case"float32":w=new Float32Array(_);break;case"float16":w=typeof Float16Array<"u"&&Float16Array.from?new Float16Array(_):new Uint16Array(_);break;case"int32":w=new Int32Array(_);break;case"uint32":w=new Uint32Array(_);break;case"int64":if(m){let x=Oi(new Uint8Array(_),"int64");w=new Int32Array(x.buffer),c.dataType="int32"}else w=new BigInt64Array(_);break;case"uint64":w=new BigUint64Array(_);break;case"int8":w=new Int8Array(_);break;case"int4":case"uint4":case"uint8":w=new Uint8Array(_);break;default:throw new Error(`Unsupported data type: ${c.dataType} in creating WebNN Constant 
from external data.`)}return $e("verbose",()=>`[WebNN] registerMLConstant {dataType: ${c.dataType}, shape: ${c.shape}}} ${m?"(Note: it was int64 data type and registered to int32 as workaround)":""}`),u.constant(c,w)}/* registerGraphInput and registerGraphOutput collect names until registerMLContext assigns them to a session. */registerGraphInput(t){this.temporaryGraphInputs.push(t)}registerGraphOutput(t){this.temporaryGraphOutputs.push(t)}isGraphInput(t,o){let n=this.sessionGraphInputs.get(t);return n?n.includes(o):!1}isGraphOutput(t,o){let n=this.sessionGraphOutputs.get(t);return n?n.includes(o):!1}/* isGraphInputOutputTypeSupported: whether the MLContext of session t lists data type name o among its input (n true) or output (n false) data types; false when the type has no WebNN mapping or the session has no context. */isGraphInputOutputTypeSupported(t,o,n=!0){let u=this.mlContextBySessionId.get(t),c=an.get(zt(o));return typeof c>"u"?!1:n?!!u?.opSupportLimits().input.dataTypes.includes(c):!!u?.opSupportLimits().output.dataTypes.includes(c)}flush(){}}});var un=X(()=>{"use strict"});/* Lc: lazy module for GPU buffer management; it continues past the end of this excerpt. */var Uc,zi,Bi,Hv,Fv,Nc,ji,Di,Wc,Lc=X(()=>{"use strict";yt();un();/* Uc: map keyed by buffer size in bytes; the keys seed the size list zi and the free-buffer maps in the Di constructor. The meaning of the values is not visible here - presumably a per-size limit, confirm. */Uc=new Map([[64,250],[128,200],[256,200],[512,200],[2048,230],[4096,200],[8192,50],[16384,50],[32768,50],[65536,50],[131072,50],[262144,50],[524288,50],[1048576,50],[2097152,30],[4194304,20],[8388608,10],[12582912,10],[16777216,10],[26214400,15],[33554432,22],[44236800,2],[58982400,6],[67108864,6],[134217728,6],[167772160,6]]),zi=[],/* Bi: rounds a size up to the next multiple of 16. */Bi=e=>Math.ceil(Number(e)/16)*16,/* NOTE(review): Hv and the definitions up to the counter function are truncated in this copy of the text. */Hv=e=>{for(let t=0;tFv++,/* ji: copies the first o bytes (rounded up to a multiple of 16) of GPU buffer t into a temporary mappable buffer on backend e, waits for the mapping, and returns the data as a new Uint8Array, or copied into the array returned by n when n is given. The temporary buffer is destroyed in finally. */ji=async(e,t,o,n)=>{let u=Bi(o),c=e.device.createBuffer({size:u,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});try{let p=e.getCommandEncoder();e.endComputePass(),p.copyBufferToBuffer(t,0,c,0,u),e.flush(),await c.mapAsync(GPUMapMode.READ);let m=c.getMappedRange();if(n){let g=n();return g.set(new Uint8Array(m,0,o)),g}else return new Uint8Array(m.slice(0,o))}finally{c.destroy()}},Di=class{constructor(t){this.backend=t;this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.buffersPending=[],this.capturedPendingBuffers=new Map;for(let[o]of Uc)zi.push(o),this.freeBuffers.set(o,[]),this.freeUniformBuffers.set(o,[]);this.sessionCount=0}upload(t,o){let 
n=o.buffer,u=o.byteOffset,c=o.byteLength,p=Bi(c),m=this.storageCache.get(t);if(!m)throw new Error("gpu data for uploading does not exist");if(Number(m.originalSize)!==c)throw new Error(`inconsistent data size. gpu data size=${m.originalSize}, data size=${c}`);let g=this.backend.device.createBuffer({mappedAtCreation:!0,size:p,usage:GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC}),b=g.getMappedRange();new Uint8Array(b).set(new Uint8Array(n,u,c)),g.unmap();let _=this.backend.device.createCommandEncoder();_.copyBufferToBuffer(g,0,m.gpuData.buffer,0,p),this.backend.device.queue.submit([_.finish()]),g.destroy(),$e("verbose",()=>`[WebGPU] GpuDataManager.upload(id=${t})`)}memcpy(t,o){let n=this.storageCache.get(t);if(!n)throw new Error("source gpu data for memcpy does not exist");let u=this.storageCache.get(o);if(!u)throw new Error("destination gpu data for memcpy does not exist");if(n.originalSize!==u.originalSize)throw new Error("inconsistent source and destination gpu data size");let c=Bi(n.originalSize),p=this.backend.getCommandEncoder();this.backend.endComputePass(),p.copyBufferToBuffer(n.gpuData.buffer,0,u.gpuData.buffer,0,c)}registerExternalBuffer(t,o,n){let u;if(n){if(u=n[0],t===n[1])return $e("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${o}) => id=${u}, buffer is the same, skip.`),u;if(this.backend.capturedCommandList.has(this.backend.currentSessionId))throw new Error(`Registering a different external buffer under graph capture mode is not supported yet. 
+ Please use the previous external buffer!`)}else u=Nc();return this.storageCache.set(u,{gpuData:{id:u,type:0,buffer:t},originalSize:o}),$e("verbose",()=>`[WebGPU] GpuDataManager.registerExternalBuffer(size=${o}) => id=${u}, registered.`),u}unregisterExternalBuffer(t){t!==void 0&&(this.storageCache.delete(t),$e("verbose",()=>`[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${t}`))}create(t,o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST){let n=Hv(t),u,c=(o&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE,p=(o&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM;if(c||p){let b=(c?this.freeBuffers:this.freeUniformBuffers).get(n);b?b.length>0?u=b.pop():u=this.backend.device.createBuffer({size:n,usage:o}):u=this.backend.device.createBuffer({size:n,usage:o})}else u=this.backend.device.createBuffer({size:n,usage:o});let m={id:Nc(),type:0,buffer:u};return this.storageCache.set(m.id,{gpuData:m,originalSize:Number(t)}),$e("verbose",()=>`[WebGPU] GpuDataManager.create(size=${t}) => id=${m.id}`),m}get(t){return this.storageCache.get(t)?.gpuData}release(t){let o=typeof t=="bigint"?Number(t):t,n=this.storageCache.get(o);if(!n){if(this.storageCache.size===0)return 0;throw new Error("releasing data does not exist")}return $e("verbose",()=>`[WebGPU] GpuDataManager.release(id=${o}), gpuDataId=${n.gpuData.id}`),this.storageCache.delete(o),this.buffersPending.push(n.gpuData.buffer),n.originalSize}async download(t,o){let n=this.storageCache.get(Number(t));if(!n)throw new Error("data does not exist");await ji(this.backend,n.gpuData.buffer,n.originalSize,o)}refreshPendingBuffers(){if(this.buffersPending.length!==0)if(this.backend.sessionStatus==="default"){for(let t of this.buffersPending){let o=Uc.get(t.size);if((t.usage&GPUBufferUsage.STORAGE)===GPUBufferUsage.STORAGE){let n=this.freeBuffers.get(t.size)||[];o===void 0||n.length>=o?t.destroy():n.push(t)}else if((t.usage&GPUBufferUsage.UNIFORM)===GPUBufferUsage.UNIFORM){let 
n=this.freeUniformBuffers.get(t.size)||[];o===void 0||n.length>=o?t.destroy():n.push(t)}else t.destroy()}this.buffersPending=[]}else{let t=this.capturedPendingBuffers.get(this.backend.currentSessionId);t||(t=[],this.capturedPendingBuffers.set(this.backend.currentSessionId,t));for(let o of this.buffersPending)t.push(o);this.buffersPending=[]}}dispose(){this.freeBuffers.forEach(t=>{t.forEach(o=>{o.destroy()})}),this.freeUniformBuffers.forEach(t=>{t.forEach(o=>{o.destroy()})}),this.storageCache.forEach(t=>{t.gpuData.buffer.destroy()}),this.capturedPendingBuffers.forEach(t=>{t.forEach(o=>{o.destroy()})}),this.storageCache=new Map,this.freeBuffers=new Map,this.freeUniformBuffers=new Map,this.capturedPendingBuffers=new Map}onCreateSession(){this.sessionCount+=1}onReleaseSession(t){let o=this.capturedPendingBuffers.get(t);o&&(o.forEach(n=>{n.destroy()}),this.capturedPendingBuffers.delete(t)),this.sessionCount-=1,this.sessionCount===0&&($e("warning",()=>"[WebGPU] Clearing webgpu buffer cache"),this.storageCache.forEach(n=>{n.gpuData.buffer.destroy()}),this.storageCache=new Map)}},Wc=(...e)=>new Di(...e)});var Mi,pe,We=X(()=>{"use strict";Mi=class{constructor(t){Object.assign(this,t)}get cacheKey(){return this.key||(this.key=Object.getOwnPropertyNames(this).sort().map(t=>`${this[t]}`).join(";")),this.key}},pe=e=>new Mi(e)});var Ft,Ui,Be,He,te,Ae,Ni,qt,ut,ae,ln,F,Y,Gc,dn,Ri,Hc,we=X(()=>{"use strict";ce();be();Ft=64,Ui=(e,t)=>{if(t===3)throw new Error("vec3 has same alignment as vec4, use vec4 instead");switch(Number(e)){case 10:return t>1?`vec${t}`:"f16";case 1:return t>1?`vec${t}`:"f32";case 6:return t>1?`vec${t}`:"i32";case 12:return t>1?`vec${t}`:"u32";case 7:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2","i32"];case 13:if(t>1)throw new Error("currently not supported vecX of uint64 yet");return["vec2","u32"];case 9:if(t!==4)throw new Error("bool must be vec4");return["u32","vec4"];case 22:return"i32";case 21:return"u32";default:throw 
new Error(`Unknown data type: ${e}`)}},Be=(e,t=1)=>{let o=Ui(e,t);return typeof o=="string"?o:o[0]},He=(e,t=1)=>{let o=Ui(e,t);return typeof o=="string"?o:o[1]},te=(...e)=>{let t=[];return e.forEach(o=>{o.length!==0&&t.push({type:12,data:o},{type:12,data:L.computeStrides(o)})}),t},Ae=e=>e%4===0?4:e%2===0?2:1,Ni=(e="f32",t,o="0")=>!t||t===1?`${e}(${o})`:`vec${t}<${e}>(${o})`,qt=(e,t,o)=>e==="f32"?o:t===1?`f32(${o})`:`vec${t}(${o})`,ut=(e,t)=>t===4?`(${e}.x + ${e}.y + ${e}.z + ${e}.w)`:t===2?`(${e}.x + ${e}.y)`:t===3?`(${e}.x + ${e}.y + ${e}.z)`:e,ae=(e,t,o,n)=>e.startsWith("uniforms.")&&o>4?typeof t=="string"?n==="f16"?`${e}[(${t}) / 8][(${t}) % 8 / 4][(${t}) % 8 % 4]`:`${e}[(${t}) / 4][(${t}) % 4]`:n==="f16"?`${e}[${Math.floor(t/8)}][${Math.floor(t%8/4)}][${t%8%4}]`:`${e}[${Math.floor(t/4)}][${t%4}]`:o>1?`${e}[${t}]`:e,ln=(e,t,o,n,u)=>{let c=typeof o=="number",p=c?o:o.length,m=[...new Array(p).keys()],g=p<2?"u32":p<=4?`vec${p}`:`array`,b=Ui(t,u),_=typeof b=="string"?b:b[1],w=typeof b=="string"?b:b[0],x={indices:g,value:_,storage:w,tensor:t},S=R=>typeof R=="string"?R:`${R}u`,C={offsetToIndices:!1,indicesToOffset:!1,broadcastedIndicesToOffset:!1,set:!1,setByIndices:!1,get:!1,getByIndices:!1},T=c?"uniforms.":"",z=`${T}${e}_shape`,k=`${T}${e}_strides`,A="";for(let R=0;R ${x.indices} { + var indices: ${x.indices}; + var current = offset; + ${A} + return indices; + }`,B=R=>(C.offsetToIndices=!0,p<2?R:`o2i_${e}(${R})`),W=[];if(p>=2)for(let R=p-1;R>=0;R--)W.push(`${ae(k,R,p)} * (indices[${R}])`);let N=p<2?"":` + fn i2o_${e}(indices: ${x.indices}) -> u32 { + return ${W.join("+")}; + }`,q=R=>(C.indicesToOffset=!0,p<2?R:`i2o_${e}(${R})`),K=(...R)=>p===0?"0u":`${x.indices}(${R.map(S).join(",")})`,Q=(R,V)=>p<2?`${R}`:`${ae(R,V,p)}`,ne=(R,V,Ce)=>p<2?`${R}=${Ce};`:`${ae(R,V,p)}=${Ce};`,se={},ue=(R,V)=>{C.broadcastedIndicesToOffset=!0;let Ce=`${V.name}broadcastedIndicesTo${e}Offset`;if(Ce in se)return`${Ce}(${R})`;let Ne=[];for(let Je=p-1;Je>=0;Je--){let 
Ve=V.indicesGet("outputIndices",Je+V.rank-p);Ne.push(`${Q(k,Je)} * (${Ve} % ${Q(z,Je)})`)}return se[Ce]=`fn ${Ce}(outputIndices: ${V.type.indices}) -> u32 { + return ${Ne.length>0?Ne.join("+"):"0u"}; + }`,`${Ce}(${R})`},ge=(R,V)=>(()=>{if(x.storage===x.value)return`${e}[${R}]=${V};`;if(x.storage==="vec2"&&x.value==="i32")return`${e}[${R}]=vec2(u32(${V}), select(0u, 0xFFFFFFFFu, ${V} < 0));`;if(x.storage==="vec2"&&x.value==="u32")return`${e}[${R}]=vec2(u32(${V}), 0u);`;if(x.storage==="u32"&&x.value==="vec4")return`${e}[${R}]=dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(${V}));`;throw new Error(`not supported combination of storage type ${x.storage} and value type ${x.value} yet`)})(),re=R=>(()=>{if(x.storage===x.value)return`${e}[${R}]`;if(x.storage==="vec2"&&x.value==="i32")return`i32(${e}[${R}].x)`;if(x.storage==="vec2"&&x.value==="u32")return`u32(${e}[${R}].x)`;if(x.storage==="u32"&&x.value==="vec4")return`vec4(bool(${e}[${R}] & 0xFFu), bool(${e}[${R}] & 0xFF00u), bool(${e}[${R}] & 0xFF0000u), bool(${e}[${R}] & 0xFF000000u))`;throw new Error(`not supported combination of storage type ${x.storage} and value type ${x.value} yet`)})(),Se=p<2?"":` + fn get_${e}ByIndices(indices: ${x.indices}) -> ${_} { + return ${re(`i2o_${e}(indices)`)}; + }`,fe=p<2?"":(()=>{let R=m.map(Ce=>`d${Ce}: u32`).join(", "),V=m.map(Ce=>`d${Ce}`).join(", ");return` + fn get_${e}(${R}) -> ${_} { + return get_${e}ByIndices(${K(V)}); + }`})(),ie=(...R)=>{if(R.length!==p)throw new Error(`indices length must be ${p}`);let V=R.map(S).join(",");return p===0?re("0u"):p===1?re(V[0]):(C.get=!0,C.getByIndices=!0,C.indicesToOffset=!0,`get_${e}(${V})`)},ve=R=>p<2?re(R):(C.getByIndices=!0,C.indicesToOffset=!0,`get_${e}ByIndices(${R})`),le=p<2?"":` + fn set_${e}ByIndices(indices: ${x.indices}, value: ${_}) { + ${ge(`i2o_${e}(indices)`,"value")} + }`,me=p<2?"":(()=>{let R=m.map(Ce=>`d${Ce}: u32`).join(", "),V=m.map(Ce=>`d${Ce}`).join(", ");return` + fn set_${e}(${R}, value: ${_}) { + 
set_${e}ByIndices(${K(V)}, value); + }`})();return{impl:()=>{let R=[],V=!1;return C.offsetToIndices&&(R.push(O),V=!0),C.indicesToOffset&&(R.push(N),V=!0),C.broadcastedIndicesToOffset&&(Object.values(se).forEach(Ce=>R.push(Ce)),V=!0),C.set&&(R.push(me),V=!0),C.setByIndices&&(R.push(le),V=!0),C.get&&(R.push(fe),V=!0),C.getByIndices&&(R.push(Se),V=!0),!c&&V&&R.unshift(`const ${z} = ${x.indices}(${o.join(",")});`,`const ${k} = ${x.indices}(${L.computeStrides(o).join(",")});`),R.join(` +`)},type:x,offsetToIndices:B,indicesToOffset:q,broadcastedIndicesToOffset:ue,indices:K,indicesGet:Q,indicesSet:ne,set:(...R)=>{if(R.length!==p+1)throw new Error(`indices length must be ${p}`);let V=R[p];if(typeof V!="string")throw new Error("value must be string");let Ce=R.slice(0,p).map(S).join(",");return p===0?ge("0u",V):p===1?ge(Ce[0],V):(C.set=!0,C.setByIndices=!0,C.indicesToOffset=!0,`set_${e}(${Ce}, ${V})`)},setByOffset:ge,setByIndices:(R,V)=>p<2?ge(R,V):(C.setByIndices=!0,C.indicesToOffset=!0,`set_${e}ByIndices(${R}, ${V});`),get:ie,getByOffset:re,getByIndices:ve,usage:n,name:e,strides:k,shape:z,rank:p}},F=(e,t,o,n=1)=>ln(e,t,o,"input",n),Y=(e,t,o,n=1)=>ln(e,t,o,"output",n),Gc=(e,t,o)=>ln(e,t,o,"atomicOutput",1),dn=(e,t,o,n=1)=>ln(e,t,o,"internal",n),Ri=class{constructor(t,o){this.normalizedDispatchGroup=t;this.limits=o;this.internalVariables=[];this.variables=[];this.uniforms=[];this.variableIndex=0}guardAgainstOutOfBoundsWorkgroupSizes(t){return`if (global_idx >= ${typeof t=="number"?`${t}u`:t}) { return; }`}mainStart(t=Ft){let o=typeof t=="number"?t:t[0],n=typeof t=="number"?1:t[1],u=typeof t=="number"?1:t[2];if(o>this.limits.maxComputeWorkgroupSizeX||n>this.limits.maxComputeWorkgroupSizeY||u>this.limits.maxComputeWorkgroupSizeZ)throw new Error(`workgroup size [${o}, ${n}, ${u}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, 
${this.limits.maxComputeWorkgroupSizeZ}].`);if(o*n*u>this.limits.maxComputeInvocationsPerWorkgroup)throw new Error(`workgroup size [${o}, ${n}, ${u}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);let c=this.normalizedDispatchGroup[1]===1&&this.normalizedDispatchGroup[2]===1,p=c?`@builtin(global_invocation_id) global_id : vec3, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(local_invocation_index) local_idx : u32, + @builtin(local_invocation_id) local_id : vec3`:`@builtin(global_invocation_id) global_id : vec3, + @builtin(local_invocation_id) local_id : vec3, + @builtin(local_invocation_index) local_idx : u32, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(num_workgroups) num_workgroups : vec3`,m=c?`let global_idx = global_id.x; + let workgroup_index = workgroup_id.x;`:`let workgroup_index = workgroup_id.z * num_workgroups[0] * num_workgroups[1] + + workgroup_id.y * num_workgroups[0] + workgroup_id.x; + let global_idx = workgroup_index * ${o*n*u}u + local_idx;`;return`@compute @workgroup_size(${o}, ${n}, ${u}) + fn main(${p}) { + ${m} + `}appendVariableUniforms(t){t.rank!==0&&(t.shape.startsWith("uniforms.")&&this.uniforms.push({name:t.shape.replace("uniforms.",""),type:"u32",length:t.rank}),t.strides.startsWith("uniforms.")&&this.uniforms.push({name:t.strides.replace("uniforms.",""),type:"u32",length:t.rank}))}declareVariable(t,o){if(t.usage==="internal")throw new Error("cannot use internal variable with declareVariable(). 
use registerInternalVariables() instead.");this.variables.push(t),this.appendVariableUniforms(t);let n=t.usage==="input"?"read":"read_write",u=t.usage==="atomicOutput"?"atomic":t.type.storage;return`@group(0) @binding(${o}) var ${t.name}: array<${u}>;`}declareVariables(...t){return t.map(o=>this.declareVariable(o,this.variableIndex++)).join(` +`)}registerInternalVariable(t){if(t.usage!=="internal")throw new Error("cannot use input or output variable with registerInternalVariable(). use declareVariables() instead.");this.internalVariables.push(t),this.appendVariableUniforms(t)}registerInternalVariables(...t){return t.forEach(o=>this.registerInternalVariable(o)),this}registerUniform(t,o,n=1){return this.uniforms.push({name:t,type:o,length:n}),this}registerUniforms(t){return this.uniforms=this.uniforms.concat(t),this}uniformDeclaration(){if(this.uniforms.length===0)return"";let t=[];for(let{name:o,type:n,length:u}of this.uniforms)if(u&&u>4)n==="f16"?t.push(`@align(16) ${o}:array, ${Math.ceil(u/8)}>`):t.push(`${o}:array, ${Math.ceil(u/4)}>`);else{let c=u==null||u===1?n:`vec${u}<${n}>`;t.push(`${o}:${c}`)}return` + struct Uniforms { ${t.join(", ")} }; + @group(0) @binding(${this.variableIndex}) var uniforms: Uniforms;`}get additionalImplementations(){return this.uniformDeclaration()+this.variables.map(t=>t.impl()).join(` +`)+this.internalVariables.map(t=>t.impl()).join(` +`)}get variablesInfo(){if(this.uniforms.length===0)return;let t=o=>[12,10,1,6][["u32","f16","f32","i32"].indexOf(o)];return this.uniforms.map(o=>[t(o.type),o.length??1])}},Hc=(e,t)=>new Ri(e,t)});var qv,Fc,Kv,Jv,Zv,Qv,Fe,qc,Kc,Tt=X(()=>{"use strict";ce();be();We();we();qv=(e,t)=>{if(!e||e.length!==1)throw new Error("Transpose requires 1 input.");if(t.length!==0&&t.length!==e[0].dims.length)throw new Error(`perm size ${t.length} does not match input rank ${e[0].dims.length}`)},Fc=(e,t)=>t.length!==0?t:[...new Array(e).keys()].reverse(),Kv=(e,t)=>L.sortBasedOnPerm(e,Fc(e.length,t)),Jv=(e,t,o,n)=>{let 
u=`fn perm(i: ${n.type.indices}) -> ${o.type.indices} { + var a: ${o.type.indices};`;for(let c=0;c{let o=[],n=[];for(let u=0;u{let o=0;for(let n=0;n{let o=e.dataType,n=e.dims.length,u=Fc(n,t),c=Kv(e.dims,u),p=e.dims,m=c,g=n<2||Qv(u,e.dims),b;if(g)return b=T=>{let z=F("input",o,p,4),k=Y("output",o,m,4);return` + ${T.registerUniform("output_size","u32").declareVariables(z,k)} + ${T.mainStart()} + ${T.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + output[global_idx] = input[global_idx]; + }`},{name:"TransposeCopy",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let T=L.size(c);return{outputs:[{dims:c,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(T/64/4)},programUniforms:[{type:12,data:Math.ceil(T/4)}]}},getShaderSource:b};let{newShape:_,newPerm:w}=Zv(e.dims,u),x=L.areEqual(w,[2,3,1]),S=L.areEqual(w,[3,1,2]);if(_.length===2||x||S){p=x?[_[0],_[1]*_[2]]:S?[_[0]*_[1],_[2]]:_,m=[p[1],p[0]];let T=16;return b=z=>{let k=F("a",o,p.length),A=Y("output",o,m.length);return` + ${z.registerUniform("output_size","u32").declareVariables(k,A)} + var tile : array, ${T}>; + ${z.mainStart([T,T,1])} + let stride = (uniforms.output_shape[1] - 1) / ${T} + 1; + let workgroup_id_x = workgroup_index % stride; + let workgroup_id_y = workgroup_index / stride; + let input_col = workgroup_id_y * ${T}u + local_id.x; + let input_row = workgroup_id_x * ${T}u + local_id.y; + if (input_row < uniforms.a_shape[0] && input_col < uniforms.a_shape[1]) { + tile[local_id.y][local_id.x] = ${k.getByIndices(`${k.type.indices}(input_row, input_col)`)}; + } + workgroupBarrier(); + + let output_col = workgroup_id_x * ${T}u + local_id.x; + let output_row = workgroup_id_y * ${T}u + local_id.y; + if (output_row < uniforms.output_shape[0] && output_col < uniforms.output_shape[1]) { + ${A.setByIndices(`${A.type.indices}(output_row, output_col)`,"tile[local_id.x][local_id.y]")} + } + }`},{name:"TransposeShared",shaderCache:{inputDependencies:["type"]},getRunData:()=>{let 
z=L.size(c);return{outputs:[{dims:c,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(m[1]/T),y:Math.ceil(m[0]/T)},programUniforms:[{type:12,data:z},...te(p,m)]}},getShaderSource:b}}return b=T=>{let z=F("a",o,p.length),k=Y("output",o,m.length);return` + ${T.registerUniform("output_size","u32").declareVariables(z,k)} + + ${Jv(u,n,z,k)} + + ${T.mainStart()} + ${T.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${k.offsetToIndices("global_idx")}; + let aIndices = perm(indices); + + ${k.setByOffset("global_idx",z.getByIndices("aIndices"))} + }`},{name:"Transpose",shaderCache:{hint:`${t}`,inputDependencies:["rank"]},getRunData:()=>{let T=L.size(c);return{outputs:[{dims:c,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(T/64)},programUniforms:[{type:12,data:T},...te(p,m)]}},getShaderSource:b}},qc=(e,t)=>{qv(e.inputs,t.perm),e.compute(Fe(e.inputs[0],t.perm))},Kc=e=>pe({perm:e.perm})});var Yv,Xv,e2,t2,r2,n2,i2,o2,a2,s2,_t,Jc,Zc,Qc,Yc,Xc,ep,tp,rp,np,ip,op=X(()=>{"use strict";ce();be();we();cn();Tt();Yv={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate * candidate",logSumExp:"bestValue + exp(candidate)",l1:"bestValue + abs(candidate)",l2:"bestValue + candidate * candidate",logSum:"bestValue + candidate"},Xv={max:"select(bestValue, candidate, candidate > bestValue)",min:"select(bestValue, candidate, candidate < bestValue)",mean:"bestValue + candidate",sum:"bestValue + candidate",prod:"bestValue * candidate",sumSquare:"bestValue + candidate",logSumExp:"bestValue + candidate",l1:"bestValue + candidate",l2:"bestValue + candidate",logSum:"bestValue + 
candidate"},e2={max:"_A[offset]",min:"_A[offset]",mean:"0",sum:"0",prod:"1",sumSquare:"0",logSumExp:"0",l1:"0",l2:"0",logSum:"0"},t2={max:"bestValue",min:"bestValue",sum:"bestValue",prod:"bestValue",sumSquare:"bestValue",logSumExp:"log(bestValue)",l1:"bestValue",l2:"sqrt(bestValue)",logSum:"log(bestValue)"},r2=(e,t)=>{let o=[];for(let n=t-e;n{let o=[],n=e.length;for(let c=0;ce[c]);return[o,u]},i2=(e,t)=>{let o=e.length+t.length,n=[],u=0;for(let c=0;c{for(let o=0;o{let o=[];if(!o2(e,t)){for(let n=0;no.push(n))}return o},s2=(e,t,o,n,u,c,p)=>{let m=o[0].dims,g=L.size(c),b=L.size(p),_=F("_A",o[0].dataType,m),w=Y("output",u,c),x=64;g===1&&(x=256);let S=` + var aBestValues : array; + `,C=T=>` + ${T.registerUniform("reduceSize","u32").declareVariables(_,w)} + ${S} + fn DIV_CEIL(a : u32, b : u32) -> u32 { + return ((a - 1u) / b + 1u); + } + ${T.mainStart(x)} + + let outputIndex = global_idx / ${x}; + let offset = outputIndex * uniforms.reduceSize; + + var bestValue = f32(${e2[n]}); + let Length = uniforms.reduceSize; + for (var k = local_idx; k < Length; k = k + ${x}) { + let candidate = f32(${_.getByOffset("offset + k")}); + bestValue = ${Yv[n]}; + } + aBestValues[local_idx] = bestValue; + workgroupBarrier(); + + var reduceSize = min(Length, ${x}u); + for (var currentSize = reduceSize / 2u; reduceSize > 1u; + currentSize = reduceSize / 2u) { + let interval = DIV_CEIL(reduceSize, 2u); + if (local_idx < currentSize) { + let candidate = aBestValues[local_idx + interval]; + bestValue = ${Xv[n]}; + aBestValues[local_idx] = bestValue; + } + reduceSize = interval; + workgroupBarrier(); + } + + if (local_idx == 0u) { + ${w.setByOffset("outputIndex",`${n==="mean"?`${w.type.storage}(bestValue / f32(uniforms.reduceSize))`:`${w.type.storage}(${t2[n]})`}`)}; + } + }`;return{name:e,shaderCache:{hint:`${t};${x}`,inputDependencies:["type"]},getShaderSource:C,getRunData:()=>({outputs:[{dims:c,dataType:u}],dispatchGroup:{x:g},programUniforms:[{type:12,data:b}]})}},_t=(e,t,o,n)=>{let 
u=e.inputs.length===1?o:Vi(e.inputs,o),c=u.axes;c.length===0&&!u.noopWithEmptyAxes&&(c=e.inputs[0].dims.map((S,C)=>C));let p=L.normalizeAxes(c,e.inputs[0].dims.length),m=p,g=e.inputs[0],b=a2(m,e.inputs[0].dims.length);b.length>0&&(g=e.compute(Fe(e.inputs[0],b),{inputs:[0],outputs:[-1]})[0],m=r2(m.length,g.dims.length));let[_,w]=n2(g.dims,m),x=_;u.keepDims&&(x=i2(_,p)),e.compute(s2(t,u.cacheKey,[g],n,e.inputs[0].dataType,x,w),{inputs:[g]})},Jc=(e,t)=>{_t(e,"ReduceMeanShared",t,"mean")},Zc=(e,t)=>{_t(e,"ReduceL1Shared",t,"l1")},Qc=(e,t)=>{_t(e,"ReduceL2Shared",t,"l2")},Yc=(e,t)=>{_t(e,"ReduceLogSumExpShared",t,"logSumExp")},Xc=(e,t)=>{_t(e,"ReduceMaxShared",t,"max")},ep=(e,t)=>{_t(e,"ReduceMinShared",t,"min")},tp=(e,t)=>{_t(e,"ReduceProdShared",t,"prod")},rp=(e,t)=>{_t(e,"ReduceSumShared",t,"sum")},np=(e,t)=>{_t(e,"ReduceSumSquareShared",t,"sumSquare")},ip=(e,t)=>{_t(e,"ReduceLogSumShared",t,"logSum")}});var wt,u2,pn,Vi,vt,l2,d2,c2,p2,f2,m2,h2,g2,y2,b2,xt,ap,sp,up,lp,dp,cp,pp,fp,mp,hp,cn=X(()=>{"use strict";ce();be();We();we();op();wt=e=>{if(!e||e.length===0||e.length>2)throw new Error("Reduce op requires 1 or 2 inputs.");if(e.length===2&&e[1].dims.length!==1)throw new Error("Invalid axes input dims.")},u2=e=>["","",`var value = ${e.getByIndices("input_indices")};`,""],pn=(e,t,o,n,u,c,p=!1,m=!1)=>{let g=[],b=o[0].dims,_=b.length,w=L.normalizeAxes(u,_),x=!m&&w.length===0;b.forEach((z,k)=>{x||w.indexOf(k)>=0?p&&g.push(1):g.push(z)});let S=g.length,C=L.size(g);return{name:e,shaderCache:t,getShaderSource:z=>{let k=[],A=F("_A",o[0].dataType,_),O=Y("output",c,S),B=n(A,O,w),W=B[2];for(let N=0,q=0;N<_;N++)x||w.indexOf(N)>=0?(p&&q++,W=`for(var j${N}: u32 = 0; j${N} < ${b[N]}; j${N}++) { + ${B[2].includes("last_index")?`let last_index = j${N};`:""} + ${A.indicesSet("input_indices",N,`j${N}`)} + ${W} + }`):(k.push(`${A.indicesSet("input_indices",N,O.indicesGet("output_indices",q))};`),q++);return` + + ${z.registerUniform("output_size","u32").declareVariables(A,O)} + + 
${z.mainStart()} + ${z.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var input_indices: ${A.type.indices}; + let output_indices = ${O.offsetToIndices("global_idx")}; + + ${k.join(` +`)} + ${B[0]} // init ops for reduce max/min + ${B[1]} + ${W} + ${B[3]} + ${B.length===4?O.setByOffset("global_idx","value"):B.slice(4).join(` +`)} + }`},getRunData:()=>({outputs:[{dims:g,dataType:c}],dispatchGroup:{x:Math.ceil(C/64)},programUniforms:[{type:12,data:C},...te(b,g)]})}},Vi=(e,t)=>{let o=[];return e[1].dims[0]>0&&e[1].getBigInt64Array().forEach(n=>o.push(Number(n))),pe({axes:o,keepDims:t.keepDims,noopWithEmptyAxes:t.noopWithEmptyAxes})},vt=(e,t,o,n)=>{let u=e.inputs,c=u.length===1?o:Vi(u,o);e.compute(pn(t,{hint:c.cacheKey,inputDependencies:["rank"]},[u[0]],c.noopWithEmptyAxes&&c.axes.length===0?u2:n,c.axes,u[0].dataType,c.keepDims,c.noopWithEmptyAxes),{inputs:[0]})},l2=(e,t)=>{wt(e.inputs),vt(e,"ReduceLogSum",t,(n,u)=>[`var value = ${u.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,"value = log(value);"])},d2=(e,t)=>{wt(e.inputs),vt(e,"ReduceL1",t,(n,u)=>[`var value = ${u.type.storage}(0);`,"",`value += abs(${n.getByIndices("input_indices")});`,""])},c2=(e,t)=>{wt(e.inputs),vt(e,"ReduceL2",t,(n,u)=>[`var t = ${u.type.value}(0); var value = ${u.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += (t * t);`,"value = sqrt(value);"])},p2=(e,t)=>{wt(e.inputs),vt(e,"ReduceLogSumExp",t,(n,u)=>[`var value = ${u.type.storage}(0);`,"",`value += exp(${n.getByIndices("input_indices")});`,"value = log(value);"])},f2=(e,t)=>{wt(e.inputs),vt(e,"ReduceMax",t,(n,u,c)=>{let p=[];for(let m=0;m=0||c.length===0)&&p.push(n.indicesSet("input_indices",m,0));return[`${p.join(` +`)}`,`var value = ${n.getByIndices("input_indices")};`,`value = max(value, ${n.getByIndices("input_indices")});`,""]})},m2=(e,t)=>{wt(e.inputs),vt(e,"ReduceMean",t,(n,u,c)=>{let p=1;for(let m=0;m=0||c.length===0)&&(p*=e.inputs[0].dims[m]);return["var sum = 
f32(0);","",`sum += f32(${n.getByIndices("input_indices")});`,`let value = ${u.type.value}(sum / ${p});`]})},h2=(e,t)=>{wt(e.inputs),vt(e,"ReduceMin",t,(n,u,c)=>{let p=[];for(let m=0;m=0||c.length===0)&&p.push(`input_indices[${m}] = 0;`);return[`${p.join(` +`)}`,`var value = ${n.getByIndices("input_indices")};`,`value = min(value, ${n.getByIndices("input_indices")});`,""]})},g2=(e,t)=>{wt(e.inputs),vt(e,"ReduceProd",t,(n,u)=>[`var value = ${u.type.storage}(1);`,"",`value *= ${n.getByIndices("input_indices")};`,""])},y2=(e,t)=>{wt(e.inputs),vt(e,"ReduceSum",t,(n,u)=>[`var value = ${u.type.storage}(0);`,"",`value += ${n.getByIndices("input_indices")};`,""])},b2=(e,t)=>{wt(e.inputs),vt(e,"ReduceSumSquare",t,(n,u)=>[`var t = ${u.type.value}(0); var value = ${u.type.value}(0);`,"",`t = ${n.getByIndices("input_indices")}; value += t * t;`,""])},xt=(e,t,o)=>{if(t.length===0)return o;let n=1,u=1;for(let c=0;c1024},ap=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?m2(e,t):Jc(e,t)},sp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?d2(e,t):Zc(e,t)},up=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?c2(e,t):Qc(e,t)},lp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?p2(e,t):Yc(e,t)},dp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?f2(e,t):Xc(e,t)},cp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?h2(e,t):ep(e,t)},pp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?g2(e,t):tp(e,t)},fp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?y2(e,t):rp(e,t)},mp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?b2(e,t):np(e,t)},hp=(e,t)=>{xt(e.inputs[0].dims,t.axes,t.noopWithEmptyAxes)?l2(e,t):ip(e,t)}});var gp,yp,bp,Wi,_p=X(()=>{"use strict";ce();We();cn();gp=e=>{if(!e||e.length===0||e.length>2)throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");if(e[0].dataType!==1)throw new Error("Invalid input type.")},yp=(e,t)=>{gp(e.inputs);let o=(n,u,c)=>{let p=[];for(let 
m=0;m=0||c.length===0)&&p.push(`input_indices[${m}] = 0;`);return[`${p.join(` +`)}`,`var value = ${n.getByIndices("input_indices")}; +var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?"<=":"<"} value) { + value = ${n.getByIndices("input_indices")}; + best_index = i32(last_index); + }`,"",u.setByOffset("global_idx","best_index")]};e.compute(pn("ArgMin",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],o,[t.axis],7,t.keepDims),{inputs:[0]})},bp=(e,t)=>{gp(e.inputs);let o=(n,u,c)=>{let p=[];for(let m=0;m=0||c.length===0)&&p.push(`input_indices[${m}] = 0;`);return[`${p.join(` +`)}`,`var value = ${n.getByIndices("input_indices")}; +var best_index : i32 = 0;`,`if (${n.getByIndices("input_indices")} ${t.selectLastIndex>0?">=":">"} value) { + value = ${n.getByIndices("input_indices")}; + best_index = i32(last_index); + }`,"",u.setByOffset("global_idx","best_index")]};e.compute(pn("argMax",{hint:t.cacheKey,inputDependencies:["rank"]},[e.inputs[0]],o,[t.axis],7,t.keepDims),{inputs:[0]})},Wi=e=>pe(e)});var _2,Li,w2,v2,x2,tr,$2,wp,fn=X(()=>{"use strict";ce();be();un();we();_2=(e,t)=>{let o=e[0],n=e[1],u=e[2],c=e[3],p=e[4],m=e[5];if(p&&m)throw new Error("Attention cannot have both past and attention_bias");if(o.dims.length!==3)throw new Error('Input "input" must have 3 dimensions');let g=o.dims[0],b=o.dims[1],_=o.dims[2];if(u.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimensions');if(n.dims.length!==2)throw new Error('Input "weights" is expected to have 2 dimensions');if(n.dims[0]!==_)throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");if(u.dims[0]!==n.dims[1])throw new Error('Input "bias" dimension 0 should have same length as dimension 1 of input "weights"');let w=u.dims[0]/3,x=w,S=x;if(t.qkvHiddenSizes.length>0){if(t.qkvHiddenSizes.length!==3)throw new Error("qkv_hidden_sizes attribute should have 3 elements");for(let O of t.qkvHiddenSizes)if(O%t.numHeads!==0)throw 
new Error("qkv_hidden_sizes should be divisible by num_heads");w=t.qkvHiddenSizes[0],x=t.qkvHiddenSizes[1],S=t.qkvHiddenSizes[2]}let C=b;if(w!==x)throw new Error("qkv_hidden_sizes first element should be same as the second");if(u.dims[0]!==w+x+S)throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes');let T=0;if(p){if(x!==S)throw new Error('Input "past" expect k_hidden_size == v_hidden_size');if(p.dims.length!==5)throw new Error('Input "past" must have 5 dimensions');if(p.dims[0]!==2)throw new Error('Input "past" first dimension must be 2');if(p.dims[1]!==g)throw new Error('Input "past" second dimension must be batch_size');if(p.dims[2]!==t.numHeads)throw new Error('Input "past" third dimension must be num_heads');if(p.dims[4]!==x/t.numHeads)throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads');t.pastPresentShareBuffer||(T=p.dims[3])}let z=C+T,k=-1,A=0;if(c)throw new Error("Mask not supported");if(p)throw new Error("past is not supported");if(m){if(m.dims.length!==4)throw new Error('Input "attention_bias" must have 4 dimensions');if(m.dims[0]!==g||m.dims[1]!==t.numHeads||m.dims[2]!==b||m.dims[3]!==z)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:g,sequenceLength:b,pastSequenceLength:T,kvSequenceLength:C,totalSequenceLength:z,maxSequenceLength:k,inputHiddenSize:_,hiddenSize:w,vHiddenSize:S,headSize:Math.floor(w/t.numHeads),vHeadSize:Math.floor(S/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:A,scale:t.scale,broadcastResPosBias:!1,passPastInKv:!1,qkvFormat:1}},Li=(e,t,o)=>t&&e?` + let total_sequence_length_input = u32(${t.getByOffset("0")}); + let present_sequence_length = max(total_sequence_length_input, uniforms.past_sequence_length); + let is_subsequent_prompt: bool = sequence_length > 1 && sequence_length != total_sequence_length_input; + let 
is_first_prompt: bool = is_subsequent_prompt == false && sequence_length == total_sequence_length_input; + total_sequence_length = u32(${e?.getByOffset("batchIdx")}) + 1; + var past_sequence_length: u32 = 0; + if (is_first_prompt == false) { + past_sequence_length = total_sequence_length - sequence_length; + } + `:` + ${o?"let past_sequence_length = uniforms.past_sequence_length":""}; + let present_sequence_length = total_sequence_length; + `,w2=(e,t,o,n,u,c,p,m)=>{let g=Ae(p?1:c),b=64,_=c/g;_{let A=Y("x",e.dataType,e.dims,g),O=[A],B=p?F("seq_lens",p.dataType,p.dims):void 0;B&&O.push(B);let W=m?F("total_sequence_length_input",m.dataType,m.dims):void 0;W&&O.push(W);let N=He(e.dataType),q=[{name:"batch_size",type:"u32"},{name:"num_heads",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"sequence_length",type:"u32"},{name:"total_sequence_length",type:"u32"},{name:"elements_per_thread",type:"u32"}];return` + var thread_max: array; + var thread_sum: array; + ${k.registerUniforms(q).declareVariables(...O)} + ${k.mainStart([b,1,1])} + let batchIdx = workgroup_id.z / uniforms.num_heads; + let headIdx = workgroup_id.z % uniforms.num_heads; + let sequence_length = uniforms.sequence_length; + var total_sequence_length = uniforms.total_sequence_length; + ${Li(B,W,!1)} + let local_offset = local_idx * uniforms.elements_per_thread; + let offset = (global_idx / ${b}) * uniforms.total_sequence_length + local_offset; + let seq_causal_length = ${p?"u32(past_sequence_length + workgroup_id.y + 1)":"total_sequence_length"}; + var thread_max_vector = ${C}(-3.402823e+38f); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + thread_max_vector = max(${C}(x[offset + i]), thread_max_vector); + } + thread_max[local_idx] = ${(()=>{switch(g){case 1:return"thread_max_vector";case 2:return"max(thread_max_vector.x, thread_max_vector.y)";case 4:return"max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, 
thread_max_vector.w))";default:throw new Error(`Unsupported components: ${g}`)}})()}; + workgroupBarrier(); + + var max_value = f32(-3.402823e+38f); + for (var i = 0u; i < ${b}; i++) { + max_value = max(thread_max[i], max_value); + } + + var sum_vector = ${C}(0); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + sum_vector += exp(${C}(x[offset + i]) - max_value); + } + thread_sum[local_idx] = ${(()=>{switch(g){case 1:return"sum_vector";case 2:return"sum_vector.x + sum_vector.y";case 4:return"sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w";default:throw new Error(`Unsupported components: ${g}`)}})()}; + workgroupBarrier(); + + var sum: f32 = 0; + for (var i = 0u; i < ${b}; i++) { + sum += thread_sum[i]; + } + + if (sum == 0) { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + x[offset + i] = ${A.type.value}(${N}(1.0) / ${N}(seq_causal_length)); + } + } else { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) { + var f32input = ${C}(x[offset + i]); + x[offset + i] = ${A.type.value}(exp(f32input - max_value) / sum); + } + } + ${p?` + for (var total_seq_id: u32 = seq_causal_length; total_seq_id + local_offset < uniforms.total_sequence_length; total_seq_id++) { + x[offset + total_seq_id] = ${A.type.value}(${N}(0)); + }`:""}; + }`};return{name:"AttentionProbsSoftmax",shaderCache:{hint:`${b};${S};${g}`,inputDependencies:T},getShaderSource:z,getRunData:()=>({outputs:[],dispatchGroup:{x:1,y:u,z:t*o},programUniforms:x})}},v2=(e,t,o,n,u,c,p,m,g)=>{let b=p+c.kvSequenceLength,_=[c.batchSize,c.numHeads,c.sequenceLength,b],w=e>1&&n,x=c.kvNumHeads?c.kvNumHeads:c.numHeads,S=w?[c.batchSize,x,b,c.headSize]:void 
0,C=c.nReps?c.nReps:1,T=c.scale===0?1/Math.sqrt(c.headSize):c.scale,z=Ae(c.headSize),k=c.headSize/z,A=12,O={x:Math.ceil(b/A),y:Math.ceil(c.sequenceLength/A),z:c.batchSize*c.numHeads},B=[{type:12,data:c.sequenceLength},{type:12,data:k},{type:12,data:b},{type:12,data:c.numHeads},{type:12,data:c.headSize},{type:1,data:T},{type:12,data:p},{type:12,data:c.kvSequenceLength},{type:12,data:C}],W=w&&n&&L.size(n.dims)>0,N=["type","type"];W&&N.push("type"),u&&N.push("type"),m&&N.push("type"),g&&N.push("type");let q=[{dims:_,dataType:t.dataType,gpuDataType:0}];w&&q.push({dims:S,dataType:t.dataType,gpuDataType:0});let K=Q=>{let ne=F("q",t.dataType,t.dims,z),se=F("key",o.dataType,o.dims,z),ue=[ne,se];if(W){let le=F("past_key",n.dataType,n.dims,z);ue.push(le)}u&&ue.push(F("attention_bias",u.dataType,u.dims));let ge=m?F("seq_lens",m.dataType,m.dims):void 0;ge&&ue.push(ge);let re=g?F("total_sequence_length_input",g.dataType,g.dims):void 0;re&&ue.push(re);let Se=Y("output",t.dataType,_),fe=[Se];w&&fe.push(Y("present_key",t.dataType,S,z));let ie=He(1,z),ve=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"alpha",type:"f32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return` + const TILE_SIZE = ${A}u; + + var tileQ: array<${ne.type.storage}, ${A*A}>; + var tileK: array<${ne.type.storage}, ${A*A}>; + ${Q.registerUniforms(ve).declareVariables(...ue,...fe)} + ${Q.mainStart([A,A,1])} + // x holds the N and y holds the M + let headIdx = workgroup_id.z % uniforms.num_heads; + let kvHeadIdx = ${C===1?"headIdx":"headIdx / uniforms.n_reps"}; + let kv_num_heads = ${C===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"}; + let batchIdx = workgroup_id.z / uniforms.num_heads; + let m = workgroup_id.y * TILE_SIZE; + let n = workgroup_id.x * TILE_SIZE; + let sequence_length = uniforms.M; + var total_sequence_length = uniforms.N; + 
${Li(ge,re,!0)} + let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; + let qOffset = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K; + ${W&&w?"let pastKeyOffset = absKvHeadIdx * uniforms.past_sequence_length * uniforms.K;":""}; + let kOffset = absKvHeadIdx * uniforms.kv_sequence_length * uniforms.K; + ${w?"let presentKeyOffset = absKvHeadIdx * uniforms.N * uniforms.K;":""} + var value = ${ie}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x]; + } + if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) { + var idx = TILE_SIZE * local_id.y + local_id.x; + ${W&&w?` + if (n + local_id.y < past_sequence_length) { + tileK[idx] = past_key[pastKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x]; + } else if (n + local_id.y - past_sequence_length < uniforms.kv_sequence_length) { + tileK[idx] = key[kOffset + (n + local_id.y - past_sequence_length) * uniforms.K + w + local_id.x]; + }`:` + if (n + local_id.y < uniforms.kv_sequence_length) { + tileK[idx] = key[kOffset + (n + local_id.y) * uniforms.K + w + local_id.x]; + }`} + ${w?`if (n + local_id.y < present_sequence_length) { + present_key[presentKeyOffset + (n + local_id.y) * uniforms.K + w + local_id.x] = tileK[idx]; + }`:""} + } + workgroupBarrier(); + + for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) { + value += ${ie}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]); + } + + workgroupBarrier(); + } + + if (global_id.y < uniforms.M && global_id.x < total_sequence_length) { + let headOffset = workgroup_id.z * uniforms.M * uniforms.N; + let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x; + var sum: f32 = ${(()=>{switch(z){case 1:return"value";case 2:return"value.x + value.y";case 4:return"value.x + value.y + value.z + value.w";default:throw new Error(`Unsupported 
components: ${z}`)}})()}; + output[outputIdx] = ${Se.type.value} (sum * uniforms.alpha) + ${u?"attention_bias[outputIdx]":"0.0"}; + } + }`};return{name:"AttentionProbs",shaderCache:{hint:`${z};${u!==void 0};${n!==void 0};${e}`,inputDependencies:N},getRunData:()=>({outputs:q,dispatchGroup:O,programUniforms:B}),getShaderSource:K}},x2=(e,t,o,n,u,c,p=void 0,m=void 0)=>{let g=c+u.kvSequenceLength,b=u.nReps?u.nReps:1,_=u.vHiddenSize*b,w=e>1&&n,x=u.kvNumHeads?u.kvNumHeads:u.numHeads,S=w?[u.batchSize,x,g,u.headSize]:void 0,C=[u.batchSize,u.sequenceLength,_],T=12,z={x:Math.ceil(u.vHeadSize/T),y:Math.ceil(u.sequenceLength/T),z:u.batchSize*u.numHeads},k=[{type:12,data:u.sequenceLength},{type:12,data:g},{type:12,data:u.vHeadSize},{type:12,data:u.numHeads},{type:12,data:u.headSize},{type:12,data:_},{type:12,data:c},{type:12,data:u.kvSequenceLength},{type:12,data:b}],A=w&&n&&L.size(n.dims)>0,O=["type","type"];A&&O.push("type"),p&&O.push("type"),m&&O.push("type");let B=[{dims:C,dataType:t.dataType,gpuDataType:0}];w&&B.push({dims:S,dataType:t.dataType,gpuDataType:0});let W=N=>{let q=F("probs",t.dataType,t.dims),K=F("v",o.dataType,o.dims),Q=[q,K];A&&Q.push(F("past_value",n.dataType,n.dims));let ne=p?F("seq_lens",p.dataType,p.dims):void 0;p&&Q.push(ne);let se=m?F("total_sequence_length_input",m.dataType,m.dims):void 0;m&&Q.push(se);let ge=[Y("output",t.dataType,C)];w&&ge.push(Y("present_value",t.dataType,S));let re=[{name:"M",type:"u32"},{name:"K",type:"u32"},{name:"N",type:"u32"},{name:"num_heads",type:"u32"},{name:"head_size",type:"u32"},{name:"v_hidden_size",type:"u32"},{name:"past_sequence_length",type:"u32"},{name:"kv_sequence_length",type:"u32"},{name:"n_reps",type:"u32"}];return` + const TILE_SIZE = ${T}u; + var tileQ: array<${q.type.value}, ${T*T}>; + var tileV: array<${q.type.value}, ${T*T}>; + ${N.registerUniforms(re).declareVariables(...Q,...ge)} + ${N.mainStart([T,T,1])} + let headIdx = workgroup_id.z % uniforms.num_heads; + let batchIdx = workgroup_id.z / 
uniforms.num_heads; + let kvHeadIdx = ${b===1?"headIdx":"headIdx / uniforms.n_reps"}; + let kv_num_heads = ${b===1?"uniforms.num_heads":"uniforms.num_heads / uniforms.n_reps"}; + let m = global_id.y; + let n = global_id.x; + let sequence_length = uniforms.M; + var total_sequence_length = uniforms.K; + ${Li(ne,se,!0)} + let offsetA = workgroup_id.z * uniforms.M * uniforms.K + m * uniforms.K; + let absKvHeadIdx = batchIdx * kv_num_heads + kvHeadIdx; // kvHeadIdx is relative to the batch + ${A&&w?"let pastValueOffset = absKvHeadIdx * uniforms.N * uniforms.past_sequence_length + n;":""}; + let vOffset = absKvHeadIdx * uniforms.N * uniforms.kv_sequence_length + n; + ${w?"let presentValueOffset = absKvHeadIdx * uniforms.N * uniforms.K + n;":""} + var value = ${q.type.storage}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (m < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x]; + } + if (n < uniforms.N && w + local_id.y < uniforms.K) { + var idx = TILE_SIZE * local_id.y + local_id.x; + ${A&&w?` + if (w + local_id.y < past_sequence_length) { + tileV[idx] = past_value[pastValueOffset + (w + local_id.y) * uniforms.N]; + } else if (w + local_id.y - past_sequence_length < uniforms.kv_sequence_length) { + tileV[idx] = v[vOffset + (w + local_id.y - past_sequence_length) * uniforms.N]; + } + `:` + if (w + local_id.y < uniforms.kv_sequence_length) { + tileV[idx] = v[vOffset + (w + local_id.y) * uniforms.N]; + }`} + ${w?` + if (w + local_id.y < present_sequence_length) { + present_value[presentValueOffset + (w + local_id.y) * uniforms.N] = tileV[idx]; + }`:""} + } + workgroupBarrier(); + for (var k: u32 = 0u; k < TILE_SIZE && w+k < total_sequence_length; k++) { + value += tileQ[TILE_SIZE * local_id.y + k] * tileV[TILE_SIZE * k + local_id.x]; + } + workgroupBarrier(); + } + + // we need to transpose output from BNSH_v to BSND_v + if (m < uniforms.M && n < uniforms.N) { + let outputIdx = 
/**
 * Drives the three attention programs on the compute context, in order:
 * the "AttentionProbs" program (built by `v2`), the in-place
 * "AttentionProbsSoftmax" program (built by `w2`) and the "AttentionScore"
 * program (built by `x2`).
 *
 * @param e  Compute context; `outputCount` and `compute(program, io)` are used.
 * @param t  Query tensor.
 * @param o  Key tensor.
 * @param n  Value tensor.
 * @param u  Not read by this function.
 * @param c  Not read by this function.
 * @param p  Optional past-key tensor.
 * @param m  Optional past-value tensor.
 * @param g  Optional attention-bias tensor; ignored when it has zero elements.
 * @param b  Attention parameters (`pastSequenceLength`, `kvSequenceLength`,
 *           `batchSize`, `numHeads`, `sequenceLength` are read here).
 * @param _  Optional sequence-lengths tensor.
 * @param w  Optional total-sequence-length tensor.
 * @returns {void}
 */
tr = (e, t, o, n, u, c, p, m, g, b, _ = void 0, w = void 0) => {
  // A tensor only counts as "present" when it exists and holds at least one element.
  const hasContent = (tensor) => Boolean(tensor) && L.size(tensor.dims) > 0;

  // Never produce more outputs than the caller asked for, nor more than the
  // past key/value inputs allow (1 result + 1 per supplied past tensor).
  const outputCount = Math.min(e.outputCount, 1 + (p ? 1 : 0) + (m ? 1 : 0));
  const emitsPresent = outputCount > 1;
  const pastSequenceLength = emitsPresent ? b.pastSequenceLength : 0;
  const totalSequenceLength = pastSequenceLength + b.kvSequenceLength;
  const attentionBias = hasContent(g) ? g : void 0;

  // The optional sequence-length inputs are appended, in this order, to every stage.
  const sequenceInputs = [_, w].filter(Boolean);

  // Stage 1: attention probabilities.
  const probsInputs = [t, o];
  if (emitsPresent && hasContent(p)) {
    probsInputs.push(p);
  }
  if (attentionBias) {
    probsInputs.push(attentionBias);
  }
  probsInputs.push(...sequenceInputs);
  const probs = e.compute(
    v2(outputCount, t, o, p, attentionBias, b, pastSequenceLength, _, w),
    { inputs: probsInputs, outputs: emitsPresent ? [-1, 1] : [-1] },
  )[0];

  // Stage 2: softmax over the probabilities; the program declares no outputs.
  e.compute(
    w2(probs, b.batchSize, b.numHeads, pastSequenceLength, b.sequenceLength, totalSequenceLength, _, w),
    { inputs: _ && w ? [probs, _, w] : [probs], outputs: [] },
  );

  // Stage 3: attention score.
  const scoreInputs = [probs, n];
  if (emitsPresent && hasContent(m)) {
    scoreInputs.push(m);
  }
  scoreInputs.push(...sequenceInputs);
  e.compute(
    x2(outputCount, probs, n, m, b, pastSequenceLength, _, w),
    { inputs: scoreInputs, outputs: emitsPresent ? [0, 2] : [0] },
  );
};
/**
 * Validates the inputs of a BatchNormalization node.
 *
 * Exactly five tensors are required. The shape expected of inputs 1..4
 * (reported as "scale", "B", "mean" and "var") is derived from the shape of
 * input 0 together with the `format` and `spatial` attributes.
 *
 * @param e  Input tensors; only `dims` is read.
 * @param t  Attributes; `format` and `spatial` are read.
 * @throws {Error} When the input count is wrong or a shape does not match.
 */
C2 = (e, t) => {
  if (!e || e.length !== 5) {
    throw new Error("BatchNormalization requires 5 inputs");
  }

  // Compares one parameter tensor's shape against the expected one.
  const assertShape = (actual, expected, label) => {
    const rank = expected.length;
    if (rank !== actual.length) {
      throw new Error(`${label}: num dimensions != ${rank}`);
    }
    for (const [axis, dim] of expected.entries()) {
      if (dim !== actual[axis]) {
        throw new Error(`${label}: dim[${axis}] do not match`);
      }
    }
  };

  const xDims = e[0].dims;
  let expected;
  if (xDims.length <= 1) {
    expected = [1];
  } else if (t.format === "NHWC") {
    // Last axis first; when not spatial, followed by every axis between the first and the last.
    const lastAxis = xDims.slice(-1);
    expected = t.spatial ? lastAxis : lastAxis.concat(xDims.slice(1, xDims.length - 1));
  } else {
    // Axis 1 only when spatial, otherwise every axis after the first.
    expected = xDims.slice(1, t.spatial ? 2 : void 0);
  }

  const labels = ["Invalid input scale", "Invalid input B", "Invalid input mean", "Invalid input var"];
  labels.forEach((label, position) => {
    assertShape(e[position + 1].dims, expected, label);
  });
};
/**
 * Validates the inputs of a BiasAdd node before a program is built for it.
 *
 * The list is checked up front so that a missing tensor produces a
 * descriptive error instead of a `TypeError` on `undefined`; the program
 * builder declares three input bindings (input, bias, residual).
 *
 * @param e  Input tensors; only `dims` of the first two is read.
 * @throws {Error} When the input count is not 3, the input is not rank 3,
 *   its last dimension is not 320, 640 or 1280, the bias is not rank 1, or
 *   the bias length differs from the input's last dimension.
 */
I2 = (e) => {
  if (!e || e.length !== 3) {
    throw new Error("BiasAdd requires 3 inputs");
  }
  const inputDims = e[0].dims;
  const biasDims = e[1].dims;
  if (inputDims.length !== 3) {
    throw new Error("input should have 3 dimensions");
  }
  if (![320, 640, 1280].includes(inputDims[2])) {
    throw new Error("number of channels should be 320, 640 or 1280");
  }
  if (biasDims.length !== 1) {
    throw new Error("bias is expected to have 1 dimensions");
  }
  if (inputDims[2] !== biasDims[0]) {
    throw new Error("last dimension of input and bias are not the same");
  }
};
/**
 * Builds the Clip attributes (`min` / `max`) from the operator's inputs.
 *
 * Input 1 supplies `min` and input 2 supplies `max`; either is treated as
 * absent when it is missing from the list or its `data` field is 0, in which
 * case a default for the data type of input 0 is used.
 *
 * @param e  Input tensors.
 * @returns The result of `pe({ min, max })`.
 * @throws {Error} When input 0 has a data type other than 1 or 10.
 */
E2 = (e) => {
  const hasMin = e.length >= 2 && e[1].data !== 0;
  const hasMax = e.length >= 3 && e[2].data !== 0;
  const dataType = e[0].dataType;

  if (dataType === 1) {
    // Defaults are -/+3.4028234663852886e38.
    const min = hasMin ? e[1].getFloat32Array()[0] : -3.4028234663852886e38;
    const max = hasMax ? e[2].getFloat32Array()[0] : 3.4028234663852886e38;
    return pe({ min, max });
  }

  if (dataType === 10) {
    // Values are read through a Uint16 view; the defaults are 0xFBFF / 0x7BFF
    // (presumably the raw float16 bit patterns of the lowest/highest finite
    // half-precision value - TODO confirm).
    const min = hasMin ? e[1].getUint16Array()[0] : 64511;
    const max = hasMax ? e[2].getUint16Array()[0] : 31743;
    return pe({ min, max });
  }

  throw new Error("Unsupport data type");
};
/**
 * Runs the LeakyRelu element-wise program on the first input.
 *
 * @param e  Compute context; `inputs[0]` and `compute` are used.
 * @param t  Attributes; `alpha` is embedded in the shader as a constant and
 *           `cacheKey` is passed on as the program's cache hint argument.
 * @returns {void}
 */
Lp = (e, t) => {
  const scalarType = He(e.inputs[0].dataType);

  // Keeps the input where it is >= 0 and scales it by alpha elsewhere.
  const expression = (a) =>
    `select(leaky_relu_alpha_ * ${a}, ${a}, ${a} >= vec4<${scalarType}>(0.0))`;

  // Extra shader code declaring the constant the expression refers to.
  const alphaDeclaration = `const leaky_relu_alpha_ = ${scalarType}(${t.alpha});`;

  e.compute(Pe(e.inputs[0], "LeakyRelu", expression, alphaDeclaration, t.cacheKey));
};
/**
 * Builds the shader expression used for tanh of the given operand:
 * sign(x) * (1 - exp(-2|x|)) / (1 + exp(-2|x|)).
 *
 * @param e  Source text of the operand expression.
 * @returns {string} The expression text.
 */
tf = (e) => {
  // The exponential term appears in both the numerator and the denominator.
  const decay = `exp(-2 * abs(${e}))`;
  return `sign(${e}) * (1 - ${decay}) / (1 + ${decay})`;
};
5120 or 10240");if(e[1].dims.length!==1)throw new Error("bias is expected to have 1 dimensions");if(e[0].dims[2]!==e[1].dims[0])throw new Error("last dimension of input and bias are not the same")},B2=e=>{let t=e[0].dims.slice();t[2]=t[2]/2;let o=F("input",e[0].dataType,e[0].dims,4),n=F("bias",e[0].dataType,[e[0].dims[2]],4),u=Y("output",e[0].dataType,t,4),c=L.size(t)/4,p=Be(e[0].dataType);return{name:"BiasSplitGelu",getRunData:()=>({outputs:[{dims:t,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)}}),getShaderSource:g=>` + const M_SQRT2 = sqrt(2.0); + const halfChannels = ${e[0].dims[2]/4/2}u; + + ${g.declareVariables(o,n,u)} + + ${mn(p)} + + ${g.mainStart()} + ${g.guardAgainstOutOfBoundsWorkgroupSizes(c)} + let biasIdx = global_idx % halfChannels; + let batchIndex = global_idx / halfChannels; + let inputOffset = biasIdx + batchIndex * halfChannels * 2; + let valueLeft = input[inputOffset] + bias[biasIdx]; + let valueRight = input[inputOffset + halfChannels] + bias[biasIdx + halfChannels]; + let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1); + + ${u.setByOffset("global_idx","valueLeft * geluRight")} + }`}},lf=e=>{z2(e.inputs),e.compute(B2(e.inputs))}});var D2,j2,$t,cf,pf,ff,mf,hf,gf,yf,bf,_f,wf,vf=X(()=>{"use strict";ce();be();we();D2=(e,t,o,n,u,c,p,m,g,b,_,w)=>{let x,S;typeof m=="string"?x=S=(A,O)=>`${m}((${A}),(${O}))`:typeof m=="function"?x=S=m:(x=m.scalar,S=m.vector);let C=Y("outputData",_,n.length,4),T=F("aData",g,t.length,4),z=F("bData",b,o.length,4),k;if(u)if(c){let A=L.size(t)===1,O=L.size(o)===1,B=t.length>0&&t[t.length-1]%4===0,W=o.length>0&&o[o.length-1]%4===0;A||O?k=C.setByOffset("global_idx",S(A?`${T.type.value}(${T.getByOffset("0")}.x)`:T.getByOffset("global_idx"),O?`${z.type.value}(${z.getByOffset("0")}.x)`:z.getByOffset("global_idx"))):k=` + let outputIndices = ${C.offsetToIndices("global_idx * 4u")}; + let offsetA = ${T.broadcastedIndicesToOffset("outputIndices",C)}; + let offsetB = 
/**
 * Builds the program description for an element-wise binary operator.
 *
 * @param e  Program name.
 * @param t  Cache-key prefix; the flags computed here are appended to it.
 * @param o  First operand tensor (`dims`, `dataType`).
 * @param n  Second operand tensor (`dims`, `dataType`).
 * @param u  Operator implementation, forwarded to the shader generator `D2`.
 * @param c  Additional shader code, forwarded to `D2`.
 * @param p  Output data type; defaults to the first operand's data type.
 * @returns Program info with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 * @throws {Error} When `bt.calcShape` cannot produce a shape for the operands.
 */
j2 = (e, t, o, n, u, c, p = o.dataType) => {
  // Number() never yields null/undefined, so the former `?? 1` fallback could
  // never apply; it is dropped here without changing any result.
  const aDims = o.dims.map((d) => Number(d));
  const bDims = n.dims.map((d) => Number(d));

  const isBroadcast = !L.areEqual(aDims, bDims);
  let outputShape = aDims;
  let outputSize = L.size(aDims);
  let vectorize = false;
  let sharedDimDivisibleBy4 = false;

  // Every flag that influences the generated shader goes into the cache hint.
  const cacheFlags = [isBroadcast];

  if (isBroadcast) {
    const broadcastShape = bt.calcShape(aDims, bDims, false);
    if (!broadcastShape) {
      throw new Error("Can't perform binary op on the given tensors");
    }
    outputShape = broadcastShape.slice();
    outputSize = L.size(outputShape);

    const aIsScalar = L.size(aDims) === 1;
    const bIsScalar = L.size(bDims) === 1;
    const aLastDimDivisibleBy4 = aDims.length > 0 && aDims[aDims.length - 1] % 4 === 0;
    const bLastDimDivisibleBy4 = bDims.length > 0 && bDims[bDims.length - 1] % 4 === 0;
    cacheFlags.push(aIsScalar, bIsScalar, aLastDimDivisibleBy4, bLastDimDivisibleBy4);

    // Product of the trailing dimensions on which both operands agree.
    let sharedDimension = 1;
    for (let i = 1; i < outputShape.length; i++) {
      const dimA = aDims[aDims.length - i];
      const dimB = bDims[bDims.length - i];
      if (dimA !== dimB) {
        break;
      }
      sharedDimension *= dimA;
    }

    if (sharedDimension % 4 === 0) {
      sharedDimDivisibleBy4 = true;
      vectorize = true;
    } else if (aIsScalar || bIsScalar || aLastDimDivisibleBy4 || bLastDimDivisibleBy4) {
      vectorize = true;
    }
  } else {
    // Identical shapes always take the vectorized path.
    vectorize = true;
  }
  cacheFlags.push(vectorize);

  return {
    name: e,
    shaderCache: {
      hint: t + cacheFlags.map((flag) => flag.toString()).join("_"),
      inputDependencies: ["rank", "rank"],
    },
    getShaderSource: (shaderHelper) =>
      D2(
        shaderHelper,
        aDims,
        bDims,
        outputShape,
        vectorize,
        isBroadcast,
        sharedDimDivisibleBy4,
        u,
        o.dataType,
        n.dataType,
        p,
        c,
      ),
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: p }],
      // One invocation handles four elements; 64 invocations per workgroup.
      dispatchGroup: { x: Math.ceil(outputSize / 64 / 4) },
      programUniforms: [
        { type: 12, data: Math.ceil(L.size(outputShape) / 4) },
        ...te(aDims, bDims, outputShape),
      ],
    }),
  };
};
/**
 * Validates the inputs of a Concat node against the first input.
 *
 * Every other tensor must have the same data type and rank as the first one,
 * and must match it in every dimension except the concatenation axis.
 *
 * @param e  Input tensors; `dataType` and `dims` are read.
 * @param t  Index of the concatenation axis.
 * @throws {Error} When there are no inputs or any tensor disagrees with the first.
 */
R2 = (e, t) => {
  if (!e || e.length < 1) {
    throw new Error("too few inputs");
  }

  const reference = e[0];
  const expectedType = reference.dataType;
  const expectedRank = reference.dims.length;

  e.slice(1).forEach((tensor) => {
    if (tensor.dataType !== expectedType) {
      throw new Error("input tensors should be one type");
    }
    if (tensor.dims.length !== expectedRank) {
      throw new Error("input tensors should have the same shape");
    }
    // Only the concat axis is allowed to differ from the reference tensor.
    const differsOffAxis = tensor.dims.some((dim, axis) => axis !== t && dim !== reference.dims[axis]);
    if (differsOffAxis) {
      throw new Error("non concat dimensions must match");
    }
  });
};
${t}(0.0));`;case"Tanh":return`let e2x = exp(-2.0 * abs(value)); + value = sign(value) * (1.0 - e2x) / (1.0 + e2x); + `;case"":return"";default:throw new Error(`Unsupported activation ${e.activation}`)}},dt=(e,t)=>{e.activation==="Clip"?t.push({type:1,data:e.clipMax},{type:1,data:e.clipMin}):e.activation==="HardSigmoid"?t.push({type:1,data:e.alpha},{type:1,data:e.beta}):e.activation==="LeakyRelu"&&t.push({type:1,data:e.alpha})},ct=(e,t)=>{e.activation==="Clip"?t.push({name:"clip_max",type:"f32"},{name:"clip_min",type:"f32"}):e.activation==="HardSigmoid"?t.push({name:"alpha",type:"f32"},{name:"beta",type:"f32"}):e.activation==="LeakyRelu"&&t.push({name:"alpha",type:"f32"})},gn=e=>{let t=e?.activation||"";if(t==="HardSigmoid"){let[o,n]=e?.activation_params||[.2,.5];return{activation:t,alpha:o,beta:n}}else if(t==="Clip"){let[o,n]=e?.activation_params||[Ec,Pc];return{activation:t,clipMax:n,clipMin:o}}else if(t==="LeakyRelu"){let[o]=e?.activation_params||[.01];return{activation:t,alpha:o}}return{activation:t}}});var Ge,Sf,yn=X(()=>{"use strict";Ge=(e,t)=>{switch(e){case 1:return t;case 2:return`vec2<${t}>`;case 3:return`vec3<${t}>`;case 4:return`vec4<${t}>`;default:throw new Error(`${e}-component is not supported.`)}},Sf=e=>` + ${e?"value = value + getBiasByOutputCoords(coords);":""} + `});var Tf,If=X(()=>{"use strict";Tf=e=>` +fn getIndexFromCoords4D(coords : vec4, shape : vec4) -> i32 { + return dot(coords, vec4( + shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1)); +} +fn getOutputIndexFromCoords(coords : vec4) -> i32 { + return dot(coords, vec4( + i32(${e}.x), i32(${e}.y), i32(${e}.z), 1)); +} +`});var _r,bn,_n=X(()=>{"use strict";ce();be();we();Dt();_r=(e,t,o,n,u)=>{let c=n-o;return` + ${Array.from({length:o}).map((p,m)=>` + if (${ae(t.shape,m,t.rank)} != 1) { + ${t.indicesSet(e,m,ae(u,m+c,n))} + } else { + ${t.indicesSet(e,m,0)} + }`).join("")} +`},bn=(e,t,o,n,u=!1,c)=>{let 
p=e[0].dims,m=e[1].dims,g=p[p.length-2],b=m[m.length-1],_=p[p.length-1],w=Ae(b),x=Ae(_),S=Ae(g),C=L.size(o)/w/S,T=e.length>2,z=n?n.slice(0,-2):o.slice(0,-2),A=[L.size(z),g,b],O=[{type:12,data:C},{type:12,data:g},{type:12,data:b},{type:12,data:_}];dt(t,O),O.push(...te(z,p,m)),T&&O.push(...te(e[2].dims)),O.push(...te(A));let B=W=>{let N=dn("batch_dims",e[0].dataType,z.length),q=F("a",e[0].dataType,p.length,x),K=F("b",e[1].dataType,m.length,w),Q=Y("output",e[0].dataType,A.length,w),ne=Be(Q.type.tensor),se=lt(t,Q.type.value,ne),ue=[q,K],ge="";if(T){let fe=u?w:1;ue.push(F("bias",e[2].dataType,e[2].dims.length,fe)),ge=`${u?`value += bias[col / ${fe}];`:`value += ${Q.type.value}(bias[row + i]);`}`}let re=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"}];ct(t,re);let Se=()=>{let fe=`var a_data: ${q.type.value};`;for(let ie=0;ie; + for (var k: u32 = 0u; k < uniforms.K; k = k + ${x}) { + ${Se()} + } + for (var i = 0u; i < ${S}u; i++) { + var value = values[i]; + ${ge} + ${se} + let cur_indices = ${Q.type.indices}(batch, row + i, col); + let offset = ${Q.indicesToOffset("cur_indices")}; + ${Q.setByOffset(`offset / ${w}`,"value")}; + } + } + `};return{name:"MatMulNaive",shaderCache:{hint:`${t.activation};${w};${x};${S};${u}`,inputDependencies:T?["rank","rank","rank"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:c?c(o):o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(C/64)},programUniforms:O}),getShaderSource:B}}});var W2,L2,Fi,Af,G2,qi,H2,wr,wn=X(()=>{"use strict";ce();be();we();Dt();_n();yn();W2=(e,t)=>e?` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart / innerElementSize + inputCol${t?", batchIndices":""}); + `:` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRow + innerRow, + kStart / innerElementSize + inputCol${t?", batchIndices":""}); + `,L2=(e,t)=>e?` + let ACached0 = mm_Asub[k * innerElementSize][localRow]; + let ACached1 = mm_Asub[k * innerElementSize + 
1][localRow]; + let ACached2 = mm_Asub[k * innerElementSize + 2][localRow]; + ${t===3?"":"let ACached3 = mm_Asub[k * innerElementSize + 3][localRow];"} + for (var i = 0; i < rowPerThread; i = i + 1) { + acc[i] = BCached0 * ACached0[i] + acc[i]; + acc[i] = BCached1 * ACached1[i] + acc[i]; + acc[i] = BCached2 * ACached2[i] + acc[i]; + ${t===3?"":"acc[i] = BCached3 * ACached3[i] + acc[i];"} + }`:` + for (var i = 0; i < rowPerThread; i = i + 1) { + let ACached = mm_Asub[tileRow + i][k]; + acc[i] = BCached0 * ACached.x + acc[i]; + acc[i] = BCached1 * ACached.y + acc[i]; + acc[i] = BCached2 * ACached.z + acc[i]; + ${t===3?"":"acc[i] = BCached3 * ACached.w + acc[i];"} + }`,Fi=(e,t,o="f32",n,u=!1,c=32,p=!1,m=32)=>{let g=t[1]*e[1],b=t[0]*e[0],_=u?g:c,w=u?c:g,x=_/t[0],S=c/t[1];if(!((u&&x===4&&e[1]===4||!u&&(x===3||x===4))&&_%t[0]===0&&c%t[1]===0&&e[0]===4))throw new Error(`If transposeA ${u} is true, innerElementSize ${x} and workPerThread[1] ${e[1]} must be 4. + Otherwise, innerElementSize ${x} must be 3 or 4. + tileAWidth ${_} must be divisible by workgroupSize[0]${t[0]}. tileInner ${c} must be divisible by workgroupSize[1] ${t[1]}. 
colPerThread ${e[0]} must be 4.`);return` +var mm_Asub: array, ${_/x}>, ${w}>; +var mm_Bsub: array, ${b/e[0]}>, ${c}>; + +const rowPerThread = ${e[1]}; +const colPerThread = ${e[0]}; +const innerElementSize = ${x}; +const tileInner = ${c}; + +@compute @workgroup_size(${t[0]}, ${t[1]}, ${t[2]}) +fn main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let localRow = i32(localId.y); + let tileRow = localRow * rowPerThread; + let tileCol = i32(localId.x); + + let globalRow =i32(globalId.y) * rowPerThread; + let globalCol = i32(globalId.x); + let batch = ${p?"0":"i32(globalId.z)"}; + ${n?`let batchIndices = ${n.offsetToIndices("u32(batch)")};`:""} + let globalRowStart = i32(workgroupId.y) * ${g}; + + let num_tiles = ${p?`${Math.ceil(m/c)}`:"(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${p?`i32(globalId.z) * ${m}`:"0"}; + + var acc: array, rowPerThread>; + + // Loop over shared dimension. + let tileRowB = localRow * ${S}; + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let inputRow = tileRow + innerRow; + let inputCol = tileCol; + ${W2(u,n)} + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${S}; innerRow = innerRow + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, kStart + inputRow, globalCol${n?", batchIndices":""}); + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. 
+ for (var k = 0; k < tileInner / innerElementSize; k = k + 1) { + let BCached0 = mm_Bsub[k * innerElementSize][tileCol]; + let BCached1 = mm_Bsub[k * innerElementSize + 1][tileCol]; + let BCached2 = mm_Bsub[k * innerElementSize + 2][tileCol]; + ${x===3?"":"let BCached3 = mm_Bsub[k * innerElementSize + 3][tileCol];"} + + ${L2(u,x)} + } + + workgroupBarrier(); + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]); + } +}`},Af=(e,t)=>e?` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart + inputCol${t?", batchIndices":""}); + `:` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRowStart + inputRow, + kStart + inputCol${t?", batchIndices":""}); + `,G2=e=>e?"let ACached = mm_Asub[k][tileRow + innerRow];":"let ACached = mm_Asub[tileRow + innerRow][k];",qi=(e,t,o="f32",n,u=!1,c=32,p=!1,m=32,g=!1)=>{let b=e[1]*t[1],_=e[0]*t[0],w=u?b:c,x=u?c:b;if(!(x%t[1]===0&&w%t[0]===0&&c%t[1]===0))throw new Error(`tileAHight ${x} must be divisible by workgroupSize[1]${t[1]}, tileAWidth ${w} must be divisible by workgroupSize[0]${t[0]}, tileInner ${c} must be divisible by workgroupSize[1]${t[1]}`);let S=x/t[1],C=w/t[0],T=c/t[1],z=g?` + let localRow = i32(localId.y); + let localCol = i32(localId.x); + let globalRowStart = i32(workgroupId.y) * ${b}; + let globalColStart = i32(workgroupId.x) * ${_}; + + // Loop over shared dimension. + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var inputRow = localRow; inputRow < ${x}; inputRow = inputRow + ${t[1]}) { + for (var inputCol = localCol; inputCol < ${w}; inputCol = inputCol + ${t[0]}) { + ${Af(u,n)} + } + } + // Load one tile of B into local memory. 
+ for (var inputRow = localRow; inputRow < ${c}; inputRow = inputRow + ${t[1]}) { + for (var inputCol = localCol; inputCol < ${_}; inputCol = inputCol + ${t[0]}) { + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalColStart + inputCol${n?", batchIndices":""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${o}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][localCol + inner * ${t[0]}]; + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let ACached = ${u?`mm_Asub[k][localRow + innerRow * ${t[1]}];`:`mm_Asub[localRow + innerRow * ${t[1]}][k];`} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + + ACached * BCached[innerCol]; + } + } + } + workgroupBarrier(); + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let gRow = globalRowStart + localRow + innerRow * ${t[1]}; + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let gCol = globalColStart + localCol + innerCol * ${t[0]}; + mm_write(batch, gRow, gCol, acc[innerRow][innerCol]); + } + } + `:` +let tileRow = i32(localId.y) * rowPerThread; +let tileCol = i32(localId.x) * colPerThread; + +let globalRow = i32(globalId.y) * rowPerThread; +let globalCol = i32(globalId.x) * colPerThread; +let globalRowStart = i32(workgroupId.y) * ${b}; + +let tileRowA = i32(localId.y) * ${S}; +let tileColA = i32(localId.x) * ${C}; +let tileRowB = i32(localId.y) * ${T}; +// Loop over shared dimension. +for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. 
+ for (var innerRow = 0; innerRow < ${S}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < ${C}; innerCol = innerCol + 1) { + let inputRow = tileRowA + innerRow; + let inputCol = tileColA + innerCol; + ${Af(u,n)} + } + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${T}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol + innerCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalCol + innerCol${n?", batchIndices":""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${o}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][tileCol + inner]; + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + ${G2(u)} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol]; + } + } + } + + workgroupBarrier(); +} + +for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + mm_write(batch, globalRow + innerRow, globalCol + innerCol, + acc[innerRow][innerCol]); + } +} +`;return` + var mm_Asub : array, ${x}>; + var mm_Bsub : array, ${c}>; + const rowPerThread = ${e[1]}; + const colPerThread = ${e[0]}; + const tileInner = ${c}; + +@compute @workgroup_size(${t[0]}, ${t[1]}, ${t[2]}) +fn main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let batch = ${p?"0":"i32(globalId.z)"}; + ${n?`let batchIndices = ${n.offsetToIndices("u32(batch)")};`:""} + let num_tiles = 
${p?`${Math.ceil(m/c)}`:"(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${p?`i32(globalId.z) * ${m}`:"0"}; + + var acc : array, rowPerThread>; + ${z} + } +`},H2=(e,t,o,n,u=!1)=>{let[c,p,m,g]=n,b=Be(n[0].type.tensor);return` + fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${c.type.indices}) -> ${Ge(e,b)} { + var value = ${Ge(e,b)}(0.0); + let col = colIn * ${e}; + if(row < uniforms.dim_a_outer && col < uniforms.dim_inner) + { + var aIndices: ${p.type.indices}; + ${_r("aIndices",p,p.rank-2,c.rank,"batchIndices")} + ${p.indicesSet("aIndices",p.rank-2,"u32(row)")} + ${p.indicesSet("aIndices",p.rank-1,"u32(colIn)")} + value = ${p.getByIndices("aIndices")}; + } + return value; + } + + fn mm_readB(batch: i32, row: i32, colIn: i32, batchIndices: ${c.type.indices}) -> ${Ge(e,b)} { + var value = ${Ge(e,b)}(0.0); + let col = colIn * ${e}; + if(row < uniforms.dim_inner && col < uniforms.dim_b_outer) + { + var bIndices: ${m.type.indices}; + ${_r("bIndices",m,m.rank-2,c.rank,"batchIndices")} + ${m.indicesSet("bIndices",m.rank-2,"u32(row)")} + ${m.indicesSet("bIndices",m.rank-1,"u32(colIn)")} + value = ${m.getByIndices("bIndices")}; + } + return value; + } + + fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${Ge(e,b)}) { + let col = colIn * ${e}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) { + var value = valueIn; + let coords = vec3(batch, row, colIn); + ${t?`value = value + ${u?"bias[colIn]":`${Ge(e,b)}(bias[row])`};`:""} + ${o} + ${g.setByIndices("vec3(coords)","value")} + } + } + `},wr=(e,t,o,n,u=!1,c)=>{let 
p=e[0].dims,m=e[1].dims,g=p.slice(0,-2),b=m.slice(0,-2),_=n?n.slice(0,-2):o.slice(0,-2),w=L.size(_),x=p[p.length-2],S=p[p.length-1],C=m[m.length-1],T=S%4===0&&C%4===0,z=x<=8?[4,1,1]:[4,4,1],k=[8,8,1],A=[Math.ceil(C/k[0]/z[0]),Math.ceil(x/k[1]/z[1]),Math.ceil(w/k[2]/z[2])],O=T?4:1,B=[...g,x,S/O],W=B.length,N=[...b,S,C/O],q=N.length,K=[w,x,C/O],Q=[{type:6,data:x},{type:6,data:C},{type:6,data:S}];dt(t,Q),Q.push(...te(_,B,N));let ne=["rank","rank"],se=e.length>2;se&&(Q.push(...te(e[2].dims)),ne.push("rank")),Q.push(...te(K));let ue=ge=>{let re=_.length,Se=dn("batchDims",e[0].dataType,re,1),fe=Be(e[0].dataType),ie=F("a",e[0].dataType,W,O),ve=F("b",e[1].dataType,q,O),le=Y("result",e[0].dataType,K.length,O),me=[ie,ve];if(se){let V=u?O:1;me.push(F("bias",e[2].dataType,e[2].dims.length,V))}let ke=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"}];ct(t,ke);let je=Be(le.type.tensor),he=lt(t,le.type.value,je),R=H2(O,se,he,[Se,ie,ve,le],u);return` + ${ge.registerUniforms(ke).registerInternalVariables(Se).declareVariables(...me,le)} + ${R} + ${T?Fi(z,k,fe,Se):qi(z,k,fe,Se)} + `};return{name:"MatMul",shaderCache:{hint:`${z};${t.activation};${T};${u}`,inputDependencies:ne},getRunData:()=>({outputs:[{dims:c?c(o):o,dataType:e[0].dataType}],dispatchGroup:{x:A[0],y:A[1],z:A[2]},programUniforms:Q}),getShaderSource:ue}}});var F2,kf,Ef=X(()=>{"use strict";ce();yt();we();Dt();yn();If();wn();F2=(e,t,o,n,u=!1,c,p=4,m=4,g=4,b="f32")=>{let _=ne=>{switch(ne){case 1:return"resData = x[xIndex];";case 3:return`resData = vec3<${b}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;case 4:return"resData = x[xIndex / 4];";default:throw new Error(`innerElementSize ${ne} is not supported.`)}},w=ne=>{switch(ne){case 1:return"return w[row * i32(uniforms.w_shape[3]) + colIn];";case 4:return"return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";default:throw new Error(`innerElementSize ${ne} is not supported.`)}},x=e?` + let coord = vec4(batch, xRow, xCol, xCh); + 
`:` + let coord = vec4(batch, xCh, xRow, xCol); + `,S=e?` + let coords = vec4( + batch, + row / outWidth, + row % outWidth, + col); + `:` + let coords = vec4( + batch, + row, + col / outWidth, + col % outWidth); + `,C=e?"i32(uniforms.x_shape[1])":"i32(uniforms.x_shape[2])",T=e?"i32(uniforms.x_shape[2])":"i32(uniforms.x_shape[3])",z=e?"row":"col",k=e?"col":"row",A=` + let inChannels = i32(uniforms.w_shape[2]); + let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"}; + let outRow = ${z} / outWidth; + let outCol = ${z} % outWidth; + + let WRow = ${k} / (i32(uniforms.w_shape[1]) * inChannels); + let WCol = ${k} / inChannels % i32(uniforms.w_shape[1]); + let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0]; + let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1]; + let xCh = ${k} % inChannels; + var resData = ${Ge(p,b)}(0.0); + // The bounds checking is always needed since we use it to pad zero for + // the 'same' padding type. 
+ if (xRow >= 0 && xRow < ${C} && xCol >= 0 && xCol < ${T}) { + ${x} + let xIndex = getIndexFromCoords4D(coord, vec4(uniforms.x_shape)); + ${_(p)} + } + return resData;`,O=e?t&&n?` + let col = colIn * ${p}; + ${A}`:` + let col = colIn * ${p}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_inner) { + ${A} + } + return ${Ge(p,b)}(0.0);`:n&&o?` + let col = colIn * ${p}; + ${A}`:` + let col = colIn * ${p}; + if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) { + ${A} + } + return ${Ge(p,b)}(0.0);`,B=e?n&&o?w(m):` + let col = colIn * ${m}; + if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) { + ${w(m)} + } + return ${Ge(m,b)}(0.0);`:` + let col = colIn * ${m}; + if (row < uniforms.dim_inner && col < uniforms.dim_a_outer) { + ${w(m)} + } + return ${Ge(m,b)}(0.0);`,W=Ge(g,b),N=e?Ge(p,b):Ge(m,b),q=e?Ge(m,b):Ge(p,b),K=lt(c,W,b);return` + fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${N} { + ${e?O:B} + } + + fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${q} { + ${e?B:O} + } + + fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${W}) { + let col = colIn * ${g}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) + { + var value = valueIn; + let outWidth = ${e?"i32(uniforms.result_shape[2])":"i32(uniforms.result_shape[3])"}; + ${S} + ${Sf(u)} + ${K} + setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value); + } + }`},kf=(e,t,o,n,u,c,p,m,g)=>{let b=t.format==="NHWC",_=b?e[0].dims[3]:e[0].dims[1],w=o[0],x=b?o[2]:o[3],S=b?o[1]:o[2],C=b?o[3]:o[1],T=b&&(_%4===0||_%3===0)&&C%4===0,z=b?C:x*S,k=b?x*S:C,A=[8,8,1],O=n<=8?[4,1,1]:[4,4,1],B=[Math.ceil(z/A[0]/O[0]),Math.ceil(k/A[1]/O[1]),Math.ceil(w/A[2]/O[2])];$e("verbose",()=>`[conv2d_mm_webgpu] dispatch = ${B}`);let 
W=T?b&&_%4!==0?3:4:1,N=A[1]*O[1],q=A[0]*O[0],K=Math.max(A[0]*W,A[1]),Q=n%N===0,ne=u%q===0,se=c%K===0,ue=T?[W,4,4]:[1,1,1],ge=[{type:6,data:n},{type:6,data:u},{type:6,data:c},{type:6,data:[t.pads[0],t.pads[1]]},{type:6,data:t.strides},{type:6,data:t.dilations}];dt(t,ge),ge.push(...te(e[0].dims,e[1].dims));let re=["rank","rank"];p&&(ge.push(...te(e[2].dims)),re.push("rank")),ge.push(...te(o));let Se=fe=>{let ie=[{name:"dim_a_outer",type:"i32"},{name:"dim_b_outer",type:"i32"},{name:"dim_inner",type:"i32"},{name:"pad",type:"i32",length:2},{name:"stride",type:"i32",length:2},{name:"dilation",type:"i32",length:2}];ct(t,ie);let ve=T?4:1,le=Be(e[0].dataType),me=` + fn setOutputAtIndex(flatIndex : i32, value : ${T?`vec4<${le}>`:le}) { + result[flatIndex] = ${T?`vec4<${le}>`:le}(value); + } + fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${T?`vec4<${le}>`:le}) { + let flatIndex = getOutputIndexFromCoords(vec4(d0, d1, d2, d3)); + setOutputAtIndex(flatIndex ${T?"/ 4":""}, value); + }`,ke=F("x",e[0].dataType,e[0].dims.length,W===3?1:W),je=F("w",e[1].dataType,e[1].dims.length,ve),he=[ke,je],R=Y("result",e[0].dataType,o.length,ve);if(p){let V=F("bias",e[2].dataType,e[2].dims.length,ve);he.push(V),me+=` + fn getBiasByOutputCoords(coords : vec4) -> ${T?`vec4<${le}>`:le} { + return bias[coords.${b?"w":"y"}${T?"/ 4":""}]; + }`}return` + ${Tf("uniforms.result_strides")} + //struct Uniforms { xShape : vec4, wShape : vec4, outShape : vec4, + // outShapeStrides: vec3, filterDims : vec2, pad : vec2, stride : vec2, + // dilation : vec2, dimAOuter : i32, dimBOuter : i32, dimInner : i32 }; + ${fe.registerUniforms(ie).declareVariables(...he,R)} + ${me} + ${F2(b,Q,ne,se,p,t,ue[0],ue[1],ue[2],le)} + ${T?Fi(O,A,le,void 0,!b,K):qi(O,A,le,void 0,!b,K,!1,void 
0,m)}`};return{name:"Conv2DMatMul",shaderCache:{hint:`${t.cacheKey};${W};${T};${Q};${ne};${se};${N};${q};${K}`,inputDependencies:re},getRunData:()=>({outputs:[{dims:g?g(o):o,dataType:e[0].dataType}],dispatchGroup:{x:B[0],y:B[1],z:B[2]},programUniforms:ge}),getShaderSource:Se}}});var q2,Pf,vn,K2,Of,J2,zf,Bf,Df=X(()=>{"use strict";ce();yt();be();we();Dt();yn();q2=e=>{let t=1;for(let o=0;otypeof e=="number"?[e,e,e]:e,vn=(e,t)=>t<=1?e:e+(e-1)*(t-1),K2=(e,t,o,n=1)=>{let u=vn(t,n);return Math.floor((e[0]*(o-1)-o+u)/2)},Of=(e,t,o,n,u)=>{u==null&&(u=K2(e,t[0],n[0]));let c=[0,0,0,o];for(let p=0;p<3;p++)e[p]+2*u>=t[p]&&(c[p]=Math.trunc((e[p]-t[p]+2*u)/n[p]+1));return c},J2=(e,t,o,n,u,c,p,m,g,b)=>{let _,w,x,S;if(e==="VALID"&&(e=0),typeof e=="number"){_={top:e,bottom:e,left:e,right:e,front:e,back:e};let C=Of([t,o,n,1],[m,g,b],1,[u,c,p],e);w=C[0],x=C[1],S=C[2]}else if(Array.isArray(e)){if(!e.every((T,z,k)=>T===k[0]))throw Error(`Unsupported padding parameter: ${e}`);_={top:e[0],bottom:e[1],left:e[2],right:e[3],front:e[4],back:e[5]};let C=Of([t,o,n,1],[m,g,b],1,[u,c,p],e[0]);w=C[0],x=C[1],S=C[2]}else if(e==="SAME_UPPER"){w=Math.ceil(t/u),x=Math.ceil(o/c),S=Math.ceil(n/p);let C=(w-1)*u+m-t,T=(x-1)*c+g-o,z=(S-1)*p+b-n,k=Math.floor(C/2),A=C-k,O=Math.floor(T/2),B=T-O,W=Math.floor(z/2),N=z-W;_={top:O,bottom:B,left:W,right:N,front:k,back:A}}else throw Error(`Unknown padding parameter: ${e}`);return{padInfo:_,outDepth:w,outHeight:x,outWidth:S}},zf=(e,t,o,n,u,c=!1,p="channelsLast")=>{let m,g,b,_,w;if(p==="channelsLast")[m,g,b,_,w]=e;else if(p==="channelsFirst")[m,w,g,b,_]=e;else throw new Error(`Unknown dataFormat ${p}`);let[x,,S,C,T]=t,[z,k,A]=Pf(o),[O,B,W]=Pf(n),N=vn(S,O),q=vn(C,B),K=vn(T,W),{padInfo:Q,outDepth:ne,outHeight:se,outWidth:ue}=J2(u,g,b,_,z,k,A,N,q,K),ge=c?x*w:x,re=[0,0,0,0,0];return 
p==="channelsFirst"?re=[m,ge,ne,se,ue]:p==="channelsLast"&&(re=[m,ne,se,ue,ge]),{batchSize:m,dataFormat:p,inDepth:g,inHeight:b,inWidth:_,inChannels:w,outDepth:ne,outHeight:se,outWidth:ue,outChannels:ge,padInfo:Q,strideDepth:z,strideHeight:k,strideWidth:A,filterDepth:S,filterHeight:C,filterWidth:T,effectiveFilterDepth:N,effectiveFilterHeight:q,effectiveFilterWidth:K,dilationDepth:O,dilationHeight:B,dilationWidth:W,inShape:e,outShape:re,filterShape:t}},Bf=(e,t,o,n,u,c)=>{let p=c==="channelsLast",m=p?e[0].dims[3]:e[0].dims[1],g=!1,b=[64,1,1],_={x:o.map((A,O)=>O)},w=[Math.ceil(q2(_.x.map(A=>o[A]))/b[0]),1,1];$e("verbose",()=>`[conv3d_naive_webgpu] dispatch = ${w}`);let x=g?p&&m%4!==0?3:4:1,S=L.size(o),C=[{type:12,data:S},{type:12,data:n},{type:12,data:u},{type:12,data:t.strides},{type:12,data:t.dilations}];dt(t,C),C.push(...te(e[0].dims,e[1].dims));let T=["rank","rank"],z=e.length===3;z&&(C.push(...te(e[2].dims)),T.push("rank")),C.push(...te(o));let k=A=>{let O=[{name:"output_size",type:"u32"},{name:"filter_dims",type:"u32",length:n.length},{name:"pads",type:"u32",length:u.length},{name:"strides",type:"u32",length:t.strides.length},{name:"dilations",type:"u32",length:t.dilations.length}];ct(t,O);let B=g?4:1,W=Be(e[0].dataType),N=F("x",e[0].dataType,e[0].dims.length,x===3?1:x),q=F("W",e[1].dataType,e[1].dims.length,B),K=[N,q],Q=Y("result",e[0].dataType,o.length,B),ne="";if(z){let ge=F("bias",e[2].dataType,e[2].dims.length,B);K.push(ge),ne+=` + fn getBiasByOutputCoords(coords : array) -> ${g?`vec4<${W}>`:W} { + return bias[${p?ae("coords",4,5):ae("coords",1,5)}${g?"/ 4":""}]; + }`}let se=Ge(x,W),ue=lt(t,se,W);return` + ${ne} + fn getX(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 { + let aIndices = array(d0, d1, d2, d3, d4); + return ${N.getByIndices("aIndices")}; + } + fn getW(d0 : u32, d1 : u32, d2 : u32, d3 : u32, d4 : u32) -> f32 { + let aIndices = array(d0, d1, d2, d3, d4); + return ${q.getByIndices("aIndices")}; + } + 
${A.registerUniforms(O).declareVariables(...K,Q)} + ${A.mainStart()} + ${A.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let coords = ${Q.offsetToIndices("global_idx")}; + let batch = ${ae("coords",0,N.rank)}; + let d2 = ${p?ae("coords",N.rank-1,N.rank):ae("coords",1,N.rank)}; + let xFRCCorner = vec3(${p?ae("coords",1,N.rank):ae("coords",2,N.rank)}, + ${p?ae("coords",2,N.rank):ae("coords",3,N.rank)}, + ${p?ae("coords",3,N.rank):ae("coords",4,N.rank)}) * uniforms.strides - uniforms.pads; + let xFCorner = xFRCCorner.x; + let xRCorner = xFRCCorner.y; + let xCCorner = xFRCCorner.z; + let xShapeY = ${p?ae("uniforms.x_shape",1,N.rank):ae("uniforms.x_shape",2,N.rank)}; + let xShapeZ = ${p?ae("uniforms.x_shape",2,N.rank):ae("uniforms.x_shape",3,N.rank)}; + let xShapeW = ${p?ae("uniforms.x_shape",3,N.rank):ae("uniforms.x_shape",4,N.rank)}; + let xShapeU = ${p?ae("uniforms.x_shape",4,N.rank):ae("uniforms.x_shape",1,N.rank)}; + let inputDepthNearestVec4 = (xShapeU / 4) * 4; + let inputDepthVec4Remainder = xShapeU % 4; + + var value = 0.0; + for (var wF = 0u; wF < uniforms.filter_dims[0]; wF++) { + let xF = xFCorner + wF * uniforms.dilations[0]; + if (xF < 0 || xF >= xShapeY) { + continue; + } + + for (var wR = 0u; wR < uniforms.filter_dims[1]; wR++) { + let xR = xRCorner + wR * uniforms.dilations[1]; + if (xR < 0 || xR >= xShapeZ) { + continue; + } + + for (var wC = 0u; wC < uniforms.filter_dims[2]; wC++) { + let xC = xCCorner + wC * uniforms.dilations[2]; + if (xC < 0 || xC >= xShapeW) { + continue; + } + + for (var d1 = 0u; d1 < inputDepthNearestVec4; d1 += 4) { + ${p?`let xValues = vec4( + getX(batch, xF, xR, xC, d1), + getX(batch, xF, xR, xC, d1 + 1), + getX(batch, xF, xR, xC, d1 + 2), + getX(batch, xF, xR, xC, d1 + 3)); + `:`let xValues = vec4( + getX(batch, d1, xF, xR, xC), + getX(batch, d1 + 1, xF, xR, xC), + getX(batch, d1 + 2, xF, xR, xC), + getX(batch, d1 + 3, xF, xR, xC)); + `} + let wValues = vec4( + getW(d2, d1, wF, wR, wC), + getW(d2, d1 + 1, 
wF, wR, wC), + getW(d2, d1 + 2, wF, wR, wC), + getW(d2, d1 + 3, wF, wR, wC)); + value += dot(xValues, wValues); + } + if (inputDepthVec4Remainder == 1) { + ${p?`value += getX(batch, xF, xR, xC, inputDepthNearestVec4) + * getW(d2, inputDepthNearestVec4, wF, wR, wC);`:`value += getX(batch, inputDepthNearestVec4, xF, xR, xC) + * getW(d2, inputDepthNearestVec4, wF, wR, wC);`} + } else if (inputDepthVec4Remainder == 2) { + ${p?`let xValues = vec2( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1)); + `:`let xValues = vec2( + getX(batch, inputDepthNearestVec4, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 1, xF, xR, xC)); + `} + let wValues = vec2( + getW(d2, inputDepthNearestVec4, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 1, wF, wR, wC)); + value += dot(xValues, wValues); + } else if (inputDepthVec4Remainder == 3) { + ${p?`let xValues = vec3( + getX(batch, xF, xR, xC, inputDepthNearestVec4), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 1), + getX(batch, xF, xR, xC, inputDepthNearestVec4 + 2)); + `:`let xValues = vec3( + getX(batch, inputDepthNearestVec4, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 1, xF, xR, xC), + getX(batch, inputDepthNearestVec4 + 2, xF, xR, xC)); + `} + let wValues = vec3( + getW(d2, inputDepthNearestVec4, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 1, wF, wR, wC), + getW(d2, inputDepthNearestVec4 + 2, wF, wR, wC)); + value += dot(xValues, wValues); + } + } + } + } + ${z?"value = value + getBiasByOutputCoords(coords)":""}; + ${ue} + result[global_idx] = f32(value); + }`};return{name:"Conv3DNaive",shaderCache:{hint:`${t.cacheKey};${p};${x};${z}`,inputDependencies:T},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:w[0],y:w[1],z:w[2]},programUniforms:C}),getShaderSource:k}}});var jf,Mf,Rf=X(()=>{"use strict";ce();be();we();Dt();jf=(e,t,o,n)=>{let u=e.length>2,c=u?"value += 
b[output_channel];":"",p=e[0].dims,m=e[1].dims,g=t.format==="NHWC",b=g?o[3]:o[1],_=b/t.group,w=g&&_>=4?Ae(b):1,x=L.size(o)/w,S=[{type:12,data:x},{type:12,data:t.dilations},{type:12,data:[t.strides[0],t.strides[1]]},{type:12,data:[t.pads[0],t.pads[1]]},{type:12,data:_}];dt(t,S),S.push(...te(p,[m[0],m[1],m[2],m[3]/w]));let C=u?["rank","rank","rank"]:["rank","rank"];S.push(...te([o[0],o[1],o[2],o[3]/w]));let T=z=>{let k=Y("output",e[0].dataType,o.length,w),A=Be(k.type.tensor),O=lt(t,k.type.value,A),B=F("x",e[0].dataType,p.length),W=F("w",e[1].dataType,m.length,w),N=[B,W];u&&N.push(F("b",e[2].dataType,e[2].dims,w));let q=[{name:"output_size",type:"u32"},{name:"dilations",type:"u32",length:t.dilations.length},{name:"strides",type:"u32",length:2},{name:"pads",type:"u32",length:2},{name:"output_channels_per_group",type:"u32"}];ct(t,q);let K=g?` + for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[0]; wHeight++) { + let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0]; + + if (xHeight < 0u || xHeight >= uniforms.x_shape[1]) { + continue; + } + + for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[1]; wWidth++) { + let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1]; + if (xWidth < 0u || xWidth >= uniforms.x_shape[2]) { + continue; + } + + for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[2]; wInChannel++) { + let input_channel = in_channel_offset + wInChannel; + let xVal = ${B.get("batch","xHeight","xWidth","input_channel")}; + let wVal = ${W.get("wHeight","wWidth","wInChannel","output_channel")}; + value += xVal * wVal; + } + } + } + `:` + for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[1]; wInChannel++) { + let input_channel = in_channel_offset + wInChannel; + for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[2]; wHeight++) { + let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0]; + + if (xHeight < 0u || xHeight >= uniforms.x_shape[2]) { + continue; + } + + for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[3]; 
wWidth++) { + let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1]; + if (xWidth < 0u || xWidth >= uniforms.x_shape[3]) { + continue; + } + + let xVal = ${B.get("batch","input_channel","xHeight","xWidth")}; + let wVal = ${W.get("output_channel","wInChannel","wHeight","wWidth")}; + value += xVal * wVal; + } + } + } + `;return` + ${z.registerUniforms(q).declareVariables(...N,k)} + + ${z.mainStart()} + ${z.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let outputIndices = ${k.offsetToIndices("global_idx")}; + let batch: u32 = outputIndices[0]; + let output_channel: u32 = outputIndices[${g?3:1}]; + let xRCCorner: vec2 = vec2(outputIndices[${g?1:2}], outputIndices[${g?2:3}]) * uniforms.strides - uniforms.pads; + let group_id: u32 = output_channel * ${w} / uniforms.output_channels_per_group; + var in_channel_offset = group_id * uniforms.w_shape[${g?2:1}]; + + var value: ${k.type.value} = ${k.type.value}(0); + ${K} + ${c} + ${O} + ${k.setByOffset("global_idx","value")} + }`};return{name:"GroupedConv",shaderCache:{hint:`${t.cacheKey}_${w}`,inputDependencies:C},getRunData:()=>({outputs:[{dims:n?n(o):o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:S}),getShaderSource:T}},Mf=(e,t,o,n)=>{let u=e.length>2,c=Ae(o[3]),p=Ae(o[2]),m=L.size(o)/c/p,g=[e[0].dims[0],e[0].dims[1],e[0].dims[2],e[0].dims[3]/c],b=[e[1].dims[0],e[1].dims[1],e[1].dims[2],e[1].dims[3]/c],_=[o[0],o[1],o[2],o[3]/c],w=[{type:12,data:m},{type:6,data:[t.strides[0],t.strides[1]]},{type:6,data:[t.pads[0],t.pads[1]]}];dt(t,w),w.push(...te(g,b,_));let x=(p-1)*t.strides[1]+b[1],S=C=>{let T=Y("output",e[0].dataType,_.length,c),z=Be(T.type.tensor),k=lt(t,T.type.value,z),A=F("x",e[0].dataType,g.length,c),O=F("w",e[1].dataType,b.length,c),B=[A,O];u&&B.push(F("b",e[2].dataType,e[2].dims,c));let W=u?"value += b[output_channel];":"",N=[{name:"output_size",type:"u32"},{name:"strides",type:"i32",length:2},{name:"pads",type:"i32",length:2}];return ct(t,N),` + 
${C.registerUniforms(N).declareVariables(...B,T)} + ${C.mainStart()} + ${C.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let width0 = uniforms.output_shape[3]; + let output_channel = global_idx % width0; + var index1 = global_idx / width0; + let width1 = uniforms.output_shape[2] / ${p}u; + let col = (index1 % width1) * ${p}u; + index1 = index1 / width1; + let row = index1 % uniforms.output_shape[1]; + let batch = index1 / uniforms.output_shape[1]; + + let x_corner = vec2(i32(row), i32(col)) * uniforms.strides - uniforms.pads; + + var x_vals: array<${A.type.value}, ${x}>; + var values: array<${T.type.value}, ${p}>; + let input_channel = output_channel; + // Use constant instead of uniform can give better performance for w's height/width. + for (var w_height: u32 = 0u; w_height < ${b[0]}; w_height++) { + let x_height = x_corner.x + i32(w_height); + if (x_height >= 0 && u32(x_height) < uniforms.x_shape[1]) { + for (var i = 0; i < ${x}; i++) { + let x_width = x_corner.y + i; + if (x_width >= 0 && u32(x_width) < uniforms.x_shape[2]) { + x_vals[i] = ${A.get("batch","u32(x_height)","u32(x_width)","input_channel")}; + } else { + x_vals[i] = ${A.type.value}(0); + } + } + for (var w_width: u32 = 0u; w_width < ${b[1]}; w_width++) { + let w_val = ${O.get("w_height","w_width","0","output_channel")}; + for (var i = 0u; i < ${p}u; i++) { + values[i] = fma(x_vals[i * u32(uniforms.strides[1]) + w_width], w_val, values[i]); + } + } + } + } + + for (var i = 0u; i < ${p}u; i++) { + var value = values[i]; + ${W} + ${k} + ${T.set("batch","row","col + i","output_channel","value")}; + } + }`};return{name:"GroupedConv-Vectorize",shaderCache:{hint:`${t.cacheKey};${c};${p};${x};${b[0]};${b[1]}`,inputDependencies:u?["rank","rank","type"]:["rank","rank"]},getRunData:()=>({outputs:[{dims:n?n(o):o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:w}),getShaderSource:S}}});var Z2,Ki,Q2,Ji,Zi,Uf,Y2,X2,Qi,Nf=X(()=>{"use 
strict";be();Ef();Df();wn();Rf();Dt();_n();Tt();Z2=(e,t,o,n,u,c)=>{let p=e[0],m=e.slice(c?1:2,c?3:4),g=m.length,b=t[0],w=t.slice(2).map((C,T)=>C+(C-1)*(o[T]-1)),S=m.map((C,T)=>C+n[T]+n[T+g]).map((C,T)=>Math.floor((C-w[T]+u[T])/u[T]));return S.splice(0,0,p),S.splice(c?3:1,0,b),S},Ki=[2,3,1,0],Q2=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length>5)throw new Error("greater than 5D is not supported");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let o=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[1]*t.group;if(o!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");if(e.length===3&&(e[2].dims.length!==1||e[1].dims[0]!==e[2].dims[0]))throw new Error("invalid bias");let u=e[0].dims.length-2;if(t.dilations.length!==u)throw new Error(`dilations should be ${u}D`);if(t.strides.length!==u)throw new Error(`strides should be ${u}D`);if(t.pads.length!==u*2)throw new Error(`pads should be ${u*2}D`);if(t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape")},Ji=(e,t)=>{let o=e.kernelShape.slice();o.length{let t=gn(e),o=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],u=e.dilations,c=e.group,p=e.kernel_shape,m=e.pads,g=e.strides,b=e.w_is_const();return{autoPad:n,format:o,dilations:u,group:c,kernelShape:p,pads:m,strides:g,wIsConst:b,...t,cacheKey:`${e.format};${t.activation};`}},Uf=(e,t,o,n)=>{let u=o.format==="NHWC",c=Z2(t[0].dims,t[1].dims,o.dilations,o.pads,o.strides,u);if(o.group!==1){let N=[t[0]];if(u){let K=e.kernelCustomData.wT??e.compute(Fe(t[1],Ki),{inputs:[1],outputs:[o.wIsConst?-2:-1]})[0];o.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=K),N.push(K)}else 
N.push(t[1]);t.length===3&&N.push(t[2]),!e.adapterInfo.isArchitecture("ampere")&&u&&t[1].dims[0]===o.group&&t[1].dims[1]===1&&o.dilations[0]===1&&o.dilations[1]===1?e.compute(Mf(N,o,c,n),{inputs:N}):e.compute(jf(N,o,c,n),{inputs:N});return}let p=t.length===3,m=t[0].dims[u?1:2],g=t[0].dims[u?2:3],b=t[0].dims[u?3:1],_=t[1].dims[2],w=t[1].dims[3],x=c[u?1:2],S=c[u?2:3],C=c[u?3:1],T=u&&_===m&&w===g&&o.pads[0]===0&&o.pads[1]===0;if(T||_===1&&w===1&&o.dilations[0]===1&&o.dilations[1]===1&&o.strides[0]===1&&o.strides[1]===1&&o.pads[0]===0&&o.pads[1]===0){let N=c[0],q,K,Q,ne=[];if(u){let ge=e.kernelCustomData.wT??e.compute(Fe(t[1],Ki),{inputs:[1],outputs:[o.wIsConst?-2:-1]})[0];if(o.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=ge),T){let re=m*g*b;q=t[0].reshape([1,N,re]),K=ge.reshape([1,re,C]),Q=[1,N,C]}else q=t[0].reshape([N,m*g,b]),K=ge.reshape([1,b,C]),Q=[N,x*S,C];ne.push(q),ne.push(K)}else q=t[0].reshape([N,b,m*g]),K=t[1].reshape([1,C,b]),Q=[N,C,x*S],ne.push(K),ne.push(q);p&&ne.push(t[2]);let se=Q[2],ue=ne[0].dims[ne[0].dims.length-1];se<8&&ue<8?e.compute(bn(ne,o,c,Q,u,n),{inputs:ne}):e.compute(wr(ne,o,c,Q,u,n),{inputs:ne});return}let z=!0,k=e.kernelCustomData.wT??e.compute(Fe(t[1],Ki),{inputs:[1],outputs:[o.wIsConst?-2:-1]})[0];o.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=k);let A=[t[0],k];p&&A.push(t[2]);let O=u?x*S:C,B=u?C:x*S,W=_*w*b;e.compute(kf(A,o,c,O,B,W,p,z,n),{inputs:A})},Y2=(e,t)=>{let o=t.format==="NHWC",n=[e.inputs[0].reshape(o?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let u=[0,t.pads[0],0,t.pads[1]],c=[1].concat(t.strides),p=[1].concat(t.dilations),m=[1].concat(t.kernelShape),g=Ji({...t,pads:u,strides:c,dilations:p,kernelShape:m},n);Uf(e,n,g,b=>o?[b[0],b[2],b[3]]:[b[0],b[1],b[3]])},X2=(e,t,o)=>{let 
n=o.format==="NHWC"?"channelsLast":"channelsFirst",u=Ji(o,t),c=o.autoPad==="NOTSET"?o.pads:o.autoPad,p=zf(t[0].dims,t[1].dims,o.strides,o.dilations,c,!1,n);e.compute(Bf(t,u,p.outShape,[p.filterDepth,p.filterHeight,p.filterWidth],[p.padInfo.front,p.padInfo.top,p.padInfo.left],n))},Qi=(e,t)=>{if(Q2(e.inputs,t),e.inputs[0].dims.length===3)Y2(e,t);else if(e.inputs[0].dims.length===5)X2(e,e.inputs,t);else{let o=Ji(t,e.inputs);Uf(e,e.inputs,o)}}});var Vf,Wf=X(()=>{"use strict";ce();yt();be();we();Vf=(e,t,o)=>{let n=e.length>2,u=t.outputShape,c=t.format==="NHWC",p=t.group,m=e[1].dims,g=m[2]/p,b=m[3],_=c?Ae(g):1,w=c&&b===1&&g>=4,x=w?Math.floor(g/4)*4:Math.floor(g/_)*_,S=g-x,C=c?Ae(b):1,T=c?b===1?_:C:1,z=L.size(u)/C,k=[Math.ceil(z/64),1,1];$e("verbose",()=>`[conv2d_backprop_webgpu] dispatch = ${k}`);let A=["rank","rank"],O=[t.strides[0],t.strides[1]],B=[t.kernelShape[c?1:2],t.kernelShape[c?2:3]],W=[t.dilations[0],t.dilations[1]],N=[B[0]+(t.dilations[0]<=1?0:(t.kernelShape[c?1:2]-1)*(t.dilations[0]-1)),B[1]+(t.dilations[1]<=1?0:(t.kernelShape[c?2:3]-1)*(t.dilations[1]-1))],q=[N[0]-1-Math.floor((t.pads[0]+t.pads[2])/2),N[1]-1-Math.floor((t.pads[1]+t.pads[3])/2)],K=[{type:12,data:z},{type:12,data:O},{type:12,data:B},{type:12,data:W},{type:12,data:N},{type:6,data:q},{type:12,data:x},{type:12,data:g},{type:12,data:b},...te(e[0].dims,e[1].dims)];n&&(K.push(...te(e[2].dims)),A.push("rank")),K.push(...te(u));let Q=ne=>{let 
se=[{name:"output_size",type:"u32"},{name:"strides",type:"u32",length:O.length},{name:"filter_dims",type:"u32",length:B.length},{name:"dilations",type:"u32",length:B.length},{name:"effective_filter_dims",type:"u32",length:N.length},{name:"pads",type:"i32",length:q.length},{name:"input_channels_per_group_int",type:"u32"},{name:"input_channels_per_group",type:"u32"},{name:"output_channels_per_group",type:"u32"}],ue=Be(e[0].dataType),ge=c?1:2,re=c?2:3,Se=c?3:1,fe=F("W",e[1].dataType,e[1].dims.length,T),ie=F("Dy",e[0].dataType,e[0].dims.length,_),ve=[ie,fe];n&&ve.push(F("bias",e[2].dataType,[u[Se]].length,C));let le=Y("result",e[0].dataType,u.length,C),me=()=>{let he="";if(w)_===4?he+=` + let xValue = ${ie.getByOffset("x_offset")}; + let wValue = ${fe.getByOffset("w_offset")}; + dotProd = dotProd + dot(xValue, wValue); + x_offset += 1u; + w_offset += 1u;`:_===2?he+=` + dotProd = dotProd + dot(vec4<${ue}>(${ie.getByOffset("x_offset")}, ${ie.getByOffset("x_offset + 1u")}), vec4<${ue}>(${fe.getByOffset("w_offset")}, ${fe.getByOffset("w_offset + 1u")})); + x_offset += 2u; + w_offset += 2u;`:_===1&&(he+=` + dotProd = dotProd + dot(vec4<${ue}>(${ie.getByOffset("x_offset")}, ${ie.getByOffset("x_offset + 1u")}, ${ie.getByOffset("x_offset + 2u")}, ${ie.getByOffset("x_offset + 3u")}), vec4<${ue}>(${fe.getByOffset("w_offset")}, ${fe.getByOffset("w_offset + 1u")}, ${fe.getByOffset("w_offset + 2u")}, ${fe.getByOffset("w_offset + 3u")})); + x_offset += 4u; + w_offset += 4u;`);else if(he+=` + let xValue = ${c?ie.getByOffset(`${ie.indicesToOffset(`${ie.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${_}`):ie.get("batch","inputChannel","idyR","idyC")}; + `,_===1)he+=` + let w_offset = ${fe.indicesToOffset(`${fe.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel, wOutChannel)`)}; + let wValue = ${fe.getByOffset(`w_offset / ${T}`)}; + dotProd = dotProd + xValue * wValue;`;else for(let R=0;R<_;R++)he+=` + let wValue${R} = 
${fe.getByOffset(`${fe.indicesToOffset(`${fe.type.indices}(u32(wRPerm), u32(wCPerm), inputChannel + ${R}, wOutChannel)`)} / ${T}`)}; + dotProd = dotProd + xValue[${R}] * wValue${R};`;return he},ke=()=>{if(S===0)return"";if(!w)throw new Error(`packInputAs4 ${w} is not true.`);let he="";if(_===1){he+="dotProd = dotProd";for(let R=0;R(i32(r), i32(c)) - uniforms.pads; + let dyRCorner = dyCorner.x; + let dyCCorner = dyCorner.y; + let groupId = d1 / uniforms.output_channels_per_group; + let wOutChannel = d1 - groupId * uniforms.output_channels_per_group; + // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1). + // ? = to be determined. : = across all values in that axis. + var dotProd = ${le.type.value}(0.0); + var wR: u32 = 0; + if (uniforms.dilations.x == 1) { + // Minimum wR >= 0 that satisfies (dyRCorner + wR) % (uniforms.strides.x) == 0 + wR = u32(((dyRCorner + i32(uniforms.strides.x) - 1) / i32(uniforms.strides.x)) * i32(uniforms.strides.x) - dyRCorner); + } + for (; wR < uniforms.effective_filter_dims.x; wR = wR + 1) { + if (wR % uniforms.dilations.x != 0) { + continue; + } + let dyR = (${ue}(dyRCorner) + ${ue}(wR)) / ${ue}(uniforms.strides[0]); + let wRPerm = uniforms.filter_dims.x - 1 - wR / uniforms.dilations.x; + if (dyR < 0.0 || dyR >= ${ue}(uniforms.Dy_shape[${ge}]) || fract(dyR) > 0.0 || + wRPerm < 0) { + continue; + } + let idyR: u32 = u32(dyR); + var wC: u32 = 0; + if (uniforms.dilations.y == 1) { + // Minimum wC >= 0 that satisfies (dyCCorner + wC) % (uniforms.strides.y) == 0 + wC = u32(((dyCCorner + i32(uniforms.strides.y) - 1) / i32(uniforms.strides.y)) * i32(uniforms.strides.y) - dyCCorner); + } + for (; wC < uniforms.effective_filter_dims.y; wC = wC + 1) { + if (wC % uniforms.dilations.y != 0) { + continue; + } + let dyC = (${ue}(dyCCorner) + ${ue}(wC)) / ${ue}(uniforms.strides.y); + let wCPerm = uniforms.filter_dims.y - 1 - wC / uniforms.dilations.y; + if (dyC < 0.0 || dyC >= ${ue}(uniforms.Dy_shape[${re}]) || + fract(dyC) > 0.0 
|| wCPerm < 0) { + continue; + } + let idyC: u32 = u32(dyC); + var inputChannel = groupId * uniforms.input_channels_per_group; + ${w?` + var x_offset = ${ie.indicesToOffset(`${ie.type.indices}(batch, idyR, idyC, inputChannel)`)} / ${_}; + var w_offset = ${fe.indicesToOffset(`${fe.type.indices}(wRPerm, wCPerm, inputChannel, wOutChannel)`)} / ${T}; + `:""} + for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group_int; d2 = d2 + ${w?4:_}) { + ${me()} + inputChannel = inputChannel + ${w?4:_}; + } + ${ke()} + wC = wC + uniforms.strides.y - 1; + } + wR = wR + uniforms.strides[0] - 1; + } + let value = dotProd${n?` + bias[d1 / ${C}]`:""}; + ${le.setByOffset("global_idx","value")}; + `;return` + ${ne.registerUniforms(se).declareVariables(...ve,le)} + ${ne.mainStart()} + ${ne.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}; + ${je}}`};return{name:"ConvTranspose2D",shaderCache:{hint:`${t.cacheKey};${_}${T}${C}${w}${S}`,inputDependencies:A},getRunData:()=>({dispatchGroup:{x:k[0],y:k[1],z:k[2]},outputs:[{dims:o?o(u):u,dataType:e[0].dataType}],programUniforms:K}),getShaderSource:Q}}});var ex,tx,rx,Lf,Gf,nx,Hf,ix,Ff,qf=X(()=>{"use strict";Wf();Dt();Tt();ex=(e,t,o,n,u,c)=>(e-1)*t+o+(n-1)*u+1-c,tx=(e,t,o,n,u)=>{let c=Math.floor(e/2);t==="SAME_UPPER"?(o[n]=c,o[u]=e-c):t==="SAME_LOWER"&&(o[n]=e-c,o[u]=c)},rx=(e,t,o,n,u,c,p,m,g,b)=>{let _=e.length-2,w=b.length===0;g.length<_&&g.push(...Array(_-g.length).fill(0));let x=e[0],S=t[m?3:1]*u;for(let C=0,T=e.length-_-(m?1:0);C<_;++C,++T){let z=e[T],k=w?z*p[C]:b[C],A=ex(z,p[C],c[C],t[T],o[C],k);tx(A,n,c,C,C+_),w&&b.push(p[C]*(z-1)+g[C]+(t[T]-1)*o[C]+1-c[C]-c[C+_])}b.splice(0,0,x),b.splice(m?3:1,0,S)},Lf=(e,t)=>{let o=e.kernelShape.slice();if(e.kernelShape.length===0||e.kernelShape.reduce((w,x)=>w*x,1)===0){o.length=0;for(let w=2;ww+x,0)===0){let w=t[0].dims.length-2;g=new Array(w).fill(1)}let b=e.strides.slice();if(b.reduce((w,x)=>w+x,0)===0){let w=t[0].dims.length-2;b=new 
Array(w).fill(1)}rx(m,o,g,e.autoPad,e.group,u,b,n,p,c);let _=Object.assign({},e);return Object.assign(_,{kernelShape:o,pads:u,outputPadding:p,outputShape:c,dilations:g,strides:b}),_},Gf=e=>{let t=gn(e),o=e.format,n=["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][typeof e.autoPad>"u"?0:e.autoPad],u=e.dilations,c=e.group,p=e.kernelShape,m=e.pads,g=e.strides,b=e.wIsConst(),_=e.outputPadding,w=e.outputShape;return{autoPad:n,format:o,dilations:u,group:c,kernelShape:p,outputPadding:_,outputShape:w,pads:m,strides:g,wIsConst:b,...t,cacheKey:`${e.format};${t.activation};`}},nx=(e,t)=>{if(!e||e.length!==2&&e.length!==3)throw new Error("Conv requires 2 or 3 inputs");if(e[0].dims.length!==4&&e[0].dims.length!==3)throw new Error("currently only support 2-dimensional conv");if(e[0].dims.length!==e[1].dims.length)throw new Error("filter does not have same dimension as input");let o=e[0].dims[t.format==="NHWC"?e[0].dims.length-1:1],n=e[1].dims[0];if(o!==n)throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");let u=e[1].dims[1]*t.group;if(e.length===3&&(e[2].dims.length!==1||e[2].dims[0]!==u))throw new Error("invalid bias");let c=e[0].dims.length-2;if(t.dilations.reduce((_,w)=>_+w,0)>0&&t.dilations.length!==c)throw new Error(`dilations should be ${c}D`);if(t.strides.reduce((_,w)=>_+w,0)>0&&t.strides.length!==c)throw new Error(`strides should be ${c}D`);if(t.pads.reduce((_,w)=>_+w,0)>0&&t.pads.length!==c*2)throw new Error(`pads should be ${c*2}D`);if(t.outputPadding.length!==c&&t.outputPadding.length!==0)throw new Error(`output_padding should be ${c}D`);if(t.kernelShape.reduce((_,w)=>_+w,0)>0&&t.kernelShape.length!==0&&t.kernelShape.length!==e[1].dims.length-2)throw new Error("invalid kernel shape");if(t.outputShape.length!==0&&t.outputShape.length!==e[0].dims.length-2)throw new Error("invalid output shape")},Hf=(e,t,o,n)=>{let 
u=e.kernelCustomData.wT??e.compute(Fe(t[1],[2,3,0,1]),{inputs:[1],outputs:[o.wIsConst?-2:-1]})[0];o.wIsConst&&!e.kernelCustomData.wT&&(e.kernelCustomData.wT=u);let c=[t[0],u];t.length===3&&c.push(t[2]),e.compute(Vf(c,o,n),{inputs:c})},ix=(e,t)=>{let o=t.format==="NHWC",n=[e.inputs[0].reshape(o?[e.inputs[0].dims[0],1,e.inputs[0].dims[1],e.inputs[0].dims[2]]:[e.inputs[0].dims[0],e.inputs[0].dims[1],1,e.inputs[0].dims[2]]),e.inputs[1].reshape([e.inputs[1].dims[0],e.inputs[1].dims[1],1,e.inputs[1].dims[2]])];e.inputs.length===3&&n.push(e.inputs[2]);let u=t.kernelShape;(u.length===0||u[0]===0)&&(u=[e.inputs[1].dims[2]]);let c=t.dilations;(c.length===0||c[0]===0)&&(c=[1]);let p=t.strides;(p.length===0||p[0]===0)&&(p=[1]);let m=t.pads;m.length===0&&(m=[0,0]),m=[0,m[0],0,m[1]],p=[1].concat(p),c=[1].concat(c),u=[1].concat(u);let g=t.outputPadding;g=[0].concat(g);let b=Lf({...t,pads:m,strides:p,dilations:c,kernelShape:u,outputPadding:g},n);Hf(e,n,b,_=>o?[_[0],_[2],_[3]]:[_[0],_[1],_[3]])},Ff=(e,t)=>{if(nx(e.inputs,t),e.inputs[0].dims.length===3)ix(e,t);else{let o=Lf(t,e.inputs);Hf(e,e.inputs,o)}}});var ox,Kf,Jf,Zf=X(()=>{"use strict";ce();be();We();we();ox=(e,t,o,n)=>{let u=L.size(t),c=t.length,p=F("input",e,c),m=Y("output",e,c),g=o.dataType===6?o.getInt32Array()[0]:Number(o.getBigInt64Array()[0]),b=L.normalizeAxis(g,c),_=w=>{let x=` i32(${p.indicesGet("inputIndices","uniforms.axis")}) `,S=ae("uniforms.input_shape","uniforms.axis",c),C=n.reverse?x+(n.exclusive?" 
+ 1":""):"0",T=n.reverse?S:x+(n.exclusive?"":" + 1");return` + ${w.registerUniform("outputSize","u32").registerUniform("axis","u32").declareVariables(p,m)} + ${w.mainStart()} + ${w.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var inputIndices = ${m.offsetToIndices("global_idx")}; + var sum = ${m.type.value}(0); + let first : i32 = ${C}; + let last : i32 = ${T}; + for (var i : i32 = first; i < last; i++) { + ${p.indicesSet("inputIndices","uniforms.axis","u32(i)")}; + sum = sum + ${p.getByIndices("inputIndices")}; + } + ${m.setByOffset("global_idx","sum")}; + }`};return{name:"CumSum",shaderCache:{hint:n.cacheKey,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:t,dataType:e}],dispatchGroup:{x:Math.ceil(u/64)},programUniforms:[{type:12,data:u},{type:12,data:b},...te(t,t)]}),getShaderSource:_}},Kf=(e,t)=>{let o=e.inputs[0].dims,n=e.inputs[0].dataType,u=e.inputs[1];e.compute(ox(n,o,u,t),{inputs:[0]})},Jf=e=>{let t=e.exclusive===1,o=e.reverse===1;return pe({exclusive:t,reverse:o})}});var ax,sx,ux,Qf,Yf,Xf=X(()=>{"use strict";ce();be();We();we();ax=e=>{if(!e||e.length!==1)throw new Error("DepthToSpace requires 1 input.");if(e[0].dims.length!==4)throw new Error("DepthToSpace requires 4D input.")},sx=(e,t,o,n)=>{let u=[];u.push(`fn perm(i: ${n.type.indices}) -> ${o.type.indices} { + var a: ${o.type.indices};`);for(let c=0;c{let o,n,u,c,p,m,g=t.format==="NHWC",b=t.blocksize,_=t.mode==="DCR";g?([o,n,u,c]=e.dims,p=_?[o,n,u,b,b,c/b**2]:[o,n,u,c/b**2,b,b],m=_?[0,1,3,2,4,5]:[0,1,4,2,5,3]):([o,n,u,c]=[e.dims[0],e.dims[2],e.dims[3],e.dims[1]],p=_?[o,b,b,c/b**2,n,u]:[o,c/b**2,b,b,n,u],m=_?[0,3,4,1,5,2]:[0,1,4,2,5,3]);let w=e.reshape(p),x=w.dims.length,S=e.dataType,C=F("a",S,x),T=Y("output",S,x),z=k=>` + ${k.registerUniform("output_size","u32").declareVariables(C,T)} + + ${sx(m,x,C,T)} + + ${k.mainStart()} + ${k.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${T.offsetToIndices("global_idx")}; + let aIndices = 
perm(indices); + + ${T.setByOffset("global_idx",C.getByIndices("aIndices"))} + }`;return{name:"DepthToSpace",shaderCache:{hint:`${e.dims};${t.blocksize};${t.mode}`,inputDependencies:["rank"]},getRunData:k=>{let A=g?[o,n*b,u*b,c/b**2]:[o,c/b**2,n*b,u*b],O=L.size(A),B=w.dims,W=L.sortBasedOnPerm(B,m);return{outputs:[{dims:A,dataType:k[0].dataType}],dispatchGroup:{x:Math.ceil(O/64)},programUniforms:[{type:12,data:O},...te(B,W)]}},getShaderSource:z}},Qf=(e,t)=>{ax(e.inputs),e.compute(ux(e.inputs[0],t))},Yf=e=>pe({blocksize:e.blocksize,mode:e.mode,format:e.format})});var Yi,xn,em,lx,dx,Xi,eo,tm,cx,rm,nm,im=X(()=>{"use strict";ce();be();We();we();Yi="[a-zA-Z]|\\.\\.\\.",xn="("+Yi+")+",em="^"+xn+"$",lx="("+xn+",)*"+xn,dx="^"+lx+"$",Xi=class{constructor(t=-1){this.symbolToIndices=new Map,this.inputIndex=t}addSymbol(t,o){let n=this.symbolToIndices.get(t);n===void 0?n=[o]:n.push(o),this.symbolToIndices.set(t,n)}},eo=class{constructor(t,o){this.equation=o;this.hasEllipsis=!1,this.symbolToInfo=new Map,this.lhs=new Array,this.outputDims=[];let[n,u]=o.includes("->")?o.split("->",2):[o,""];if(!n.match(RegExp(dx)))throw new Error("Invalid LHS term");if(n.split(",").forEach((m,g)=>{let b=t[g].dims.slice();if(!m.match(RegExp(em)))throw new Error("Invalid LHS term");let _=this.processTerm(m,!0,b,g);this.lhs.push(_)}),u==="")u+=[...this.symbolToInfo.entries()].filter(([m,g])=>g.count===1||m==="...").map(([m])=>m).join("");else if(!u.match(RegExp(xn)))throw new Error("Invalid RHS");u.match(RegExp(Yi,"g"))?.forEach(m=>{if(m==="...")this.outputDims=this.outputDims.concat(this.ellipsisDims);else{let g=this.symbolToInfo.get(m);if(g===void 0)throw new Error("Invalid RHS symbol");this.outputDims.push(g.dimValue)}}),this.rhs=this.processTerm(u,!1,this.outputDims)}addSymbol(t,o,n){let u=this.symbolToInfo.get(t);if(u!==void 0){if(u.dimValue!==o&&u.count!==1)throw new Error("Dimension mismatch");u.count++,u.inputIndices.push(n)}else 
u={count:1,dimValue:o,inputIndices:[n]};this.symbolToInfo.set(t,u)}processTerm(t,o,n,u=-1){let c=n.length,p=!1,m=[],g=0;if(!t.match(RegExp(em))&&!o&&t!=="")throw new Error("Invalid LHS term");let b=t.match(RegExp(Yi,"g")),_=new Xi(u);return b?.forEach((w,x)=>{if(w==="..."){if(p)throw new Error("Only one ellipsis is allowed per input term");p=!0;let S=c-b.length+1;if(S<0)throw new Error("Ellipsis out of bounds");if(m=n.slice(g,g+S),this.hasEllipsis){if(this.ellipsisDims.length!==m.length||this.ellipsisDims.toString()!==m.toString())throw new Error("Ellipsis dimensions mismatch")}else if(o)this.hasEllipsis=!0,this.ellipsisDims=m;else throw new Error("Ellipsis must be specified in the LHS");for(let C=0;Ce+"_max",cx=(e,t,o,n)=>{let c=e.map(_=>_.length).map((_,w)=>F(`input${w}`,t,_)),p=L.size(n),m=Y("output",t,n.length),g=[...o.symbolToInfo.keys()].filter(_=>!o.rhs.symbolToIndices.has(_)),b=_=>{let w=[],x="var prod = 1.0;",S="var sum = 0.0;",C="sum += prod;",T=[],z=[],k=[],A=[],O=o.symbolToInfo.size===o.rhs.symbolToIndices.size;o.symbolToInfo.forEach((W,N)=>{if(o.rhs.symbolToIndices.has(N)){let q=o.rhs.symbolToIndices.get(N)?.[0];q!==void 0&&o.lhs.forEach((K,Q)=>{if(W.inputIndices.includes(Q)){let ne=K.symbolToIndices.get(N);if(ne===void 0)throw new Error("Invalid symbol error");ne.forEach(se=>{w.push(`${c[Q].indicesSet(`input${Q}Indices`,se,m.indicesGet("outputIndices",q))}`)})}})}else o.lhs.forEach((q,K)=>{if(W.inputIndices.includes(K)){let Q=q.symbolToIndices.get(N);if(Q===void 0)throw new Error("Invalid symbol error");Q.forEach(ne=>{T.push(`${c[K].indicesSet(`input${K}Indices`,ne,`${N}`)}`)}),A.push(`prod *= ${c[K].getByIndices(`input${K}Indices`)};`)}}),z.push(`for(var ${N}: u32 = 0; ${N} < uniforms.${tm(N)}; ${N}++) {`),k.push("}")});let B=O?[...w,`let sum = ${c.map((W,N)=>W.getByIndices(`input${N}Indices`)).join(" * ")};`]:[...w,S,...z,...T,x,...A,C,...k];return` + 
${_.registerUniforms(g.map(W=>({name:`${tm(W)}`,type:"u32"}))).registerUniform("outputSize","u32").declareVariables(...c,m)} + + ${_.mainStart()} + ${_.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var outputIndices = ${m.offsetToIndices("global_idx")}; + ${c.map((W,N)=>`var input${N}Indices: ${c[N].type.indices};`).join(` +`)} + ${B.join(` +`)}; + ${m.setByOffset("global_idx","sum")}; + }`};return{name:"Einsum",shaderCache:{hint:o.equation,inputDependencies:e.map(()=>"rank")},getRunData:()=>{let _=g.filter(x=>o.symbolToInfo.has(x)).map(x=>({type:12,data:o.symbolToInfo.get(x)?.dimValue||0}));_.push({type:12,data:p});let w=e.map((x,S)=>[...te(x)]).reduce((x,S)=>x.concat(S),_);return w.push(...te(n)),{outputs:[{dims:n,dataType:t}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:w}},getShaderSource:b}},rm=(e,t)=>{let o=new eo(e.inputs,t.equation),n=o.outputDims,u=e.inputs.map((c,p)=>c.dims);e.compute(cx(u,e.inputs[0].dataType,o,n))},nm=e=>{let t=e.equation.replace(/\s+/g,"");return pe({equation:t})}});var px,om,fx,mx,am,sm=X(()=>{"use strict";ce();be();we();px=e=>{if(!e||e.length!==2)throw new Error("Expand requires 2 input.");let t=e[0].dims,o=Array.from(e[1].getBigInt64Array(),Number),n=o.length{let o=e.length-t.length,n=[];for(let u=0;ue.length>t.length?om(e,t):om(t,e),mx=e=>{let t=e[0].dims,o=Array.from(e[1].getBigInt64Array(),Number),n=fx(t,o),u=e[0].dataType,c=u===9||L.size(t)===1,p=u===9||t.length>0&&t[t.length-1]%4===0?4:1,m=c||n.length>0&&n[n.length-1]%4===0?4:1,g=Math.ceil(L.size(n)/m),b=w=>{let x=F("input",u,t.length,p),S=Y("output",u,n.length,m),C;if(u===9){let T=(z,k,A="")=>` + let outputIndices${k} = ${S.offsetToIndices(`outputOffset + ${k}u`)}; + let offset${k} = ${x.broadcastedIndicesToOffset(`outputIndices${k}`,S)}; + let index${k} = offset${k} / 4u; + let component${k} = offset${k} % 4u; + ${z}[${k}] = ${A}(${x.getByOffset(`index${k}`)}[component${k}]); + `;C=` + let outputOffset = global_idx * ${m}; + var data = vec4(0); + 
${T("data",0,"u32")} + ${T("data",1,"u32")} + ${T("data",2,"u32")} + ${T("data",3,"u32")} + ${S.setByOffset("global_idx","data")} + }`}else C=` + let outputIndices = ${S.offsetToIndices(`global_idx * ${m}`)}; + let inputOffset = ${x.broadcastedIndicesToOffset("outputIndices",S)}; + let data = ${S.type.value}(${x.getByOffset(`inputOffset / ${p}`)}); + ${S.setByOffset("global_idx","data")} + }`;return` + ${w.registerUniform("vec_size","u32").declareVariables(x,S)} + ${w.mainStart()} + ${w.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${C}`},_=[{type:12,data:g},...te(t,n)];return{name:"Expand",shaderCache:{hint:`${n.length};${p}${m}`,inputDependencies:["rank"]},getShaderSource:b,getRunData:()=>({outputs:[{dims:n,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:_})}},am=e=>{px(e.inputs),e.compute(mx(e.inputs),{inputs:[0]})}});var hx,um,lm=X(()=>{"use strict";ce();be();we();hn();hx=e=>{let t=e[0].dataType,o=L.size(e[0].dims),n=L.size(e[1].dims),u=n%4===0,c=p=>{let m=F("x",t,[1],4),g=F("bias",t,[1],4),b=Y("y",t,[1],4),_=[{name:"output_vec_size",type:"u32"},{name:"bias_size",type:"u32"}],w=S=>` + let bias${S}_offset: u32 = (global_idx * 4 + ${S}) % uniforms.bias_size; + let bias${S} = ${g.getByOffset(`bias${S}_offset / 4`)}[bias${S}_offset % 4];`,x=u?` + let bias = ${g.getByOffset("global_idx % (uniforms.bias_size / 4)")};`:`${w(0)}${w(1)}${w(2)}${w(3)} + let bias = ${m.type.value}(bias0, bias1, bias2, bias3);`;return`${p.registerUniforms(_).declareVariables(m,g,b)} + + ${Gi(He(t))} + + ${p.mainStart(Ft)} + ${p.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_vec_size")} + + let x = ${m.getByOffset("global_idx")}; + ${x} + let x_in = x + bias; + ${b.setByOffset("global_idx",Hi("x_in"))} + 
}`};return{name:"FastGeluWithBias",shaderCache:{hint:`${u}`,inputDependencies:["type","type"]},getShaderSource:c,getRunData:p=>({outputs:[{dims:p[0].dims,dataType:p[0].dataType}],programUniforms:[{type:12,data:Math.ceil(o/4)},{type:12,data:n}],dispatchGroup:{x:Math.ceil(o/Ft/4)}})}},um=e=>{e.inputs.length<2||L.size(e.inputs[1].dims)===0?nf(e):e.compute(hx(e.inputs))}});var gx,yx,dm,cm,pm=X(()=>{"use strict";ce();be();We();we();gx=e=>{if(!e||e.length!==2)throw new Error("Gather requires 2 inputs.")},yx=(e,t)=>{let o=e[0].dims,n=e[1].dims,u=o.length,c=L.normalizeAxis(t.axis,u),p=o.slice(0);p.splice(c,1,...n);let m=o[c],g=e[0].dataType===9?4:1,b=Math.ceil(L.size(p)/g),_=[{type:12,data:b},{type:6,data:m},{type:12,data:c},...te(e[0].dims,e[1].dims,p)],w=x=>{let S=F("data",e[0].dataType,e[0].dims.length,g),C=F("inputIndices",e[1].dataType,e[1].dims.length),T=Y("output",e[0].dataType,p.length,g),z=A=>{let O=n.length,B=`var indicesIndices${A} = ${C.type.indices}(0);`;for(let W=0;W1?`indicesIndices${A}[${W}]`:`indicesIndices${A}`} = ${p.length>1?`outputIndices${A}[uniforms.axis + ${W}]`:`outputIndices${A}`};`;B+=` + var idx${A} = ${C.getByIndices(`indicesIndices${A}`)}; + if (idx${A} < 0) { + idx${A} = idx${A} + uniforms.axisDimLimit; + } + var dataIndices${A} : ${S.type.indices}; + `;for(let W=0,N=0;W1?`dataIndices${A}[${W}]`:`dataIndices${A}`} = u32(idx${A});`,N+=O):(B+=`${u>1?`dataIndices${A}[${W}]`:`dataIndices${A}`} = ${p.length>1?`outputIndices${A}[${N}]`:`outputIndices${A}`};`,N++);return B},k;if(e[0].dataType===9){let A=(O,B,W="")=>` + let outputIndices${B} = ${T.offsetToIndices(`outputOffset + ${B}u`)}; + ${z(B)}; + let offset${B} = ${S.indicesToOffset(`dataIndices${B}`)}; + let index${B} = offset${B} / 4u; + let component${B} = offset${B} % 4u; + ${O}[${B}] = ${W}(${S.getByOffset(`index${B}`)}[component${B}]); + `;k=` + let outputOffset = global_idx * ${g}; + var value = vec4(0); + ${A("value",0,"u32")} + ${A("value",1,"u32")} + ${A("value",2,"u32")} + 
${A("value",3,"u32")} + ${T.setByOffset("global_idx","value")} + `}else k=` + let outputIndices = ${T.offsetToIndices("global_idx")}; + ${z("")}; + let value = ${S.getByIndices("dataIndices")}; + ${T.setByOffset("global_idx","value")}; + `;return` + ${x.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(S,C,T)} + ${x.mainStart()} + ${x.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + ${k} + }`};return{name:"Gather",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:p,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(b/64)},programUniforms:_}),getShaderSource:w}},dm=e=>pe({axis:e.axis}),cm=(e,t)=>{let o=e.inputs;gx(o),e.compute(yx(e.inputs,t))}});var bx,fm,mm,hm=X(()=>{"use strict";ce();be();we();bx=(e,t,o,n,u,c,p,m,g)=>{let b=[{type:12,data:c},{type:12,data:n},{type:12,data:u},{type:12,data:o},{type:12,data:p},{type:12,data:m},{type:12,data:g}],_=[c];b.push(...te(t.dims,_));let w=x=>{let S=F("indices_data",t.dataType,t.dims.length),C=Y("input_slice_offsets_data",12,1,1),T=[S,C],z=[{name:"output_size",type:"u32"},{name:"batch_dims",type:"u32"},{name:"input_dims",type:"u32",length:u.length},{name:"sizes_from_slice_dims_data",type:"u32",length:o.length},{name:"num_slices_per_batch",type:"u32"},{name:"input_batch_stride",type:"u32"},{name:"num_slice_dims",type:"u32"}];return` + ${x.registerUniforms(z).declareVariables(...T)} + ${x.mainStart()} + ${x.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let batch_idx = global_idx / uniforms.num_slices_per_batch; + let base_offset = batch_idx * uniforms.input_batch_stride; + + let slice_indices_base_offset = global_idx * uniforms.num_slice_dims; + var relative_slice_offset = 0; + for (var dim_idx = 0u; dim_idx < uniforms.num_slice_dims; dim_idx ++) { + var index = i32(indices_data[dim_idx + slice_indices_base_offset].x); + let input_dim_idx = uniforms.batch_dims + dim_idx; + if 
(index < 0) { + ${u.length===1?"index += i32(uniforms.input_dims);":"index += i32(uniforms.input_dims[input_dim_idx]);"} + } + ${o.length===1?"relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data);":"relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data[dim_idx]);"} + } + + input_slice_offsets_data[global_idx] = base_offset + u32(relative_slice_offset); + }`};return e.compute({name:"computeSliceOffsets",shaderCache:{hint:`${u.length}_${o.length}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:_,dataType:e.inputs[1].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:b}),getShaderSource:w},{inputs:[t],outputs:[-1]})[0]},fm=(e,t)=>{let o=e.inputs,n=o[0].dims,u=o[0].dataType,c=o[1].dims,p=c[c.length-1],m=L.sizeToDimension(c,c.length-1),g=L.sizeFromDimension(n,t.batchDims+p),b=L.sizeToDimension(n,t.batchDims),_=L.sizeFromDimension(n,t.batchDims),w=m/b,x=new Array(p),S=g;for(let B=0;Bn.length)throw new Error("last dimension of indices must not be larger than rank of input tensor");let z=c.slice(0,-1).concat(n.slice(T)),k=L.size(z),A=[{type:12,data:k},{type:12,data:g},...te(o[0].dims,C.dims,z)],O=B=>{let W=F("data",o[0].dataType,o[0].dims.length),N=F("slice_offsets",12,C.dims.length),q=Y("output",o[0].dataType,z.length);return` + ${B.registerUniform("output_size","u32").registerUniform("slice_size","u32").declareVariables(W,N,q)} + ${B.mainStart()} + ${B.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let slice_offset = slice_offsets[global_idx / uniforms.slice_size]; + output[global_idx] = data[u32(slice_offset) + global_idx % uniforms.slice_size]; + }`};e.compute({name:"GatherND",shaderCache:{hint:t.cacheKey,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:z,dataType:u}],dispatchGroup:{x:Math.ceil(k/64)},programUniforms:A}),getShaderSource:O},{inputs:[o[0],C]})},mm=e=>({batchDims:e.batch_dims,cacheKey:""})});var _x,wx,gm,ym,bm=X(()=>{"use 
strict";ce();be();We();we();_x=(e,t)=>{if(e.length<3||e.length>4)throw new Error("GatherBlockQuantized requires 3 or 4 inputs.");let o=L.normalizeAxis(t.quantizeAxis,e[0].dims.length),n=t.blockSize,u=e[0],c=e[2],p=e.length===4?e[3]:void 0;if(c.dims.length!==u.dims.length||!u.dims.map((m,g)=>g===o?Math.ceil(m/n)===c.dims[g]:m===c.dims[g]).reduce((m,g)=>m&&g,!0))throw new Error("Scales must have the same rank as the input tensor and the dims should match except on gatherAxis.");if(p){if(p.dataType!==u.dataType)throw new Error("Zero point must have the same data type as the input tensor.");if(p.dims.length!==c.dims.length||!p.dims.map((m,g)=>m===c.dims[g]).reduce((m,g)=>m&&g,!0))throw new Error("Zero point must have the same rank as the input tensor and the dims should match except on quantizeAxis.")}},wx=(e,t)=>{let o=e[0].dims,n=e[1].dims,u=o.length,c=L.normalizeAxis(t.gatherAxis,u),p=L.normalizeAxis(t.quantizeAxis,u),m=o.slice(0);m.splice(c,1,...n);let g=L.size(m),b=e[2].dataType,w=e[0].dataType===22,x=[{type:12,data:g},{type:12,data:p},{type:12,data:c},{type:12,data:t.blockSize},...te(...e.map((C,T)=>C.dims),m)],S=C=>{let T=F("data",e[0].dataType,e[0].dims.length),z=F("inputIndices",e[1].dataType,e[1].dims.length),k=F("scales",e[2].dataType,e[2].dims.length),A=e.length>3?F("zeroPoint",e[3].dataType,e[3].dims.length):void 0,O=Y("output",b,m.length),B=[T,z,k];A&&B.push(A);let W=[{name:"output_size",type:"u32"},{name:"quantize_axis",type:"u32"},{name:"gather_axis",type:"u32"},{name:"block_size",type:"u32"}];return` + ${C.registerUniforms(W).declareVariables(...B,O)} + ${C.mainStart()} + let output_indices = ${O.offsetToIndices("global_idx")}; + var indices_indices = ${z.type.indices}(0); + ${n.length>1?` + for (var i: u32 = 0; i < ${n.length}; i++) { + let index = ${O.indicesGet("output_indices","uniforms.gather_axis + i")}; + ${z.indicesSet("indices_indices","i","index")}; + }`:`indices_indices = ${O.indicesGet("output_indices","uniforms.gather_axis")};`}; + var 
data_indices = ${T.type.indices}(0); + for (var i: u32 = 0; i < uniforms.gather_axis; i++) { + let index = ${O.indicesGet("output_indices","i")}; + ${T.indicesSet("data_indices","i","index")}; + } + var index_from_indices = ${z.getByIndices("indices_indices")}; + if (index_from_indices < 0) { + index_from_indices += ${o[c]}; + } + ${T.indicesSet("data_indices","uniforms.gather_axis","u32(index_from_indices)")}; + for (var i = uniforms.gather_axis + 1; i < ${m.length}; i++) { + let index = ${O.indicesGet("output_indices",`i + ${n.length} - 1`)}; + ${T.indicesSet("data_indices","i","index")}; + } + let data_offset = ${T.indicesToOffset("data_indices")}; + let data_index = data_offset % 8; + // Convert 4-bit packed data to 8-bit packed data. + let packed_4bit_quantized_data = ${T.getByOffset("data_offset / 8")}; + let packed_8bit_quantized_data = (packed_4bit_quantized_data >> (4 * (data_index % 2))) & 0x0f0f0f0f; + let quantized_data_vec = ${w?"unpack4xI8":"unpack4xU8"}(u32(packed_8bit_quantized_data)); + let quantized_data = quantized_data_vec[data_index / 2]; + var scale_indices = data_indices; + let quantize_axis_index = ${k.indicesGet("data_indices","uniforms.quantize_axis")} / uniforms.block_size; + ${k.indicesSet("scale_indices","uniforms.quantize_axis","quantize_axis_index")}; + var scale = ${k.getByIndices("scale_indices")}; + ${A?` + let zero_point_indices = scale_indices; + let zero_point_offset = ${A.indicesToOffset("zero_point_indices")}; + let zero_point_index = zero_point_offset % 8; + let packed_4bit_zero_points = ${A.getByOffset("zero_point_offset / 8")}; + let packed_8bit_zero_points = (packed_4bit_zero_points >> (4 * (zero_point_index % 2))) & 0x0f0f0f0f; + let zero_point_vec = ${w?"unpack4xI8":"unpack4xU8"}(u32(packed_8bit_zero_points)); + let zero_point = zero_point_vec[zero_point_index / 2];`:"var zero_point = 0"}; + let dequantized_data = ${He(b)}(quantized_data - zero_point) * scale; + ${O.setByOffset("global_idx","dequantized_data")}; + 
}`};return{name:"GatherBlockQuantized",shaderCache:{hint:`${t.cacheKey};${e.filter((C,T)=>T!==1).map(C=>C.dims.join("_")).join(";")}`,inputDependencies:Array.from({length:e.length},(C,T)=>"rank")},getRunData:()=>({outputs:[{dims:m,dataType:b}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:x}),getShaderSource:S}},gm=(e,t)=>{let o=e.inputs;_x(o,t),e.compute(wx(e.inputs,t))},ym=e=>pe({blockSize:e.blockSize,gatherAxis:e.gatherAxis,quantizeAxis:e.quantizeAxis})});var vx,xx,_m,wm,vm=X(()=>{"use strict";ce();be();We();we();vx=e=>{if(!e||e.length!==2)throw new Error("GatherElements requires 2 inputs.");if(e[0].dims.length<1)throw new Error("GatherElements requires that the data input be rank >= 1.");if(e[0].dims.length!==e[1].dims.length)throw new Error(`GatherElements requires that the data input and + indices input tensors be of same rank.`)},xx=(e,t)=>{let o=e[0].dims,n=e[0].dataType,u=o.length,c=e[1].dims,p=e[1].dataType,m=L.normalizeAxis(t.axis,u),g=o[m],b=c.slice(0),_=L.size(b),w=F("input",n,u),x=F("indicesInput",p,c.length),S=Y("output",n,b.length),C=[{type:12,data:_},{type:6,data:g},{type:12,data:m}];return C.push(...te(o,c,b)),{name:"GatherElements",shaderCache:{inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:b,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(_/64)},programUniforms:C}),getShaderSource:k=>` + ${k.registerUniform("outputSize","u32").registerUniform("axisDimLimit","i32").registerUniform("axis","u32").declareVariables(w,x,S)} + ${k.mainStart()} + ${k.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + + let outputIndices = ${S.offsetToIndices("global_idx")}; + + var idx = ${x.getByOffset("global_idx")}; + if (idx < 0) { + idx = idx + uniforms.axisDimLimit; + } + var inputIndices = ${w.type.indices}(outputIndices); + ${w.indicesSet("inputIndices","uniforms.axis","u32(idx)")}; + let value = ${w.getByIndices("inputIndices")}; + + ${S.setByOffset("global_idx","value")}; + }`}},_m=e=>pe({axis:e.axis}),wm=(e,t)=>{let 
o=e.inputs;vx(o),e.compute(xx(e.inputs,t))}});var $x,Cx,xm,$m,Cm=X(()=>{"use strict";ce();be();we();$x=e=>{if(!e)throw new Error("Input is missing");if(e.length<2||e.length>3)throw new Error("Invaid input number.");if(e.length===3&&e[2].dims.length>2)throw new Error("Invalid input shape of C");if(e[0].dataType!==e[1].dataType||e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("Input types are mismatched")},Cx=(e,t)=>{let o=e[0].dims.slice(),n=e[1].dims.slice(),[u,c,p]=tn.getShapeOfGemmResult(o,t.transA,n,t.transB,e.length===3?e[2].dims:void 0),m=[u,c];if(!m)throw new Error("Can't use gemm on the given tensors");let g=16,b=Math.ceil(c/g),_=Math.ceil(u/g),w=!0,x=L.size(m),S=[{type:12,data:w?b:x},{type:12,data:u},{type:12,data:c},{type:12,data:p},{type:1,data:t.alpha},{type:1,data:t.beta}],C=["type","type"];e.length===3&&(S.push(...te(e[2].dims)),C.push("rank")),S.push(...te(m));let T=k=>{let A="";t.transA&&t.transB?A="value += a[k * uniforms.M + m] * b[n * uniforms.K + k];":t.transA&&!t.transB?A="value += a[k * uniforms.M + m] * b[k * uniforms.N + n];":!t.transA&&t.transB?A="value += a[m * uniforms.K + k] * b[n * uniforms.K + k];":!t.transA&&!t.transB&&(A="value += a[m * uniforms.K + k] * b[k * uniforms.N + n];");let O=t.alpha===1?"":"value *= uniforms.alpha;",B=F("a",e[0].dataType,e[0].dims),W=F("b",e[1].dataType,e[1].dims),N=B.type.value,q=null,K=[B,W];e.length===3&&(q=F("c",e[2].dataType,e[2].dims.length),K.push(q));let Q=Y("output",e[0].dataType,m.length);K.push(Q);let ne=[{name:"output_size",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}];return` + ${k.registerUniforms(ne).declareVariables(...K)} + + ${k.mainStart()} + ${k.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let m = global_idx / uniforms.N; + let n = global_idx % uniforms.N; + + var value = ${N}(0); + for (var k: u32 = 0u; k < uniforms.K; k++) { + ${A} + } + + ${O} + ${q!=null?`let cOffset = 
${q.broadcastedIndicesToOffset("vec2(m, n)",Q)}; value += ${N}(uniforms.beta) * ${q.getByOffset("cOffset")};`:""} + output[global_idx] = value; + }`},z=k=>{let A=F("a",e[0].dataType,e[0].dims),O=F("b",e[1].dataType,e[1].dims),B=null,W=[A,O];e.length===3&&(B=F("c",e[2].dataType,e[2].dims.length),W.push(B));let N=Y("output",e[0].dataType,m.length);W.push(N);let q=[{name:"num_tile_n",type:"u32"},{name:"M",type:"u32"},{name:"N",type:"u32"},{name:"K",type:"u32"},{name:"alpha",type:"f32"},{name:"beta",type:"f32"}],K="",Q="";t.transA&&t.transB?(Q=` + var col = tile_row_start + local_id.x; + var row = k_start + local_id.y; + if (col < uniforms.M && row < uniforms.K) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.M + col]; + } else { + tile_a[local_id.y][local_id.x] = ${A.type.value}(0); + } + + col = k_start + local_id.x; + row = tile_col_start + local_id.y; + if (col < uniforms.K && row < uniforms.N) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.K + col]; + } else { + tile_b[local_id.y][local_id.x] = ${O.type.value}(0); + } + `,K="value += tile_a[k][local_id.y] * tile_b[local_id.x][k];"):t.transA&&!t.transB?(Q=` + var col = tile_row_start + local_id.x; + var row = k_start + local_id.y; + if (col < uniforms.M && row < uniforms.K) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.M + col]; + } else { + tile_a[local_id.y][local_id.x] = ${A.type.value}(0); + } + + col = tile_col_start + local_id.x; + row = k_start + local_id.y; + if (col < uniforms.N && row < uniforms.K) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.N + col]; + } else { + tile_b[local_id.y][local_id.x] = ${O.type.value}(0); + } + `,K="value += tile_a[k][local_id.y] * tile_b[k][local_id.x];"):!t.transA&&t.transB?(Q=` + var col = k_start + local_id.x; + var row = tile_row_start + local_id.y; + if (col < uniforms.K && row < uniforms.M) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.K + col]; + } else { + tile_a[local_id.y][local_id.x] = ${A.type.value}(0); + } + + col = k_start 
+ local_id.x; + row = tile_col_start + local_id.y; + if (col < uniforms.K && row < uniforms.N) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.K + col]; + } else { + tile_b[local_id.y][local_id.x] = ${O.type.value}(0); + } + `,K="value += tile_a[local_id.y][k] * tile_b[local_id.x][k];"):!t.transA&&!t.transB&&(Q=` + var col = k_start + local_id.x; + var row = tile_row_start + local_id.y; + if (col < uniforms.K && row < uniforms.M) { + tile_a[local_id.y][local_id.x] = a[row * uniforms.K + col]; + } else { + tile_a[local_id.y][local_id.x] = ${A.type.value}(0); + } + + col = tile_col_start + local_id.x; + row = k_start + local_id.y; + if (col < uniforms.N && row < uniforms.K) { + tile_b[local_id.y][local_id.x] = b[row * uniforms.N + col]; + } else { + tile_b[local_id.y][local_id.x] = ${O.type.value}(0); + } + `,K="value += tile_a[local_id.y][k] * tile_b[k][local_id.x];");let ne=t.alpha===1?"":"value *= uniforms.alpha;";return` + ${k.registerUniforms(q).declareVariables(...W)} + var tile_a: array, ${g}>; + var tile_b: array, ${g}>; + ${k.mainStart([g,g,1])} + let tile_col_start = (workgroup_index % uniforms.num_tile_n) * ${g}; + let tile_row_start = (workgroup_index / uniforms.num_tile_n) * ${g}; + let num_tiles = (uniforms.K - 1) / ${g} + 1; + var k_start = 0u; + var value = ${N.type.value}(0); + for (var t: u32 = 0u; t < num_tiles; t++) { + ${Q} + k_start = k_start + ${g}; + workgroupBarrier(); + + for (var k: u32 = 0u; k < ${g}; k++) { + ${K} + } + workgroupBarrier(); + } + + ${ne} + let m = tile_row_start + local_id.y; + let n = tile_col_start + local_id.x; + ${B!=null?`let cOffset = ${B.broadcastedIndicesToOffset("vec2(m, n)",N)}; value += ${N.type.value}(uniforms.beta) * ${B.getByOffset("cOffset")};`:""} + if (m < uniforms.M && n < uniforms.N) { + output[m * uniforms.N + n] = value; + } + }`};return 
w?{name:"GemmShared",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:C},getRunData:()=>({outputs:[{dims:m,dataType:e[0].dataType}],dispatchGroup:{x:b*_},programUniforms:S}),getShaderSource:z}:{name:"Gemm",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:C},getRunData:()=>({outputs:[{dims:m,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:S}),getShaderSource:T}},xm=e=>{let t=e.transA,o=e.transB,n=e.alpha,u=e.beta;return{transA:t,transB:o,alpha:n,beta:u,cacheKey:`${e.transA};${e.transB};${e.alpha===1}`}},$m=(e,t)=>{$x(e.inputs),e.compute(Cx(e.inputs,t))}});var It,jt,rr,nr,Sx,Tx,Ix,Ax,kx,Ex,Px,Ox,Sm,Tm,Im=X(()=>{"use strict";ce();be();We();we();[It,jt,rr,nr]=[0,1,2,3],Sx=e=>{if(e[0].dims.length!==4)throw new Error("only 4-D tensor is supported.");if(e[0].dims.length!==e[1].dims.length)throw new Error("input dimensions must be equal to grid dimensions");if(e[0].dims.length-2!==e[1].dims[e[1].dims.length-1])throw new Error(`last dimension of grid must be equal to ${e[0].dims.length-2}`);if(e[0].dims[0]!==e[1].dims[0])throw new Error("grid batch size must match input batch size")},Tx=` + fn gs_get_cubic_coeffs(x: f32) -> vec4 { + let cubic_alpha = -0.75f; + let x_abs = abs(x); + var coeffs: vec4; + coeffs[0] = (((cubic_alpha * (x_abs + 1) - 5 * cubic_alpha) * (x_abs + 1) + 8 * cubic_alpha) * (x_abs + 1) - 4 * cubic_alpha); + coeffs[1] = (((cubic_alpha + 2) * x_abs - (cubic_alpha + 3)) * x_abs * x_abs + 1); + coeffs[2] = (((cubic_alpha + 2) * (1 - x_abs) - (cubic_alpha + 3)) * (1 - x_abs) * (1 - x_abs) + 1); + coeffs[3] = (((cubic_alpha * (2 - x_abs) - 5 * cubic_alpha) * (2 - x_abs) + 8 * cubic_alpha) * (2 - x_abs) - 4 * cubic_alpha); + return coeffs; + } +`,Ix=e=>` + fn gs_bicubic_interpolate(p: mat4x4<${e}>, x: f32, y: f32) -> ${e} { + var v: vec4; + var coeffs = gs_get_cubic_coeffs(x); + for (var i = 0; i < 4; i++) { + v[i] = coeffs[0] * p[i][0] + coeffs[1] * p[i][1] + coeffs[2] * p[i][2] + coeffs[3] * p[i][3]; + } + coeffs = 
gs_get_cubic_coeffs(y); + let pixel = ${e}(coeffs[0] * v[0] + coeffs[1] * v[1] + coeffs[2] * v[2] + coeffs[3] * v[3]); + return pixel; + } +`,Ax=e=>` + fn gs_denormalize(n: f32, length: i32) -> f32 { + ${e.alignCorners===0?` + // alignCorners: false => [-1, 1] to [-0.5, length - 0.5] + return ((n + 1.0) * f32(length) - 1.0) / 2.0; + `:` + // alignCorners: true => [-1, 1] to [0, length - 1] + return (n + 1.0) / 2.0 * (f32(length - 1)); + `} + } +`,kx=e=>` + ${e.paddingMode==="reflection"?` + fn gs_reflect(x: i32, x_min: f32, x_max: f32) -> u32 { + var dx = 0.0; + var fx = f32(x); + let range = x_max - x_min; + if (fx < x_min) { + dx = x_min - fx; + let n = u32(dx / range); + let r = dx - f32(n) * range; + if (n % 2 == 0) { + fx = x_min + r; + } else { + fx = x_max - r; + } + } else if (fx > x_max) { + dx = fx - x_max; + let n = u32(dx / range); + let r = dx - f32(n) * range; + if (n % 2 == 0) { + fx = x_max - r; + } else { + fx = x_min + r; + } + } + return u32(fx); + }`:""} +`,Ex=(e,t,o)=>` + fn pixel_at_grid(r: i32, c: i32, H: i32, W: i32, batch: u32, channel: u32, border: vec4) -> ${t} { + var pixel = ${t}(0); + var indices = vec4(0); + indices[${It}] = batch; + indices[${jt}] = channel;`+(()=>{switch(o.paddingMode){case"zeros":return` + if (r >= 0 && r < H && c >=0 && c < W) { + indices[${rr}] = u32(r); + indices[${nr}] = u32(c); + } else { + return ${t}(0); + } + `;case"border":return` + indices[${rr}] = u32(clamp(r, 0, H - 1)); + indices[${nr}] = u32(clamp(c, 0, W - 1)); + `;case"reflection":return` + indices[${rr}] = gs_reflect(r, border[1], border[3]); + indices[${nr}] = gs_reflect(c, border[0], border[2]); + `;default:throw new Error(`padding mode ${o.paddingMode} is not supported`)}})()+` + return ${e.getByIndices("indices")}; + } +`,Px=(e,t,o)=>(()=>{switch(o.mode){case"nearest":return` + let result = pixel_at_grid(i32(round(y)), i32(round(x)), H_in, W_in, indices[${It}], indices[${jt}], border); + `;case"bilinear":return` + let x1 = i32(floor(x)); + let 
y1 = i32(floor(y)); + let x2 = x1 + 1; + let y2 = y1 + 1; + + let p11 = pixel_at_grid(y1, x1, H_in, W_in, indices[${It}], indices[${jt}], border); + let p12 = pixel_at_grid(y1, x2, H_in, W_in, indices[${It}], indices[${jt}], border); + let p21 = pixel_at_grid(y2, x1, H_in, W_in, indices[${It}], indices[${jt}], border); + let p22 = pixel_at_grid(y2, x2, H_in, W_in, indices[${It}], indices[${jt}], border); + + let dx2 = ${t}(f32(x2) - x); + let dx1 = ${t}(x - f32(x1)); + let dy2 = ${t}(f32(y2) - y); + let dy1 = ${t}(y - f32(y1)); + let result = dy2 * (dx2 * p11 + dx1 * p12) + dy1 * (dx2 * p21 + dx1 * p22); + `;case"bicubic":return` + let x0 = i32(floor(x)) - 1; + let y0 = i32(floor(y)) - 1; + var p: mat4x4<${t}>; + for (var h = 0; h < 4; h++) { + for (var w = 0; w < 4; w++) { + p[h][w] = pixel_at_grid(h + y0, w + x0, H_in, W_in, indices[${It}], indices[${jt}], border); + } + } + + let dx = x - f32(x0 + 1); + let dy = y - f32(y0 + 1); + let result = gs_bicubic_interpolate(p, dx, dy); + `;default:throw new Error(`mode ${o.mode} is not supported`)}})()+`${e.setByOffset("global_idx","result")}`,Ox=(e,t)=>{let o=F("x",e[0].dataType,e[0].dims.length),n=[e[1].dims[0],e[1].dims[1],e[1].dims[2]],u=F("grid",e[1].dataType,n.length,2),c=[e[0].dims[0],e[0].dims[1],e[1].dims[1],e[1].dims[2]];t.format==="NHWC"&&(c=[e[0].dims[0],e[1].dims[1],e[1].dims[2],e[0].dims[3]],[It,jt,rr,nr]=[0,3,1,2]);let p=Y("output",e[0].dataType,c.length),m=o.type.value,g=L.size(c),b=[{type:12,data:g},...te(e[0].dims,n,c)],_=w=>` + ${w.registerUniform("output_size","u32").declareVariables(o,u,p)} + ${Tx} + ${Ix(m)} + ${Ax(t)} + ${kx(t)} + ${Ex(o,m,t)} + + ${w.mainStart()} + ${w.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let H_in = i32(uniforms.x_shape[${rr}]); + let W_in = i32(uniforms.x_shape[${nr}]); + + ${t.alignCorners===0?` + let x_min = -0.5; + let x_max = f32(W_in) - 0.5; + let y_min = -0.5; + let y_max = f32(H_in) - 0.5; + `:` + let x_min = 0.0; + let x_max = f32(W_in) - 1.0; 
+ let y_min = 0.0; + let y_max = f32(H_in) - 1.0; + `}; + let border = vec4(x_min, y_min, x_max, y_max); + + let indices = ${p.offsetToIndices("global_idx")}; + var grid_indices = vec3(indices[${It}], indices[${rr}], indices[${nr}]); + let nxy = ${u.getByIndices("grid_indices")}; + var x = gs_denormalize(f32(nxy[0]), W_in); + var y = gs_denormalize(f32(nxy[1]), H_in); + + ${Px(p,m,t)} + }`;return{name:"GridSample",shaderCache:{hint:`${t.cacheKey}`,inputDependencies:["type","type"]},getRunData:w=>{let x=L.size(c);return{outputs:[{dims:c,dataType:w[0].dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:b}},getShaderSource:_}},Sm=(e,t)=>{Sx(e.inputs),e.compute(Ox(e.inputs,t))},Tm=e=>pe({alignCorners:e.align_corners,mode:e.mode,paddingMode:e.padding_mode,format:e.format})});var Qe,Dx,km,Am,jx,vr,Em,to=X(()=>{"use strict";ce();be();We();un();fn();we();Tt();Qe=(e,t)=>e.length>t&&e[t].dims.length>0?e[t]:void 0,Dx=(e,t)=>{let o=e[0],n=Qe(e,1),u=Qe(e,2),c=Qe(e,3),p=Qe(e,4),m=Qe(e,5),g=Qe(e,6),b=Qe(e,7);if(o.dims.length!==3&&o.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let _=o.dims[0],w=o.dims[1],x=o.dims.length===3?o.dims[2]:t.numHeads*o.dims[4],S=w,C=0,T=0,z=Math.floor(x/t.numHeads);if(g&&b&&L.size(g.dims)&&L.size(b.dims)){if(g.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(g.dims[0]!==_||g.dims[1]!==t.numHeads||g.dims[3]!==z)throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');if(b.dims[0]!==_||b.dims[1]!==t.numHeads||b.dims[3]!==z)throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');if(g.dims[2]!==b.dims[2])throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');if(b.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');C=g.dims[2],T=g.dims[2]}else if(g&&L.size(g.dims)||b&&L.size(b.dims))throw new Error('Input 
"past_key" and "past_value" shall be both present or both absent');let k;if(n&&L.size(n.dims)>0){if(o.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(o.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(n.dims[2]!==o.dims[2])throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');k=2,S=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==z)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(u)throw new Error('Expect "value" be none when "key" has packed kv format.');k=5,S=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==z)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');k=0,S=n.dims[2]}}else{if(o.dims.length!==5)throw new Error('Input "query" is expected to have 5 dimensions when key is empty');if(o.dims[2]!==t.numHeads||o.dims[3]!==3)throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');k=3}if(c&&L.size(c.dims)>0){if(c.dims.length!==1)throw new Error('Input "bias" is expected to have 1 dimension');if(n&&n.dims.length===5&&n.dims[3]===2)throw new Error("bias is not allowed for packed kv.")}let A=C+S,O=0;if(p&&L.size(p.dims)>0){O=8;let q=p.dims;throw q.length===1?q[0]===_?O=1:q[0]===3*_+2&&(O=3):q.length===2&&q[0]===_&&q[1]===A&&(O=5),O===8?new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, total_sequence_length)'):new Error("Mask not supported")}let B=!1,W=x;if(u&&L.size(u.dims)>0){if(u.dims.length!==3&&u.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(o.dims[0]!==u.dims[0])throw new Error('Input "query" and "value" shall have 
same dim 0 (batch_size)');if(u.dims.length===3){if(S!==u.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');W=u.dims[2]}else{if(S!==u.dims[2])throw new Error('Input "key" and "value" shall have the same dim 2 (kv_sequence_length)');W=u.dims[1]*u.dims[3],B=!0}}let N=!1;if(p&&L.size(p.dims)>0)throw new Error("Key padding mask is not supported");if(m&&L.size(m.dims)>0){if(m.dims.length!==4)throw new Error('Input "attention_bias" is expected to have 4 dimensions');if(m.dims[0]!==_||m.dims[1]!==t.numHeads||m.dims[2]!==w||m.dims[3]!==A)throw new Error('Expect "attention_bias" shape (batch_size, num_heads, sequence_length, total_sequence_length)')}return{batchSize:_,sequenceLength:w,pastSequenceLength:C,kvSequenceLength:S,totalSequenceLength:A,maxSequenceLength:T,inputHiddenSize:0,hiddenSize:x,vHiddenSize:W,headSize:z,vHeadSize:Math.floor(W/t.numHeads),numHeads:t.numHeads,isUnidirectional:!1,pastPresentShareBuffer:!1,maskFilterValue:t.maskFilterValue,maskType:O,scale:t.scale,broadcastResPosBias:N,passPastInKv:B,qkvFormat:k}},km=e=>pe({...e}),Am=pe({perm:[0,2,1,3]}),jx=(e,t,o,n,u,c,p)=>{let m=[n,u,c],g=L.size(m),b=[{type:12,data:g},{type:12,data:p},{type:12,data:c}],_=w=>{let x=Y("qkv_with_bias",t.dataType,m),S=F("qkv",t.dataType,m),C=F("bias",o.dataType,m),T=[{name:"output_size",type:"u32"},{name:"bias_offset",type:"u32"},{name:"hidden_size",type:"u32"}];return` + ${w.registerUniforms(T).declareVariables(S,C,x)} + ${w.mainStart()} + ${w.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let bias_offset_idx = (global_idx % uniforms.hidden_size) + uniforms.bias_offset; + + qkv_with_bias[global_idx] = qkv[global_idx] + bias[bias_offset_idx]; + }`};return 
e.compute({name:"MultiHeadAttentionAddBias",shaderCache:{inputDependencies:["type","type"]},getRunData:()=>({outputs:[{dims:m,dataType:t.dataType,gpuDataType:0}],dispatchGroup:{x:Math.ceil(g/64)},programUniforms:b}),getShaderSource:_},{inputs:[t,o],outputs:[-1]})[0]},vr=(e,t,o,n,u,c,p,m)=>{let g=c;if(p&&L.size(p.dims)>0){if(n===1)throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");return g=jx(e,c,p,t,n,o*u,m),g=g.reshape([t,n,o,u]),o===1||n===1?g:e.compute(Fe(g,Am.perm),{inputs:[g],outputs:[-1]})[0]}else return c.dims.length===3&&(g=c.reshape([t,n,o,u])),o===1||n===1?g:e.compute(Fe(g,Am.perm),{inputs:[g],outputs:[-1]})[0]},Em=(e,t)=>{let o=Dx(e.inputs,t),n=e.inputs[0],u=Qe(e.inputs,1),c=Qe(e.inputs,2),p=Qe(e.inputs,3),m=Qe(e.inputs,4),g=Qe(e.inputs,5),b=Qe(e.inputs,6),_=Qe(e.inputs,7);if(n.dims.length===5)throw new Error("Packed QKV is not implemented");if(u?.dims.length===5)throw new Error("Packed KV is not implemented");let w=u&&c&&u.dims.length===4&&c.dims.length===4,x=vr(e,o.batchSize,o.numHeads,o.sequenceLength,o.headSize,n,p,0);if(w)return tr(e,x,u,c,m,void 0,b,_,g,o);if(!u||!c)throw new Error("key and value must be provided");let S=vr(e,o.batchSize,o.numHeads,o.kvSequenceLength,o.headSize,u,p,o.hiddenSize),C=vr(e,o.batchSize,o.numHeads,o.kvSequenceLength,o.vHeadSize,c,p,2*o.hiddenSize);tr(e,x,S,C,m,void 0,b,_,g,o)}});var Mx,Rx,Ux,Nx,ro,Pm,Om,no=X(()=>{"use strict";ce();be();We();we();Mx=e=>{if(!e||e.length<1)throw new Error("too few inputs")},Rx=(e,t)=>{let o=[],n=t.numOutputs;return e[1].dims[0]>0&&(e[1].getBigInt64Array().forEach(u=>o.push(Number(u))),n=o.length),pe({numOutputs:n,axis:t.axis,splitSizes:o})},Ux=e=>` +fn calculateOutputIndex(index: u32) -> u32 { + for (var i: u32 = 0u; i < ${e}u; i += 1u ) { + if (index < ${ae("uniforms.size_in_split_axis","i",e)}) { + return i; + } + } + return ${e}u; +}`,Nx=e=>{let t=e.length,o=[];for(let n=0;n{let 
o=e[0].dims,n=L.size(o),u=e[0].dataType,c=L.normalizeAxis(t.axis,o.length),p=new Array(t.numOutputs),m=F("input",u,o.length),g=new Array(t.numOutputs),b=[],_=[],w=0,x=[{type:12,data:n}];for(let C=0;C` + ${C.registerUniform("input_size","u32").registerUniform("size_in_split_axis","u32",g.length).declareVariables(m,...p)} + ${Ux(g.length)} + ${Nx(p)} + + ${C.mainStart()} + ${C.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")} + + var indices = ${m.offsetToIndices("global_idx")}; + var index = ${m.indicesGet("indices",c)}; + let output_number = calculateOutputIndex(index); + if (output_number != 0) { + index -= ${ae("uniforms.size_in_split_axis","output_number - 1u",g.length)}; + ${m.indicesSet("indices",c,"index")}; + } + writeBufferData(output_number, indices, global_idx); + }`;return{name:"Split",shaderCache:{hint:t.cacheKey,inputDependencies:["rank"]},getShaderSource:S,getRunData:()=>({outputs:b,dispatchGroup:{x:Math.ceil(n/64)},programUniforms:x})}},Pm=(e,t)=>{Mx(e.inputs);let o=e.inputs.length===1?t:Rx(e.inputs,t);e.compute(ro(e.inputs,o),{inputs:[0]})},Om=e=>{let t=e.axis,o=e.splitSizes,n=e.numOutputs<0?o.length:e.numOutputs;if(n!==o.length)throw new Error("numOutputs and splitSizes lengh must be equal");return pe({axis:t,numOutputs:n,splitSizes:o})}});var Vx,$n,zm,io=X(()=>{"use strict";ce();be();We();we();Vx=(e,t)=>{let[o,n,u,c]=e,{numHeads:p,rotaryEmbeddingDim:m}=t;if(o.dims.length!==3&&o.dims.length!==4)throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${o.dims.length}`);if(!L.areEqual(n.dims,[])&&!L.areEqual(n.dims,[1])&&n.dims.length!==2)throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${n.dims.length}`);if(u.dims.length!==2)throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${u.dims.length}`);if(c.dims.length!==2)throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${c.dims.length}`);if(!L.areEqual(u.dims,c.dims))throw new Error("Inputs 
'cos_cache' and 'sin_cache' are expected to have the same shape");if(m>0&&p===0)throw new Error("num_heads must be provided if rotary_embedding_dim is specified");let g=o.dims[0],b=o.dims[o.dims.length-2],_=u.dims[0],w=L.sizeFromDimension(o.dims,1)/b,x=m===0?u.dims[1]*2:w/p;if(m>x)throw new Error("rotary_embedding_dim must be less than or equal to head_size");if(n.dims.length===2){if(g!==n.dims[0])throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${n.dims[0]}`);if(b!==n.dims[1])throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${n.dims[1]}`)}if(x/2!==u.dims[1]&&m/2!==u.dims[1])throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${u.dims[1]}`);if(b>_)throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported")},$n=(e,t)=>{let{interleaved:o,numHeads:n,rotaryEmbeddingDim:u,scale:c}=t,p=e[0].dims[0],m=L.sizeFromDimension(e[0].dims,1),g=e[0].dims[e[0].dims.length-2],b=m/g,_=e[2].dims[1],w=u===0?_*2:b/n,x=new Array(p,g,b/w,w-_),S=L.computeStrides(x),C=[{type:1,data:c},{type:12,data:x},{type:12,data:S},...e[0].dims.length===3?new Array({type:12,data:[m,b,w,1]}):[],...e[0].dims.length===4?new Array({type:12,data:[m,w,g*w,1]}):[],...te(e[0].dims,e[1].dims,e[2].dims,e[3].dims,e[0].dims)],T=z=>{let k=F("input",e[0].dataType,e[0].dims.length),A=F("position_ids",e[1].dataType,e[1].dims.length),O=F("cos_cache",e[2].dataType,e[2].dims.length),B=F("sin_cache",e[3].dataType,e[3].dims.length),W=Y("output",e[0].dataType,e[0].dims.length);return z.registerUniforms([{name:"scale",type:"f32"},{name:"global_shape",type:"u32",length:x.length},{name:"global_strides",type:"u32",length:S.length},{name:"input_output_strides",type:"u32",length:S.length}]),` + ${z.declareVariables(k,A,O,B,W)} + + ${z.mainStart(Ft)} + let half_rotary_emb_dim = uniforms.${O.name}_shape[1]; + let bsnh = global_idx / uniforms.global_strides % 
uniforms.global_shape; + let size = uniforms.global_shape[0] * uniforms.global_strides[0]; + ${z.guardAgainstOutOfBoundsWorkgroupSizes("size")} + + if (bsnh[3] < half_rotary_emb_dim) { + let position_ids_idx = + ${A.broadcastedIndicesToOffset("bsnh.xy",Y("",A.type.tensor,2))}; + let position_id = + u32(${A.getByOffset("position_ids_idx")}) + select(0, bsnh[1], position_ids_idx == 0); + let i = dot(bsnh, uniforms.input_output_strides) + select(0, bsnh[3], ${o}); + let j = i + select(half_rotary_emb_dim, 1, ${o}); + let re = ${k.getByOffset("i")} * ${O.get("position_id","bsnh[3]")} - + ${k.getByOffset("j")} * ${B.get("position_id","bsnh[3]")}; + ${W.setByOffset("i","re")} + let im = ${k.getByOffset("i")} * ${B.get("position_id","bsnh[3]")} + + ${k.getByOffset("j")} * ${O.get("position_id","bsnh[3]")}; + ${W.setByOffset("j","im")} + } else { + let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim; + ${W.setByOffset("k",k.getByOffset("k"))} + } + }`};return{name:"RotaryEmbedding",shaderCache:{hint:pe({interleaved:o}).cacheKey,inputDependencies:["rank","rank","rank","rank"]},getShaderSource:T,getRunData:()=>({outputs:[{dims:e[0].dims,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(L.size(x)/Ft)},programUniforms:C})}},zm=(e,t)=>{Vx(e.inputs,t),e.compute($n(e.inputs,t))}});var Wx,Lx,Bm,Gx,Dm,jm=X(()=>{"use strict";We();ce();fn();to();no();Tt();io();we();Wx=(e,t)=>{if(t.doRotary&&e.length<=7)throw new Error("cos_cache and sin_cache inputs are required if do_rotary is specified");let o=e[0],n=e[1],u=e[2],c=e[3],p=e[4];if(t.doRotary!==0&&e.length<=7)throw new Error("cos_cast and sin_cache are expected if do_rotary attribute is non-zero");if(t.localWindowSize!==-1)throw new Error("Local attention is not supported");if(t.softcap!==0)throw new Error("Softcap is not supported");if(t.rotaryInterleaved!==0)throw new Error("Rotary interleaved is not supported");if(t.smoothSoftmax)throw new Error("Smooth softmax is not 
supported");if(o.dims.length!==3&&o.dims.length!==5)throw new Error("Input query is expected to have 3 or 5 dimensions");let m=!1,g=o.dims[0],b=o.dims[1],_=o.dims.length===3?m?o.dims[2]/3:o.dims[2]:t.numHeads*o.dims[4],w=b,x=0,S=!n||n.dims.length===0,C=Math.floor(S?_/(t.numHeads+2*t.kvNumHeads):_/t.numHeads);S&&(_=C*t.numHeads);let T=c&&c.dims.length!==0,z=p&&p.dims.length!==0;if(T&&c.dims.length===4&&c.dims[0]===g&&c.dims[1]!==t.kvNumHeads&&c.dims[2]===t.kvNumHeads&&c.dims[3]===C)throw new Error("BSNH pastKey/pastValue is not supported");if(T&&z){if(c.dims.length!==4)throw new Error('Input "past_key" is expected to have 4 dimensions');if(p.dims.length!==4)throw new Error('Input "past_value" is expected to have 4 dimensions');x=c.dims[2]}else if(T||z)throw new Error('Input "past_key" and "past_value" shall be both present or both absent');let A=1;if(n&&n.dims.length>0){if(o.dims.length!==3)throw new Error('Input "query" is expected to have 3 dimensions when key is given');if(n.dims.length<3||n.dims.length>5)throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');if(o.dims[0]!==n.dims[0])throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');if(n.dims.length===3){if(o.dims[2]%n.dims[2]!==0)throw new Error('Dimension 2 of "query" should be a multiple of "key"');w=n.dims[1]}else if(n.dims.length===5){if(n.dims[2]!==t.numHeads||n.dims[3]!==2||n.dims[4]!==C)throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');if(u)throw new Error('Expect "value" be none when "key" has packed kv format.');w=n.dims[1]}else{if(n.dims[1]!==t.numHeads||n.dims[3]!==C)throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');w=n.dims[2]}}else{if(o.dims.length!==3&&o.dims.length!==5)throw new Error('Input "query" is expected to have 3 or 5 dimensions when key is empty');if(o.dims.length===5&&(o.dims[2]!==t.numHeads||o.dims[3]!==3))throw new 
Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');A=3}let O=0,B=!1,W=t.kvNumHeads?C*t.kvNumHeads:_;if(u&&u.dims.length>0){if(u.dims.length!==3&&u.dims.length!==4)throw new Error('Input "value" is expected to have 3 or 4 dimensions');if(o.dims[0]!==u.dims[0])throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');if(u.dims.length===3){if(w!==u.dims[1])throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');W=u.dims[2]}else{if(w!==u.dims[2])throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');W=u.dims[1]*u.dims[3],B=!0}}let N=e.length>4?e[5]:void 0;if(N&&N.dims.length!==1&&N.dims[0]!==g)throw new Error('Input "seqlens" is expected to have 1 dimension and the same dim 0 as batch_size');return{batchSize:g,sequenceLength:b,pastSequenceLength:x,kvSequenceLength:w,totalSequenceLength:-1,maxSequenceLength:-1,inputHiddenSize:0,hiddenSize:_,vHiddenSize:W,headSize:C,vHeadSize:Math.floor(W/t.kvNumHeads),numHeads:t.numHeads,kvNumHeads:t.kvNumHeads,nReps:t.numHeads/t.kvNumHeads,pastPresentShareBuffer:!1,maskType:O,scale:t.scale,broadcastResPosBias:!1,passPastInKv:B,qkvFormat:A}},Lx=pe({perm:[0,2,1,3]}),Bm=(e,t,o)=>{let n=t,u=o.kvNumHeads;return t.dims.length===3&&o.kvSequenceLength!==0&&(n=t.reshape([o.batchSize,o.kvSequenceLength,u,o.headSize]),n=e.compute(Fe(n,Lx.perm),{inputs:[n],outputs:[-1]})[0]),n},Gx=(e,t,o,n)=>{let u=7,c=["type","type"],p=[e*t],m=e*t,g=[{type:12,data:m},{type:12,data:t},{type:12,data:e}],b=_=>{let w=F("seq_lens",o.dataType,o.dims),x=F("total_seq_lens",n.dataType,n.dims),S=Y("pos_ids",u,p),C=[{name:"output_size",type:"u32"},{name:"sequence_length",type:"u32"},{name:"batch_size",type:"u32"}];return` + ${_.registerUniforms(C).declareVariables(w,x,S)} + ${_.mainStart()} + ${_.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let total_sequence_length = u32(${x.getByOffset("0")}); + let 
is_subsequent_prompt = uniforms.sequence_length > 1 && uniforms.sequence_length != total_sequence_length; + let is_first_prompt = !is_subsequent_prompt && uniforms.sequence_length == total_sequence_length; + let batch_idx = global_idx / uniforms.sequence_length; + let sequence_idx = i32(global_idx % uniforms.sequence_length); + var pos_id: i32 = 0; + let seqlen = ${w.getByOffset("batch_idx")}; + let total_seqlen = seqlen + 1; + if (is_first_prompt) { + if (sequence_idx < total_seqlen) { + pos_id = sequence_idx; + } else { + pos_id = 1; + } + ${S.setByOffset("global_idx","pos_id")} + } else if (is_subsequent_prompt) { + let past_seqlen = total_seqlen - i32(uniforms.sequence_length); + if (past_seqlen + sequence_idx < total_seqlen) { + pos_id = past_seqlen + sequence_idx; + } else { + pos_id = 1; + } + ${S.setByOffset("global_idx","pos_id")} + } else if (global_idx < uniforms.batch_size) { + ${S.setByOffset("global_idx","seqlen")} + }; + } + `};return{name:"GeneratePositionIds",shaderCache:{hint:`${e};${t}`,inputDependencies:c},getRunData:()=>({outputs:[{dims:p,dataType:u}],dispatchGroup:{x:Math.ceil(m/64)},programUniforms:g}),getShaderSource:b}},Dm=(e,t)=>{let o=Wx(e.inputs,t);if(e.inputs[0].dims.length===5)throw new Error("Packed QKV is not implemented");if(e.inputs[1]?.dims.length===5)throw new Error("Packed KV is not implemented");let n=e.inputs[0],u=e.inputs[1]&&e.inputs[1].dims.length>0?e.inputs[1]:void 0,c=e.inputs[2]&&e.inputs[2].dims.length>0?e.inputs[2]:void 0,p=e.inputs[3]&&e.inputs[3].dims.length!==0?e.inputs[3]:void 0,m=e.inputs[4]&&e.inputs[4].dims.length!==0?e.inputs[4]:void 0,g=e.inputs.length>4?e.inputs[5]:void 0,b=e.inputs.length>5?e.inputs[6]:void 0,_=o.kvNumHeads?o.kvNumHeads:o.numHeads,w=pe({axis:2,numOutputs:3,splitSizes:[o.numHeads*o.headSize,_*o.headSize,_*o.headSize]}),[x,S,C]=!u&&!c?e.compute(ro([n],w),{inputs:[n],outputs:[-1,-1,-1]}):[n,u,c],T,z;if(t.doRotary){let 
B=e.compute(Gx(o.batchSize,o.sequenceLength,g,b),{inputs:[g,b],outputs:[-1]})[0],W=e.inputs[7],N=e.inputs[8],q=pe({interleaved:t.rotaryInterleaved!==0,numHeads:o.numHeads,rotaryEmbeddingDim:0,scale:t.scale}),K=[x,B,W,N],Q=[-1];T=e.compute($n(K,q),{inputs:K,outputs:Q})[0],K.splice(0,1,S);let ne=pe({interleaved:t.rotaryInterleaved!==0,numHeads:o.kvNumHeads,rotaryEmbeddingDim:0,scale:t.scale});z=e.compute($n(K,ne),{inputs:K,outputs:Q})[0]}let k=vr(e,o.batchSize,o.numHeads,o.sequenceLength,o.headSize,t.doRotary?T:x,void 0,0),A=Bm(e,t.doRotary?z:S,o),O=Bm(e,C,o);tr(e,k,A,O,void 0,void 0,p,m,void 0,o,g,b)}});var Mm,Hx,Fx,Rm,Um=X(()=>{"use strict";ce();be();Tt();we();Mm=(e,t,o,n,u,c,p,m)=>{let g=Ae(c),b=g===1?"f32":`vec${g}f`,_=g===1?"vec2f":`mat2x${g}f`,w=u*p,x=64;w===1&&(x=256);let S=[u,p,c/g],C=[u,p,2],T=["rank","type","type"],z=[];z.push(...te(S,C));let k=A=>{let O=F("x",t.dataType,3,g),B=F("scale",o.dataType,o.dims),W=F("bias",n.dataType,n.dims),N=Y("output",1,3,2),q=[O,B,W,N];return` + var workgroup_shared : array<${_}, ${x}>; + const workgroup_size = ${x}u; + ${A.declareVariables(...q)} + ${A.mainStart(x)} + let batch = workgroup_index / uniforms.x_shape[1]; + let channel = workgroup_index % uniforms.x_shape[1]; + let hight = uniforms.x_shape[2]; + // initialize workgroup memory + var sum = ${b}(0); + var squared_sum = ${b}(0); + for (var h = local_idx; h < hight; h += workgroup_size) { + let value = ${b}(${O.get("batch","channel","h")}); + sum += value; + squared_sum += value * value; + } + workgroup_shared[local_idx] = ${_}(sum, squared_sum); + workgroupBarrier(); + + for (var currSize = workgroup_size >> 1; currSize > 0; currSize = currSize >> 1) { + if (local_idx < currSize) { + workgroup_shared[local_idx] = workgroup_shared[local_idx] + workgroup_shared[local_idx + currSize]; + } + workgroupBarrier(); + } + if (local_idx == 0) { + let sum_final = ${ut("workgroup_shared[0][0]",g)} / f32(hight * ${g}); + let squared_sum_final = ${ut("workgroup_shared[0][1]",g)} 
/ f32(hight * ${g}); + + let inv_std_dev = inverseSqrt(squared_sum_final - sum_final * sum_final + f32(${m})); + let channel_scale = inv_std_dev * f32(scale[channel]); + let channel_shift = f32(bias[channel]) - sum_final * channel_scale; + output[workgroup_index] = vec2f(channel_scale, channel_shift); + } + }`};return e.compute({name:"InstanceNormComputeChannelScaleShift",shaderCache:{hint:`${g};${m};${x}`,inputDependencies:T},getRunData:()=>({outputs:[{dims:C,dataType:1}],dispatchGroup:{x:w},programUniforms:z}),getShaderSource:k},{inputs:[t,o,n],outputs:[-1]})[0]},Hx=(e,t,o)=>{let n=t[0].dims,u=n,c=2,p=n[0],m=n[1],g=L.sizeFromDimension(n,c),b=Ae(g),_=L.size(u)/b,w=Mm(e,t[0],t[1],t[2],p,g,m,o.epsilon),x=[p,m,g/b],S=[p,m],C=["type","none"],T=z=>{let k=F("x",t[0].dataType,x.length,b),A=F("scale_shift",1,S.length,2),O=Y("output",t[0].dataType,x.length,b),B=[k,A,O];return` + ${z.registerUniform("output_size","u32").declareVariables(...B)} + ${z.mainStart()} + ${z.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let outputIndices = ${O.offsetToIndices("global_idx")}; + let batch = outputIndices[0]; + let channel = outputIndices[1]; + let scale_shift = ${A.getByIndices("vec2(batch, channel)")}; + let value = ${k.getByOffset("global_idx")} * ${O.type.value}(scale_shift.x) + ${O.type.value}(scale_shift.y); + ${O.setByOffset("global_idx","value")}; + }`};e.compute({name:"InstanceNormalization",shaderCache:{hint:`${b}`,inputDependencies:C},getRunData:()=>({outputs:[{dims:u,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(_/64)},programUniforms:[{type:12,data:_},...te(x,S,x)]}),getShaderSource:T},{inputs:[t[0],w]})},Fx=(e,t,o)=>{let n=t[0].dims,u=n,c=n[0],p=n[n.length-1],m=L.sizeFromDimension(n,1)/p,g=Ae(p),b=L.size(u)/g,_=[{type:12,data:m},{type:12,data:Math.floor(p/g)}],w=["type","type"],x=!1,S=[0,n.length-1];for(let k=0;kn[S[A]])),T=Mm(e,C,t[1],t[2],c,m,p,o.epsilon),z=k=>{let A=Be(t[0].dataType),O=g===1?"vec2f":`mat${g}x2f`,B=q=>{let 
K=q===0?"x":"y",Q=g===1?"f32":`vec${g}f`;switch(g){case 1:return`${A}(${Q}(scale.${K}))`;case 2:return`vec2<${A}>(${Q}(scale[0].${K}, scale[1].${K}))`;case 4:return`vec4<${A}>(${Q}(scale[0].${K}, scale[1].${K}, scale[2].${K}, scale[3].${K}))`;default:throw new Error(`Not supported compoents ${g}`)}},W=F("input",t[0].dataType,t[0].dims,g),N=Y("output",t[0].dataType,u,g);return` + @group(0) @binding(0) var input : array<${W.type.storage}>; + @group(0) @binding(1) var scale_input : array<${O}>; + @group(0) @binding(2) var output : array<${N.type.storage}>; + struct Uniforms {H: u32, C : u32}; + @group(0) @binding(3) var uniforms: Uniforms; + + ${k.mainStart()} + let current_image_number = global_idx / (uniforms.C * uniforms.H); + let current_channel_number = global_idx % uniforms.C; + + let scale_offset = current_image_number * uniforms.C + current_channel_number; + let scale = scale_input[scale_offset]; + output[global_idx] = fma(input[global_idx], ${B(0)}, ${B(1)}); + }`};e.compute({name:"InstanceNormalizationNHWC",shaderCache:{hint:`${g}`,inputDependencies:w},getRunData:()=>({outputs:[{dims:u,dataType:t[0].dataType}],dispatchGroup:{x:Math.ceil(b/64)},programUniforms:_}),getShaderSource:z},{inputs:[t[0],T]})},Rm=(e,t)=>{t.format==="NHWC"?Fx(e,e.inputs,t):Hx(e,e.inputs,t)}});var qx,Kx,Nm,Vm=X(()=>{"use strict";ce();be();we();qx=e=>{if(!e||e.length<2)throw new Error("layerNorm requires at least 2 inputs.")},Kx=(e,t,o)=>{let n=t.simplified,u=e[0].dims,c=e[1],p=!n&&e[2],m=u,g=L.normalizeAxis(t.axis,u.length),b=L.sizeToDimension(u,g),_=L.sizeFromDimension(u,g),w=L.size(c.dims),x=p?L.size(p.dims):0;if(w!==_||p&&x!==_)throw new Error(`Size of X.shape()[axis:] == ${_}. + Size of scale and bias (if provided) must match this. 
+ Got scale size of ${w} and bias size of ${x}`);let S=[];for(let W=0;W1,A=o>2,O=W=>{let N=Be(e[0].dataType),q=[F("x",e[0].dataType,e[0].dims,C),F("scale",c.dataType,c.dims,C)];p&&q.push(F("bias",p.dataType,p.dims,C)),q.push(Y("output",e[0].dataType,m,C)),k&&q.push(Y("mean_data_output",1,S)),A&&q.push(Y("inv_std_output",1,S));let K=[{name:"norm_count",type:"u32"},{name:"norm_size",type:"f32"},{name:"norm_size_vectorized",type:"u32"},{name:"epsilon",type:"f32"}];return` + ${W.registerUniforms(K).declareVariables(...q)} + ${W.mainStart()} + ${W.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")} + let offset = global_idx * uniforms.norm_size_vectorized; + var mean_vector = ${Ni("f32",C)}; + var mean_square_vector = ${Ni("f32",C)}; + + for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) { + let value = ${qt(N,C,"x[h + offset]")}; + mean_vector += value; + mean_square_vector += value * value; + } + let mean = ${ut("mean_vector",C)} / uniforms.norm_size; + let inv_std_dev = inverseSqrt(${ut("mean_square_vector",C)} / uniforms.norm_size ${n?"":"- mean * mean"} + uniforms.epsilon); + + for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) { + let f32input = ${qt(N,C,"x[j + offset]")}; + let f32scale = ${qt(N,C,"scale[j]")}; + output[j + offset] = ${q[0].type.value}((f32input ${n?"":"- mean"}) * inv_std_dev * f32scale + ${p?`+ ${qt(N,C,"bias[j]")}`:""} + ); + } + + ${k?"mean_data_output[global_idx] = mean":""}; + ${A?"inv_std_output[global_idx] = inv_std_dev":""}; + }`},B=[{dims:m,dataType:e[0].dataType}];return k&&B.push({dims:S,dataType:1}),A&&B.push({dims:S,dataType:1}),{name:"LayerNormalization",shaderCache:{hint:`${C};${o};${n}`,inputDependencies:T},getRunData:()=>({outputs:B,dispatchGroup:{x:Math.ceil(b/64)},programUniforms:z}),getShaderSource:O}},Nm=(e,t)=>{qx(e.inputs),e.compute(Kx(e.inputs,t,e.outputCount))}});var Jx,Wm,Lm=X(()=>{"use strict";be();_n();wn();Jx=e=>{if(!e||e.length!==2)throw new Error("MatMul requires 2 
inputs.");if(e[0].dims[e[0].dims.length-1]!==e[1].dims[e[1].dims.length-2])throw new Error("shared dimension does not match.")},Wm=e=>{Jx(e.inputs);let t=bt.calcShape(e.inputs[0].dims,e.inputs[1].dims,!0);if(!t)throw new Error("Can't use matmul on the given tensors");let o=t[t.length-1],n=e.inputs[0].dims[e.inputs[0].dims.length-1];if(o<8&&n<8)e.compute(bn(e.inputs,{activation:""},t));else{let u=t[t.length-2],c=L.size(e.inputs[0].dims.slice(0,-2)),p=L.size(e.inputs[1].dims.slice(0,-2));if(c!==1&&u===1&&p===1){let m=e.inputs[0].reshape([1,c,n]),g=e.inputs[1].reshape([1,n,o]),b=[1,c,o],_=[m,g];e.compute(wr(_,{activation:""},t,b),{inputs:_})}else e.compute(wr(e.inputs,{activation:""},t))}}});var Zx,Qx,Yx,Gm,Hm,Fm=X(()=>{"use strict";ce();be();We();we();Zx=(e,t)=>{if(e.length<3||e.length>4)throw new Error("MatMulNBits requires 3 or 4 inputs");let o=e[0],n=o.dims.length;if(o.dims[n-1]!==t.k)throw new Error("The last dim of input shape does not match the k value");let u=Math.floor((t.k+t.blockSize-1)/t.blockSize),c=t.blockSize/8*t.bits,p=e[1];if(!L.areEqual(p.dims,[t.n,u,c]))throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");let g=e[2].dims;if(L.size(g)!==t.n*u)throw new Error("scales input size error.");if(e.length===4){let _=e[3].dims,w=t.bits>4?t.n*u:t.n*Math.floor((u+1)/2);if(L.size(_)!==w)throw new Error("zeroPoints input size error.")}},Qx=(e,t)=>{let o=e[0].dims,n=o.length,u=o[n-2],c=t.k,p=t.n,m=o.slice(0,n-2),g=L.size(m),_=e[1].dims[2]/4,w=e[0].dataType,x=Ae(t.k),S=Ae(_),C=Ae(p),T=m.concat([u,p]),z=u>1&&p/C%2===0?2:1,k=L.size(T)/C/z,A=64,O=[],B=[g,u,c/x],W=L.convertShape(e[1].dims).slice();W.splice(-1,1,_/S),O.push(...te(B)),O.push(...te(W)),O.push(...te(e[2].dims)),e.length===4&&O.push(...te(L.convertShape(e[3].dims)));let N=[g,u,p/C];O.push(...te(N));let q=K=>{let 
Q=B.length,ne=F("a",e[0].dataType,Q,x),se=F("b",12,W.length,S),ue=F("scales",e[2].dataType,e[2].dims.length),ge=[ne,se,ue],re=e.length===4?F("zero_points",12,e[3].dims.length):void 0;re&&ge.push(re);let Se=N.length,fe=Y("output",e[0].dataType,Se,C),ie=Be(e[0].dataType),ve=(()=>{switch(x){case 1:return`array<${ie}, 8>`;case 2:return`mat4x2<${ie}>`;case 4:return`mat2x4<${ie}>`;default:throw new Error(`${x}-component is not supported.`)}})(),le=()=>{let je=` + // reuse a data + var input_offset = ${ne.indicesToOffset(`${ne.type.indices}(batch, row, word_offset)`)}; + var a_data: ${ve}; + for (var j: u32 = 0; j < ${8/x}; j++) { + a_data[j] = ${ne.getByOffset("input_offset")}; + input_offset++; + } + `;for(let he=0;he> 4) & b_mask); + b_quantized_values = ${ve}(${Array.from({length:4},(R,V)=>`${ie}(b_value_lower[${V}]), ${ie}(b_value_upper[${V}])`).join(", ")}); + b_dequantized_values = ${x===1?`${ve}(${Array.from({length:8},(R,V)=>`(b_quantized_values[${V}] - ${re?`zero_point${he}`:"zero_point"}) * scale${he}`).join(", ")});`:`(b_quantized_values - ${ve}(${Array(8).fill(`${re?`zero_point${he}`:"zero_point"}`).join(",")})) * scale${he};`}; + workgroup_shared[local_id.x * ${z} + ${Math.floor(he/C)}]${C>1?`[${he%C}]`:""} += ${Array.from({length:8/x},(R,V)=>`${x===1?`a_data[${V}] * b_dequantized_values[${V}]`:`dot(a_data[${V}], b_dequantized_values[${V}])`}`).join(" + ")}; + `;return je},me=()=>{let je=` + var col_index = col * ${C}; + ${re?` + let zero_point_bytes_per_col = (nBlocksPerCol + 1) / 2; + var zero_point_byte_count: u32; + var zero_point_word_index: u32; + var zero_point_byte_offset: u32; + let zero_point_nibble_offset: u32 = block & 0x1u; + var zero_point_bits_offset: u32; + var zero_point_word: u32;`:` + // The default zero point is 8 for unsigned 4-bit quantization. 
+ let zero_point = ${ie}(8);`} + `;for(let he=0;he> 0x1u); + zero_point_word_index = zero_point_byte_count >> 0x2u; + zero_point_byte_offset = zero_point_byte_count & 0x3u; + zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2); + zero_point_word = ${re.getByOffset("zero_point_word_index")} >> zero_point_bits_offset; + let zero_point${he} = ${ie}((zero_point_word) & 0xFu);`:""} + col_index += 1;`;return je},ke=()=>{let je=`col_index = col * ${C};`;for(let he=0;he; + var b_value_upper: vec4; + var b_quantized_values: ${ve}; + var b_dequantized_values: ${ve};`,je};return` + var workgroup_shared: array<${fe.type.value}, ${z*A}>; + ${K.declareVariables(...ge,fe)} + ${K.mainStart([A,1,1])} + let output_indices = ${fe.offsetToIndices(`(global_idx / ${A}) * ${z}`)}; + let col = output_indices[2]; + let row = output_indices[1]; + let batch = output_indices[0]; + let nBlocksPerCol = uniforms.b_shape[1]; + + for (var block = local_id.x; block < nBlocksPerCol; block += ${A}) { + //process one block + var word_offset: u32 = block * ${t.blockSize/x}; + ${me()} + for (var word: u32 = 0; word < ${_}; word += ${S}) { + ${ke()} + for (var i: u32 = 0; i < ${S}; i++) { + ${le()} + word_offset += ${8/x}; + } + } + } + workgroupBarrier(); + + if (local_id.x < ${z}) { + var output_value: ${fe.type.value} = ${fe.type.value}(0); + var workgroup_shared_offset: u32 = local_id.x; + for (var b: u32 = 0u; b < ${A}u; b++) { + output_value += workgroup_shared[workgroup_shared_offset]; + workgroup_shared_offset += ${z}; + } + ${fe.setByIndices(`${fe.type.indices}(batch, row, col + local_id.x)`,"output_value")}; + } + }`};return{name:"MatMulNBits",shaderCache:{hint:`${t.blockSize};${t.bits};${x};${S};${C};${z};${A}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:T,dataType:w}],dispatchGroup:{x:k},programUniforms:O}),getShaderSource:q}},Yx=(e,t)=>{let 
o=e[0].dims,n=o.length,u=o[n-2],c=t.k,p=t.n,m=o.slice(0,n-2),g=L.size(m),_=e[1].dims[2]/4,w=e[0].dataType,x=Ae(t.k),S=Ae(_),C=m.concat([u,p]),T=128,z=p%8===0?8:p%4===0?4:1,k=T/z,A=k*S*8,O=A/x,B=A/t.blockSize,W=L.size(C)/z,N=[],q=[g,u,c/x],K=L.convertShape(e[1].dims).slice();K.splice(-1,1,_/S),N.push(...te(q)),N.push(...te(K)),N.push(...te(e[2].dims)),e.length===4&&N.push(...te(L.convertShape(e[3].dims)));let Q=[g,u,p];N.push(...te(Q));let ne=se=>{let ue=q.length,ge=F("a",e[0].dataType,ue,x),re=F("b",12,K.length,S),Se=F("scales",e[2].dataType,e[2].dims.length),fe=[ge,re,Se],ie=e.length===4?F("zero_points",12,e[3].dims.length):void 0;ie&&fe.push(ie);let ve=Q.length,le=Y("output",e[0].dataType,ve),me=Be(e[0].dataType),ke=()=>{switch(x){case 1:return` + let a_data0 = vec4<${me}>(sub_a[word_offset], sub_a[word_offset + 1], sub_a[word_offset + 2], sub_a[word_offset + 3]); + let a_data1 = vec4<${me}>(sub_a[word_offset + 4], sub_a[word_offset + 5], sub_a[word_offset + 6], sub_a[word_offset + 7]);`;case 2:return` + let a_data0 = vec4<${me}>(sub_a[word_offset], sub_a[word_offset + 1]); + let a_data1 = vec4<${me}>(sub_a[word_offset + 2], sub_a[word_offset + 3]);`;case 4:return` + let a_data0 = sub_a[word_offset]; + let a_data1 = sub_a[word_offset + 1];`;default:throw new Error(`${x}-component is not supported.`)}};return` + var sub_a: array<${ge.type.value}, ${O}>; + var inter_results: array, ${z}>; + ${se.declareVariables(...fe,le)} + ${se.mainStart([k,z,1])} + let output_indices = ${le.offsetToIndices(`workgroup_index * ${z}`)}; + let col = output_indices[2]; + let row = output_indices[1]; + let batch = output_indices[0]; + let n_blocks_per_col = uniforms.b_shape[1]; + let num_tiles = (n_blocks_per_col - 1) / ${B} + 1; + + // Loop over shared dimension. + for (var tile: u32 = 0; tile < num_tiles; tile += 1) { + let a_col_start = tile * ${O}; + // load one tile A data into shared memory. 
+ for (var a_offset = local_idx; a_offset < ${O}; a_offset += ${T}) + { + let a_col = a_col_start + a_offset; + if (a_col < uniforms.a_shape[2]) + { + sub_a[a_offset] = ${ge.getByIndices(`${ge.type.indices}(batch, row, a_col)`)}; + } else { + sub_a[a_offset] = ${ge.type.value}(0); + } + } + workgroupBarrier(); + + // each thread process one block + let b_row = col + local_id.y; + let block = tile * ${B} + local_id.x; + ${ie?` + let zero_point_bytes_per_col = (n_blocks_per_col + 1) / 2; + let zero_point_byte_count = b_row * zero_point_bytes_per_col + (block >> 0x1u); + let zero_point_word_index = zero_point_byte_count >> 0x2u; + let zero_point_byte_offset = zero_point_byte_count & 0x3u; + let zero_point_nibble_offset: u32 = block & 0x1u; + let zero_point_bits_offset = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2); + let zero_point_word = ${ie.getByOffset("zero_point_word_index")} >> zero_point_bits_offset; + let zero_point = ${me}((zero_point_word) & 0xFu);`:` + // The default zero point is 8 for unsigned 4-bit quantization. 
+ let zero_point = ${me}(8);`} + let scale = ${Se.getByOffset("b_row * n_blocks_per_col + block")}; + let b_data = ${re.getByIndices(`${re.type.indices}(b_row, block, 0)`)}; + var word_offset = local_id.x * ${t.blockSize/x}; + for (var i: u32 = 0; i < ${S}; i++) { + ${ke()} + let b_value = ${S===1?"b_data":"b_data[i]"}; + let b_value_lower = unpack4xU8(b_value & 0x0F0F0F0Fu); + let b_value_upper = unpack4xU8((b_value >> 4) & 0x0F0F0F0Fu); + let b_quantized_values = mat2x4<${me}>(${Array.from({length:4},(je,he)=>`${me}(b_value_lower[${he}]), ${me}(b_value_upper[${he}])`).join(", ")}); + let b_dequantized_values = (b_quantized_values - mat2x4<${me}>(${Array(8).fill("zero_point").join(",")})) * scale; + inter_results[local_id.y][local_id.x] += ${Array.from({length:2},(je,he)=>`${`dot(a_data${he}, b_dequantized_values[${he}])`}`).join(" + ")}; + word_offset += ${8/x}; + } + workgroupBarrier(); + } + + if (local_idx < ${z}) { + var output_value: ${le.type.value} = ${le.type.value}(0); + for (var b = 0u; b < ${k}; b++) { + output_value += inter_results[local_idx][b]; + } + if (col + local_idx < uniforms.output_shape[2]) + { + ${le.setByIndices(`${le.type.indices}(batch, row, col + local_idx)`,"output_value")} + } + } + }`};return{name:"BlockwiseMatMulNBits32",shaderCache:{hint:`${t.blockSize};${x};${S};${k};${z}`,inputDependencies:Array(e.length).fill("rank")},getRunData:()=>({outputs:[{dims:C,dataType:w}],dispatchGroup:{x:W},programUniforms:N}),getShaderSource:ne}},Gm=(e,t)=>{Zx(e.inputs,t),t.blockSize===32&&e.adapterInfo.isVendor("intel")&&e.adapterInfo.isArchitecture("gen-12lp")?e.compute(Yx(e.inputs,t)):e.compute(Qx(e.inputs,t))},Hm=e=>pe(e)});var Xx,e$,t$,r$,n$,i$,o$,a$,qm,Km=X(()=>{"use strict";ce();be();we();Xx=e=>{if(!e||e.length<1)throw new Error("Too few inputs");if(e[0].dataType!==1&&e[0].dataType!==10)throw new Error("Input type must be float or float16.");if(e.length>=2){let 
t=e[0].dims.length*2===e[1].dims[0];if(e.length===4&&(t=e[3].dims[0]*2===e[1].dims[0]),!t)throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].")}},e$=(e,t,o)=>{let n="";for(let u=t-1;u>=0;--u)n+=` + k = i32(${e.indicesGet("indices",u)}) - ${ae("uniforms.pads",u,o)}; + if (k < 0) { + break; + } + if (k >= i32(${ae("uniforms.x_shape",u,t)})) { + break; + } + offset += k * i32(${ae("uniforms.x_strides",u,t)}); + `;return` + value = ${e.type.value}(uniforms.constant_value); + for (var i = 0; i < 1; i++) { + var offset = 0; + var k = 0; + ${n} + value = x[offset]; + } + `},t$=(e,t,o)=>{let n="";for(let u=t-1;u>=0;--u)n+=` + k = i32(${e.indicesGet("indices",u)}) - ${ae("uniforms.pads",u,o)}; + if (k < 0) { + k = -k; + } + { + let _2n_1 = 2 * (i32(${ae("uniforms.x_shape",u,t)}) - 1); + k = k % _2n_1; + if(k >= i32(${ae("uniforms.x_shape",u,t)})) { + k = _2n_1 - k; + } + } + offset += k * i32(${ae("uniforms.x_strides",u,t)}); + `;return` + var offset = 0; + var k = 0; + ${n} + value = x[offset]; + `},r$=(e,t,o)=>{let n="";for(let u=t-1;u>=0;--u)n+=` + k = i32(${e.indicesGet("indices",u)}) - ${ae("uniforms.pads",u,o)}; + if (k < 0) { + k = 0; + } + if (k >= i32(${ae("uniforms.x_shape",u,t)})) { + k = i32(${ae("uniforms.x_shape",u,t)}) - 1; + } + offset += k * i32(${ae("uniforms.x_strides",u,t)}); + `;return` + var offset = 0; + var k = 0; + ${n} + value = x[offset]; + `},n$=(e,t,o)=>{let n="";for(let u=t-1;u>=0;--u)n+=` + k = i32(${e.indicesGet("indices",u)}) - ${ae("uniforms.pads",u,o)}; + if (k < 0) { + k += i32(${ae("uniforms.x_shape",u,t)}]); + } + if (k >= i32(${ae("uniforms.x_shape",u,t)})) { + k -= i32(${ae("uniforms.x_shape",u,t)}); + } + offset += k * i32(${ae("uniforms.x_strides",u,t)}); + `;return` + var offset = 0; + var k = 0; + ${n} + value = x[offset]; + `},i$=(e,t,o)=>{switch(o.mode){case 0:return e$(e,t,o.pads.length);case 1:return t$(e,t,o.pads.length);case 2:return r$(e,t,o.pads.length);case 3:return 
n$(e,t,o.pads.length);default:throw new Error("Invalid mode")}},o$=(e,t)=>{let o=L.padShape(e[0].dims.slice(),t.pads),n=e[0].dims,u=L.size(o),c=[{type:12,data:u},{type:6,data:t.pads}],p=e.length>=3&&e[2].data;t.mode===0&&c.push({type:p?e[2].dataType:1,data:t.value}),c.push(...te(e[0].dims,o));let m=["rank"],g=b=>{let _=Y("output",e[0].dataType,o.length),w=F("x",e[0].dataType,n.length),x=w.type.value,S=i$(_,n.length,t),C=[{name:"output_size",type:"u32"},{name:"pads",type:"i32",length:t.pads.length}];return t.mode===0&&C.push({name:"constant_value",type:p?x:"f32"}),` + ${b.registerUniforms(C).declareVariables(w,_)} + ${b.mainStart()} + ${b.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let indices = ${_.offsetToIndices("global_idx")}; + + var value = ${x}(0); + ${S} + output[global_idx] = value; + }`};return{name:"Pad",shaderCache:{hint:`${t.mode}${p}`,inputDependencies:m},getRunData:()=>({outputs:[{dims:o,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(L.size(o)/64)},programUniforms:c}),getShaderSource:g}},a$=(e,t)=>{if(e.length>1){let o=e[1].getBigInt64Array(),n=e.length>=3&&e[2].data?e[2].dataType===10?e[2].getUint16Array()[0]:e[2].getFloat32Array()[0]:0,u=e[0].dims.length,c=new Int32Array(2*u).fill(0);if(e.length>=4){let m=e[3].getBigInt64Array();for(let g=0;gc[Number(g)]=Number(m));let p=[];return c.forEach(m=>p.push(m)),{mode:t.mode,value:n,pads:p}}else return t},qm=(e,t)=>{Xx(e.inputs);let o=a$(e.inputs,t);e.compute(o$(e.inputs,o),{inputs:[0]})}});var Cn,Jm,Zm,Qm,Ym,s$,u$,Xm,eh,th,rh,nh,ih,oh,ah,sh,uh,lh,dh,ch=X(()=>{"use strict";nt();ce();be();we();Cn=e=>{if(ze.webgpu.validateInputContent&&(!e||e.length!==1))throw new Error("Pool ops requires 1 input.")},Jm=(e,t,o)=>{let n=t.format==="NHWC",u=e.dims.slice();n&&u.splice(1,0,u.pop());let c=Object.hasOwnProperty.call(t,"dilations"),p=t.kernelShape.slice(),m=t.strides.slice(),g=c?t.dilations.slice():[],b=t.pads.slice();Ht.adjustPoolAttributes(o,u,p,m,g,b);let 
_=Ht.computePoolOutputShape(o,u,m,g,p,b,t.autoPad),w=Object.assign({},t);c?Object.assign(w,{kernelShape:p,strides:m,pads:b,dilations:g,cacheKey:t.cacheKey}):Object.assign(w,{kernelShape:p,strides:m,pads:b,cacheKey:t.cacheKey});let x=_.slice();return x.push(x.splice(1,1)[0]),[w,n?x:_]},Zm=(e,t)=>{let o=t.format==="NHWC",n=L.size(e),u=L.size(t.kernelShape),c=[{type:12,data:n},{type:12,data:u}],p=[{name:"outputSize",type:"u32"},{name:"kernelSize",type:"u32"}];if(t.kernelShape.length<=2){let m=t.kernelShape[t.kernelShape.length-1],g=t.strides[t.strides.length-1],b=t.pads[t.pads.length/2-1],_=t.pads[t.pads.length-1],w=!!(b+_);c.push({type:12,data:m},{type:12,data:g},{type:12,data:b},{type:12,data:_}),p.push({name:"kw",type:"u32"},{name:"sw",type:"u32"},{name:"pwStart",type:"u32"},{name:"pwEnd",type:"u32"});let x=!1;if(t.kernelShape.length===2){let S=t.kernelShape[t.kernelShape.length-2],C=t.strides[t.strides.length-2],T=t.pads[t.pads.length/2-2],z=t.pads[t.pads.length-2];x=!!(T+z),c.push({type:12,data:S},{type:12,data:C},{type:12,data:T},{type:12,data:z}),p.push({name:"kh",type:"u32"},{name:"sh",type:"u32"},{name:"phStart",type:"u32"},{name:"phEnd",type:"u32"})}return[c,p,!0,w,x]}else{if(o)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let m=L.computeStrides(t.kernelShape);c.push({type:12,data:m},{type:12,data:t.pads},{type:12,data:t.strides}),p.push({name:"kernelStrides",type:"u32",length:m.length},{name:"pads",type:"u32",length:t.pads.length},{name:"strides",type:"u32",length:t.strides.length});let g=t.pads.reduce((b,_)=>b+_);return[c,p,!!g,!1,!1]}},Qm=(e,t,o,n,u,c,p,m,g,b,_,w)=>{let x=u.format==="NHWC",S=t.type.value,C=Y("output",t.type.tensor,n);if(u.kernelShape.length<=2){let T="",z="",k="",A=o-(x?2:1);if(_?T=` + for (var i: u32 = 0u; i < uniforms.kw; i++) { + xIndices[${A}] = indices[${A}] * uniforms.sw - uniforms.pwStart + i; + if (xIndices[${A}] < 0 || xIndices[${A}] + >= uniforms.x_shape[${A}]) { + pad++; + continue; + 
} + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${c} + }`:T=` + for (var i: u32 = 0u; i < uniforms.kw; i++) { + xIndices[${A}] = indices[${A}] * uniforms.sw - uniforms.pwStart + i; + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${c} + }`,u.kernelShape.length===2){let B=o-(x?3:2);w?z=` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${B}] = indices[${B}] * uniforms.sh - uniforms.phStart + j; + if (xIndices[${B}] < 0 || xIndices[${B}] >= uniforms.x_shape[${B}]) { + pad += i32(uniforms.kw); + continue; + } + `:z=` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${B}] = indices[${B}] * uniforms.sh - uniforms.phStart + j; + `,k=` + } + `}return` + ${e.registerUniforms(g).declareVariables(t,C)} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + + let indices = ${C.offsetToIndices("global_idx")}; + var xIndices = ${C.offsetToIndices("global_idx")}; + + var value = ${S}(${m}); + var pad = 0; + ${z} + ${T} + ${k} + ${p} + + output[global_idx] = value; + }`}else{if(x)throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");let T=u.kernelShape.length,z=u.pads.length,k="";return b?k=` + if (xIndices[j] >= uniforms.x_shape[j]) { + pad++; + isPad = true; + break; + } + } + if (!isPad) { + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${c} + }`:k=` + } + let x_val = x[${t.indicesToOffset("xIndices")}]; + ${c} + `,` + ${e.registerUniforms(g).declareVariables(t,C)} + + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let indices = ${C.offsetToIndices("global_idx")}; + var xIndices = ${C.offsetToIndices("global_idx")}; + + var offsets: array; + + var value = ${S}(${m}); + var pad = 0; + var isPad = false; + + for (var i: u32 = 0u; i < uniforms.kernelSize; i++) { + var offset = i; + for (var j = 0u; j < ${T-1}u; j++) { + offsets[j] = offset / ${ae("uniforms.kernelStrides","j",T)}; + offset -= offsets[j] * 
${ae("uniforms.kernelStrides","j",T)}; + } + offsets[${T-1}] = offset; + + isPad = false; + for (var j = ${o-T}u; j < ${o}u; j++) { + xIndices[j] = indices[j] * ${ae("uniforms.strides",`j - ${o-T}u`,T)} + + offsets[j - ${o-T}u] - ${ae("uniforms.pads","j - 2u",z)}; + ${k} + } + ${p} + + output[global_idx] = value; + }`}},Ym=e=>`${e.format};${e.ceilMode};${e.autoPad};${e.kernelShape.length}`,s$=e=>`${Ym(e)};${e.countIncludePad}`,u$=e=>`${Ym(e)};${e.storageOrder};${e.dilations}`,Xm=e=>({format:e.format,autoPad:["NOTSET","VALID","SAME_UPPER","SAME_LOWER"][e.auto_pad],ceilMode:e.ceil_mode,kernelShape:e.kernel_shape,strides:e.strides,pads:e.pads}),eh=(e,t,o,n)=>{let[u,c]=Jm(t,n,o),p=F("x",t.dataType,t.dims.length),m=p.type.value,g="value += x_val;",b="";u.countIncludePad?b+=`value /= ${m}(uniforms.kernelSize);`:b+=`value /= ${m}(i32(uniforms.kernelSize) - pad);`;let[_,w,x,S,C]=Zm(c,u);_.push(...te(t.dims,c));let T=["rank"];return{name:e,shaderCache:{hint:`${n.cacheKey};${x};${S};${C}`,inputDependencies:T},getRunData:()=>({outputs:[{dims:c,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(L.size(c)/64)},programUniforms:_}),getShaderSource:z=>Qm(z,p,t.dims.length,c.length,u,g,b,0,w,x,S,C)}},th=e=>{let t=e.count_include_pad!==0,o=Xm(e);if(o.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for AveragePool");let n={countIncludePad:t,...o,cacheKey:""};return{...n,cacheKey:s$(n)}},rh=(e,t)=>{Cn(e.inputs),e.compute(eh("AveragePool",e.inputs[0],!1,t))},nh={autoPad:"",ceilMode:0,countIncludePad:!1,kernelShape:[],strides:[],pads:[],storageOrder:0,dilations:[]},ih=e=>{let t=e.format;return{format:t,...nh,cacheKey:t}},oh=(e,t)=>{Cn(e.inputs),e.compute(eh("GlobalAveragePool",e.inputs[0],!0,t))},ah=(e,t,o,n)=>{let[u,c]=Jm(t,n,o),p=` + value = max(x_val, value); + `,m="",g=F("x",t.dataType,t.dims.length),b=["rank"],[_,w,x,S,C]=Zm(c,u);return 
_.push(...te(t.dims,c)),{name:e,shaderCache:{hint:`${n.cacheKey};${x};${S};${C}`,inputDependencies:b},getRunData:()=>({outputs:[{dims:c,dataType:t.dataType}],dispatchGroup:{x:Math.ceil(L.size(c)/64)},programUniforms:_}),getShaderSource:T=>Qm(T,g,t.dims.length,c.length,u,p,m,t.dataType===10?-65504:-1e5,w,x,S,C)}},sh=(e,t)=>{Cn(e.inputs),e.compute(ah("MaxPool",e.inputs[0],!1,t))},uh=e=>{let t=e.storage_order,o=e.dilations,n=Xm(e);if(t!==0)throw new Error("column major storage order is not yet supported for MaxPool");if(n.ceilMode!==0)throw new Error("using ceil() in shape computation is not yet supported for MaxPool");let u={storageOrder:t,dilations:o,...n,cacheKey:""};return{...u,cacheKey:u$(u)}},lh=e=>{let t=e.format;return{format:t,...nh,cacheKey:t}},dh=(e,t)=>{Cn(e.inputs),e.compute(ah("GlobalMaxPool",e.inputs[0],!0,t))}});var d$,c$,ph,fh,mh=X(()=>{"use strict";ce();be();We();we();d$=(e,t)=>{if(e.length<2||e.length>3)throw new Error("DequantizeLinear requires 2 or 3 inputs.");if(e.length===3&&e[1].dims===e[2].dims)throw new Error("x-scale and x-zero-point must have the same shape.");if(e.length===3&&e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[0].dataType===6&&e.length>2)throw new Error("In the case of dequantizing int32 there is no zero point.");if(e[1].dims.length!==0&&e[1].dims.length!==1&&e[1].dims.length!==e[0].dims.length)throw new Error("scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.");if(e.length>2){if(e[0].dataType!==e[2].dataType)throw new Error("x and x-zero-point must have the same data type.");if(e[1].dims.length!==e[2].dims.length)throw new Error("scale and zero-point inputs must have the same rank.");if(!e[1].dims.map((o,n)=>o===e[2].dims[n]).reduce((o,n)=>o&&n,!0))throw new Error("scale and zero-point inputs must have the same shape.")}if(t.blockSize>0){if(e[1].dims.length===0||e[1].dims.length===1&&e[1].dims[0]===1)throw new Error("blockSize must be 
set only for block quantization.");if(!e[1].dims.map((u,c)=>c===t.axis||u===e[0].dims[c]).reduce((u,c)=>u&&c,!0))throw new Error("For block qunatization, scale input shape to match the input shape except for the axis");if(e[1].dims.length!==e[0].dims.length)throw new Error("For block qunatization the scale input rank must be the same as the x rank.");let o=e[0].dims[t.axis],n=e[1].dims[t.axis];if(t.blockSizeMath.ceil(o/(n-1)-1))throw new Error("blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].")}},c$=(e,t)=>{let o=L.normalizeAxis(t.axis,e[0].dims.length),n=e[0].dataType,u=n===3,c=e[0].dims,p=e[1].dataType,m=L.size(c),g=n===3||n===2,b=g?[Math.ceil(L.size(e[0].dims)/4)]:e[0].dims,_=e[1].dims,w=e.length>2?e[2]:void 0,x=w?g?[Math.ceil(L.size(w.dims)/4)]:w.dims:void 0,S=_.length===0||_.length===1&&_[0]===1,C=S===!1&&_.length===1,T=Ae(m),z=S&&(!g||T===4),k=z?T:1,A=z&&!g?T:1,O=F("input",g?12:n,b.length,A),B=F("scale",p,_.length),W=w?F("zero_point",g?12:n,x.length):void 0,N=Y("output",p,c.length,k),q=[O,B];W&&q.push(W);let K=[b,_];w&&K.push(x);let Q=[{type:12,data:m/k},{type:12,data:o},{type:12,data:t.blockSize},...te(...K,c)],ne=se=>{let ue=[{name:"output_size",type:"u32"},{name:"axis",type:"u32"},{name:"block_size",type:"u32"}];return` + ${se.registerUniforms(ue).declareVariables(...q,N)} + ${se.mainStart()} + ${se.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let output_indices = ${N.offsetToIndices("global_idx")}; + + // Set input x + ${g?` + let input = ${O.getByOffset("global_idx / 4")}; + let x_vec = ${u?"unpack4xI8(input)":"unpack4xU8(input)"}; + let x_value = ${k===1?"x_vec[global_idx % 4]":"x_vec"};`:`let x_value = ${O.getByOffset("global_idx")};`}; + + // Set scale input + ${S?`let scale_value= ${B.getByOffset("0")}`:C?` + let scale_index = ${N.indicesGet("output_indices","uniforms.axis")}; + let scale_value= ${B.getByOffset("scale_index")};`:` + var scale_indices: ${B.type.indices} = output_indices; + let index = 
${B.indicesGet("scale_indices","uniforms.axis")} / uniforms.block_size; + ${B.indicesSet("scale_indices","uniforms.axis","index")}; + let scale_value= ${B.getByIndices("scale_indices")};`}; + + // Set zero-point input + ${W?S?g?` + let zero_point_input = ${W.getByOffset("0")}; + let zero_point_vec = ${u?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value= zero_point_vec[0]`:`let zero_point_value = ${W.getByOffset("0")}`:C?g?` + let zero_point_index = ${N.indicesGet("output_indices","uniforms.axis")}; + let zero_point_input = ${W.getByOffset("zero_point_index / 4")}; + let zero_point_vec = ${u?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value = zero_point_vec[zero_point_index % 4]`:` + let zero_point_index = ${N.indicesGet("output_indices","uniforms.axis")}; + let zero_point_value = ${W.getByOffset("zero_point_index")};`:g?` + let zero_point_offset = ${B.indicesToOffset("scale_indices")}; + let zero_point_input = ${W.getByOffset("zero_point_offset / 4")}; + let zero_point_vec = ${u?"unpack4xI8(zero_point_input)":"unpack4xU8(zero_point_input)"}; + let zero_point_value = zero_point_vec[zero_point_offset % 4];`:`let zero_point_value = ${W.getByIndices("scale_indices")};`:`let zero_point_value = ${g?u?"i32":"u32":O.type.value}(0);`}; + // Compute and write output + ${N.setByOffset("global_idx",`${N.type.value}(x_value - zero_point_value) * scale_value`)}; + }`};return{name:"DequantizeLinear",shaderCache:{hint:t.cacheKey,inputDependencies:W?["rank","rank","rank"]:["rank","rank"]},getShaderSource:ne,getRunData:()=>({outputs:[{dims:c,dataType:p}],dispatchGroup:{x:Math.ceil(m/k/64),y:1,z:1},programUniforms:Q})}},ph=(e,t)=>{d$(e.inputs,t),e.compute(c$(e.inputs,t))},fh=e=>pe({axis:e.axis,blockSize:e.blockSize})});var p$,f$,hh,gh=X(()=>{"use strict";nt();ce();we();p$=(e,t,o)=>{let n=e===t,u=et&&o>0;if(n||u||c)throw new Error("Range these inputs' contents are invalid.")},f$=(e,t,o,n)=>{let 
u=Math.abs(Math.ceil((t-e)/o)),c=[u],p=u,m=[{type:12,data:p},{type:n,data:e},{type:n,data:o},...te(c)],g=b=>{let _=Y("output",n,c.length),w=_.type.value,x=[{name:"outputSize",type:"u32"},{name:"start",type:w},{name:"delta",type:w}];return` + ${b.registerUniforms(x).declareVariables(_)} + ${b.mainStart()} + ${b.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + output[global_idx] = uniforms.start + ${w}(global_idx) * uniforms.delta; + }`};return{name:"Range",shaderCache:{hint:`${n}`},getShaderSource:g,getRunData:()=>({outputs:[{dims:c,dataType:n}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:m})}},hh=e=>{let t=0,o=0,n=0;e.inputs[0].dataType===6?(t=e.inputs[0].getInt32Array()[0],o=e.inputs[1].getInt32Array()[0],n=e.inputs[2].getInt32Array()[0]):e.inputs[0].dataType===1&&(t=e.inputs[0].getFloat32Array()[0],o=e.inputs[1].getFloat32Array()[0],n=e.inputs[2].getFloat32Array()[0]),ze.webgpu.validateInputContent&&p$(t,o,n),e.compute(f$(t,o,n,e.inputs[0].dataType),{inputs:[]})}});var m$,h$,yh,bh,_h=X(()=>{"use strict";ce();be();We();we();m$=(e,t,o,n)=>{if(e!=="none"&&n!=="i32"&&n!=="u32"&&n!=="f32")throw new Error(`Input ${n} is not supported with reduction ${e}.`);let u=`{ + var oldValue = 0; + loop { + let newValueF32 =`,c=`; + let newValue = bitcast(newValueF32); + let res = atomicCompareExchangeWeak(&${t}, oldValue, newValue); + if res.exchanged { + break; + } + oldValue = res.old_value; + } + }`;switch(e){case"none":return`${t}=${o};`;case"add":return n==="i32"||n==="u32"?`atomicAdd(&${t}, bitcast<${n}>(${o}));`:` + ${u}bitcast<${n}>(oldValue) + (${o})${c}`;case"max":return n==="i32"||n==="u32"?`atomicMax(&${t}, bitcast<${n}>(${o}));`:` + ${u}max(bitcast(oldValue), (${o}))${c}`;case"min":return n==="i32"||n==="u32"?`atomicMin(&${t}, bitcast<${n}>(${o}));`:`${u}min(bitcast<${n}>(oldValue), (${o}))${c}`;case"mul":return`${u}(bitcast<${n}>(oldValue) * (${o}))${c}`;default:throw new Error(`Reduction ${e} is not supported.`)}},h$=(e,t)=>{let 
o=e[0].dims,n=e[1].dims,u=o,c=1,p=Math.ceil(L.sizeToDimension(n,n.length-1)/c),m=n[n.length-1],g=L.sizeFromDimension(o,m),b=[{type:12,data:p},{type:12,data:m},{type:12,data:g},...te(e[1].dims,e[2].dims,u)],_=w=>{let x=F("indices",e[1].dataType,e[1].dims.length),S=F("updates",e[2].dataType,e[2].dims.length,c),C=t.reduction!=="none"&&t.reduction!==""?Gc("output",e[0].dataType,u.length):Y("output",e[0].dataType,u.length,c);return` + ${w.registerUniform("output_size","u32").registerUniform("last_index_dimension","u32").registerUniform("num_updates_elements","u32").declareVariables(x,S,C)} + ${w.mainStart()} + ${w.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var data_offset = 0u; + let indices_start = uniforms.last_index_dimension * global_idx; + let indices_end = indices_start + uniforms.last_index_dimension; + for (var i = indices_start; i < indices_end; i++) { + var index = i32(indices[i].x); + ${e[0].dims.length===1?` + let element_count_dim = uniforms.output_strides; + let dim_value = uniforms.output_shape;`:` + let element_count_dim = uniforms.output_strides[i - indices_start]; + let dim_value = uniforms.output_shape[i - indices_start];`} + if (index >= 0) { + if (index >= i32(dim_value)) { + index = i32(dim_value - 1); + } + } else { + if (index < -i32(dim_value)) { + index = 0; + } else { + index += i32(dim_value); + } + } + data_offset += u32((u32(index) * element_count_dim)); + } + + for (var i = 0u; i < uniforms.num_updates_elements; i++) { + let value = updates[uniforms.num_updates_elements * global_idx + i]; + ${m$(t.reduction,"output[data_offset + i]","value",C.type.value)} + } + + 
}`};return{name:"ScatterND",shaderCache:{hint:`${t.cacheKey}_${t.reduction}`,inputDependencies:["rank","rank"]},getRunData:()=>({outputs:[{dims:u,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(p/64)},programUniforms:b}),getShaderSource:_}},yh=e=>pe({reduction:e.reduction}),bh=(e,t)=>{e.compute(h$(e.inputs,t),{inputs:[e.inputs[1],e.inputs[2]],outputs:[]})}});var g$,y$,b$,wh,_$,w$,v$,x$,$$,C$,S$,T$,vh,I$,A$,k$,E$,P$,xh,$h,Ch=X(()=>{"use strict";ce();be();We();we();g$=(e,t)=>{if(e.every(o=>o>0||(()=>{throw new Error("Resize requires scales input values to be positive")})),e.length>0){if(t.mode==="linear"){if(!(e.length===2||e.length===3||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1||e.length===5&&e[0]===1&&e[1]===1))throw new Error(`For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and + one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`)}else if(t.mode==="cubic"&&!(e.length===2||e.length===4&&e[0]===1&&e[1]===1||e.length===4&&e[0]===1&&e[3]===1))throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode")}},y$=(e,t,o)=>{t.every(u=>u>=0&&u{throw new Error("Resize requires axes input values to be positive and less than rank")}));let n=new Array(o).fill(1);return t.forEach((u,c)=>n[u]=e[c]),n},b$=(e,t,o,n,u,c)=>{let[p,m,g]=o>10?[1,2,3]:[-1,e.length>1?1:-1,-1],b=e[0].dims.length;if(p>0&&e.length>p&&e[p].dims.length>0)e[p].getFloat32Array().forEach(_=>c.push(_));else if(t.coordinateTransformMode==="tf_crop_and_resize")throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");if(m>0&&e.length>m&&e[m].dims.length===1&&e[m].dims[0]>0){if(e[m].getFloat32Array().forEach(_=>n.push(_)),n.length!==0&&n.length!==b&&o>=18&&n.length!==t.axes.length)throw new Error("Resize requires scales input size to be same as input rank or axes size for opset 18 and 
up");g$(n,t),t.axes.length>0&&y$(n,t.axes,b).forEach((_,w)=>n[w]=_)}if(g>0&&e.length>g&&e[g].dims.length===1&&e[g].dims[0]>0&&(e[g].getBigInt64Array().forEach(_=>u.push(Number(_))),u.length!==0&&u.length!==b&&o>=18&&u.length!==t.axes.length))throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");if(t.axes.length>0){if(n.length!==0&&n.length!==t.axes.length)throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified');if(u.length!==0&&u.length!==t.axes.length)throw new Error('Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified')}if(typeof n<"u"&&typeof u<"u"&&n.length>0&&u.length>b)throw new Error("Resize requires only of scales or sizes to be specified")},wh=(e,t,o,n)=>` + // The whole part and the fractional part are calculated separately due to inaccuracy of floating + // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an + // offset-by-one error later in floor(). 
+ let big = (${e}) * (${t}); + let whole = ${n}(big / (${o})); + let fract = ${n}(big % (${o})) / ${n}(${o}); + return whole + fract; +`,_$=(e,t)=>`fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32, + lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${t} { `+(()=>{switch(e){case"asymmetric":return` + if (xScale < 1.0 || floor(xScale) != xScale) { + return ${t}(xResized) / ${t}(xScale); + } else { + ${wh("xResized","lengthOriginal","lengthResized",t)} + } + `;case"pytorch_half_pixel":return`if (lengthResized > 1) { + return (${t}(xResized) + 0.5) / ${t}(xScale) - 0.5; + } else { + return 0.0; + }`;case"tf_half_pixel_for_nn":return`return (${t}(xResized) + 0.5) / ${t}(xScale);`;case"align_corners":return`if (lengthResized == 1) { + return 0.0; + } else { + ${wh("xResized","lengthOriginal - 1","lengthResized - 1",t)} + }`;case"tf_crop_and_resize":return`if (lengthResized > 1) { + return ${t}(roiStart) * ${t}(lengthOriginal - 1) + + (${t}(xResized) * ${t}(roiEnd - roiStart) * ${t}(lengthOriginal - 1)) / + ${t}(lengthResized - 1); + } else { + return 0.5 * ${t}(roiStart + roiEnd) * ${t}(lengthOriginal - 1); + }`;case"half_pixel_symmetric":return`const outputWidth = ${t}xScale * ${t}(lengthResized); + const adjustment = ${t}(lengthResized) / outputWidth; + const center = ${t}(lengthOriginal) / 2; + const offset = center * (1 - adjustment); + return offset + ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;case"half_pixel":return`return ((${t}(xResized) + 0.5) / ${t}(xScale)) - 0.5;`;default:throw new Error(`Coordinate transform mode ${e} is not supported`)}})()+"}",w$=(e,t,o)=>`fn getNearestPixelFromOriginal(xOriginal: ${o}, isDownSample: bool) -> ${o} {`+(()=>{switch(e){case"round_prefer_ceil":return"if (fract(xOriginal) == 0.5) { return ceil(xOriginal); } else { return round(xOriginal); }";case"floor":return"return floor(xOriginal);";case"ceil":return"return ceil(xOriginal);";case"round_prefer_floor":return"if 
(fract(xOriginal) == 0.5) { return floor(xOriginal); } else { return round(xOriginal); }";case"simple":default:if(t<11)return"if (isDownSample) { return ceil(xOriginal); } else { return xOriginal; }";throw new Error(`Nearest mode ${e} is not supported`)}})()+"}",v$=(e,t,o)=>{let n=new Array(o).fill(0).concat(new Array(o).fill(1)),u=e.length===0?n:e.slice();return t.length>0?(t.forEach((c,p)=>{n[c]=u[p],n[p+o]=u[t.length+p]}),n):u},x$=(e,t,o,n)=>{let u=[];if(o.length>0)if(n.length>0){if(e.forEach(c=>u.push(c)),Math.max(...n)>e.length)throw new Error("axes is out of bound");n.forEach((c,p)=>u[c]=o[p])}else o.forEach(c=>u.push(c));else{if(t.length===0)throw new Error("Resize requires either scales or sizes.");u=e.map((c,p)=>Math.round(c*t[p]))}return u},$$=(e,t,o)=>{let n=(()=>{switch(o.keepAspectRatioPolicy){case"not_larger":return o.axes.length>0?Math.min(...o.axes.map(c=>t[c]),Number.MAX_VALUE):Math.min(...t,Number.MAX_VALUE);case"not_smaller":return o.axes.length>0?Math.max(...o.axes.map(c=>t[c]),Number.MIN_VALUE):Math.max(...t,Number.MIN_VALUE);default:throw new Error(`Keep aspect ratio policy ${o.keepAspectRatioPolicy} is not supported`)}})();t.fill(1,0,t.length);let u=e.slice();return o.axes.length>0?(o.axes.forEach(c=>t[c]=n),o.axes.forEach(c=>u[c]=Math.round(e[c]*t[c]))):(t.fill(n,0,t.length),u.forEach((c,p)=>u[p]=Math.round(c*t[p]))),u},C$=(e,t,o,n,u)=>` + fn calculateOriginalIndicesFromOutputIndices(output_indices: ${e.type.indices}) -> array<${e.type.value}, ${o.length}> { + var original_indices: array<${e.type.value}, ${o.length}>; + for (var i:u32 = 0; i < ${o.length}; i++) { + var output_index = ${e.indicesGet("output_indices","i")}; + var scale = ${ae("uniforms.scales","i",n)}; + var roi_low = ${ae("uniforms.roi","i",u)}; + var roi_hi = ${ae("uniforms.roi",`i + ${t.length}`,u)}; + if (scale == 1.0) { + original_indices[i] = ${e.type.value}(output_index); + } else { + var input_shape_i = ${ae("uniforms.input_shape","i",t.length)}; + var output_shape_i = 
${ae("uniforms.output_shape","i",o.length)}; + original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); + } + } + return original_indices; + }`,S$=(e,t,o,n,u,c,p)=>` + fn calculateInputIndicesFromOutputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} { + var input_indices: ${e.type.indices}; + for (var i:u32 = 0; i < ${n.length}; i++) { + var output_index = ${t.indicesGet("output_indices","i")}; + var input_index: u32; + var scale = ${ae("uniforms.scales","i",u)}; + if (scale == 1.0) { + input_index = output_index; + } else { + var roi_low = ${ae("uniforms.roi","i",c)}; + var roi_hi = ${ae("uniforms.roi",`i + ${o.length}`,c)}; + var input_shape_i = ${ae("uniforms.input_shape","i",o.length)}; + var output_shape_i = ${ae("uniforms.output_shape","i",n.length)}; + var original_idx = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); + if (!${p} || (original_idx >= 0 && original_idx < ${t.type.value}(input_shape_i))) { + if (original_idx < 0) { + input_index = 0; + } else if (original_idx > ${t.type.value}(input_shape_i - 1)) { + input_index = input_shape_i - 1; + } else { + input_index = u32(getNearestPixelFromOriginal(original_idx, scale < 1)); + } + } else { + input_index = u32(original_idx); + } + } + ${e.indicesSet("input_indices","i","input_index")} + } + return input_indices; + }`,T$=(e,t)=>` + fn checkInputIndices(input_indices: ${e.type.indices}) -> bool { + for (var i:u32 = 0; i < ${t.length}; i++) { + var input_index = ${e.indicesGet("input_indices","i")}; + if (input_index < 0 || input_index >= ${ae("uniforms.input_shape","i",t.length)}) { + return false; + } + } + return true; + }`,vh=(e,t,o,n)=>e.rank>n?` + ${e.indicesSet("input_indices",t,"channel")}; + ${e.indicesSet("input_indices",o,"batch")}; +`:"",I$=(e,t,o,n,u)=>{let[p,m,g,b]=o.length===2?[-1,0,1,-1]:[0,2,3,1],_=e.type.value;return` + fn 
getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${_} { + var input_indices: ${e.type.indices}; + ${e.indicesSet("input_indices",m,`max(0, min(row, ${o[m]} - 1))`)}; + ${e.indicesSet("input_indices",g,`max(0, min(col, ${o[g]} - 1))`)}; + ${vh(e,b,p,2)} + return ${e.getByIndices("input_indices")}; + } + + fn bilinearInterpolation(output_indices: ${t.type.indices}) -> ${_} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var row:${_} = originalIndices[${m}]; + var col:${_} = originalIndices[${g}]; + ${n?`if (row < 0 || row > (${o[m]} - 1) || col < 0 || col > (${o[g]} - 1)) { + return ${u}; + }`:""}; + row = max(0, min(row, ${o[m]} - 1)); + col = max(0, min(col, ${o[g]} - 1)); + var row1: u32 = u32(row); + var col1: u32 = u32(col); + var row2: u32 = u32(row + 1); + var col2: u32 = u32(col + 1); + var channel: u32 = ${o.length>2?`u32(originalIndices[${b}])`:"0"}; + var batch: u32 = ${o.length>2?`u32(originalIndices[${p}])`:"0"}; + var x11: ${_} = getInputValue(batch, channel, row1, col1); + var x12: ${_} = getInputValue(batch, channel, row1, col2); + var x21: ${_} = getInputValue(batch, channel, row2, col1); + var x22: ${_} = getInputValue(batch, channel, row2, col2); + var dx1: ${_} = abs(row - ${_}(row1)); + var dx2: ${_} = abs(${_}(row2) - row); + var dy1: ${_} = abs(col - ${_}(col1)); + var dy2: ${_} = abs(${_}(col2) - col); + if (row1 == row2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (col1 == col2) { + dy1 = 0.5; + dy2 = 0.5; + } + return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1); + }`},A$=(e,t,o,n,u,c,p,m,g,b)=>{let _=o.length===2,w=!0,[x,S]=_?[0,1]:w?[2,3]:[1,2],C=e.type.value,T=z=>{let k=z===x?"row":"col";return` + fn ${k}CubicInterpolation(input_indices: ${e.type.indices}, output_indices: ${t.type.indices}) -> ${C} { + var output_index = ${t.indicesGet("output_indices",z)}; + var originalIdx: ${C} = getOriginalCoordinateFromResizedCoordinate(output_index, ${u[z]}, + ${n[z]}, ${o[z]}, 
${c[z]}, ${c[z]} + ${o.length}); + var fractOriginalIdx: ${C} = originalIdx - floor(originalIdx); + var coefs = getCubicInterpolationCoefs(fractOriginalIdx); + + if (${m} && (originalIdx < 0 || originalIdx > (${o[z]} - 1))) { + return ${g}; + } + var data: array<${C}, 4> = array<${C}, 4>(0.0, 0.0, 0.0, 0.0); + for (var i: i32 = -1; i < 3; i++) { + var ${k}: ${C} = originalIdx + ${C}(i); + if (${k} < 0 || ${k} >= ${o[z]}) { + ${b?`coefs[i + 1] = 0.0; + continue;`:m?`return ${g};`:`${k} = max(0, min(${k}, ${o[z]} - 1));`}; + } + var input_indices_copy: ${e.type.indices} = input_indices; + ${e.indicesSet("input_indices_copy",z,`u32(${k})`)}; + data[i + 1] = ${z===x?e.getByIndices("input_indices_copy"):"rowCubicInterpolation(input_indices_copy, output_indices)"}; + } + return cubicInterpolation1D(data, coefs); + }`};return` + ${T(x)}; + ${T(S)}; + fn getCubicInterpolationCoefs(s: ${C}) -> array<${C}, 4> { + var absS = abs(s); + var coeffs: array<${C}, 4> = array<${C}, 4>(0.0, 0.0, 0.0, 0.0); + var oneMinusAbsS: ${C} = 1.0 - absS; + var twoMinusAbsS: ${C} = 2.0 - absS; + var onePlusAbsS: ${C} = 1.0 + absS; + coeffs[0] = ((${p} * onePlusAbsS - 5 * ${p}) * onePlusAbsS + 8 * ${p}) * onePlusAbsS - 4 * ${p}; + coeffs[1] = ((${p} + 2) * absS - (${p} + 3)) * absS * absS + 1; + coeffs[2] = ((${p} + 2) * oneMinusAbsS - (${p} + 3)) * oneMinusAbsS * oneMinusAbsS + 1; + coeffs[3] = ((${p} * twoMinusAbsS - 5 * ${p}) * twoMinusAbsS + 8 * ${p}) * twoMinusAbsS - 4 * ${p}; + return coeffs; + } + + fn cubicInterpolation1D(x: array<${C}, 4>, coefs: array<${C}, 4>) -> ${C} { + var coefsSum: ${C} = coefs[0] + coefs[1] + coefs[2] + coefs[3]; + return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum; + } + + fn bicubicInterpolation(output_indices: ${t.type.indices}) -> ${C} { + var input_indices: ${e.type.indices} = output_indices; + return colCubicInterpolation(input_indices, output_indices); + } + 
`},k$=(e,t,o,n,u)=>{let[p,m,g,b,_]=o.length===3?[-1,0,1,2,-1]:[0,2,3,4,1],w=e.type.value;return` + fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${w} { + var input_indices: ${e.type.indices}; + ${e.indicesSet("input_indices",m,`max(0, min(depth, ${o[m]} - 1))`)}; + ${e.indicesSet("input_indices",g,`max(0, min(height, ${o[g]} - 1))`)}; + ${e.indicesSet("input_indices",b,`max(0, min(width, ${o[b]} - 1))`)}; + ${vh(e,_,p,3)} + return ${e.getByIndices("input_indices")}; + } + + fn trilinearInterpolation(output_indices: ${t.type.indices}) -> ${w} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var depth:${w} = originalIndices[${m}]; + var height:${w} = originalIndices[${g}]; + var width:${w} = originalIndices[${b}]; + ${n?`if (depth < 0 || depth > (${o[m]} - 1) || height < 0 || height > (${o[g]} - 1) || width < 0 || (width > ${o[b]} - 1)) { + return ${u}; + }`:""}; + + depth = max(0, min(depth, ${o[m]} - 1)); + height = max(0, min(height, ${o[g]} - 1)); + width = max(0, min(width, ${o[b]} - 1)); + var depth1: u32 = u32(depth); + var height1: u32 = u32(height); + var width1: u32 = u32(width); + var depth2: u32 = u32(depth + 1); + var height2: u32 = u32(height + 1); + var width2: u32 = u32(width + 1); + var channel: u32 = ${o.length>3?`u32(originalIndices[${_}])`:"0"}; + var batch: u32 = ${o.length>3?`u32(originalIndices[${p}])`:"0"}; + + var x111: ${w} = getInputValue(batch, channel, depth1, height1, width1); + var x112: ${w} = getInputValue(batch, channel, depth1, height1, width2); + var x121: ${w} = getInputValue(batch, channel, depth1, height2, width1); + var x122: ${w} = getInputValue(batch, channel, depth1, height2, width2); + var x211: ${w} = getInputValue(batch, channel, depth2, height1, width1); + var x212: ${w} = getInputValue(batch, channel, depth2, height1, width2); + var x221: ${w} = getInputValue(batch, channel, depth2, height2, width1); + var x222: ${w} = getInputValue(batch, 
channel, depth2, height2, width2); + var dx1: ${w} = abs(depth - ${w}(depth1)); + var dx2: ${w} = abs(${w}(depth2) - depth); + var dy1: ${w} = abs(height - ${w}(height1)); + var dy2: ${w} = abs(${w}(height2) - height); + var dz1: ${w} = abs(width - ${w}(width1)); + var dz2: ${w} = abs(${w}(width2) - width); + if (depth1 == depth2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (height1 == height2) { + dy1 = 0.5; + dy2 = 0.5; + } + if (width1 == width2) { + dz1 = 0.5; + dz2 = 0.5; + } + return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 + + x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1); + }`},E$=(e,t,o,n,u,c)=>{let p=e.dims,m=v$(c,t.axes,p.length),g=x$(p,n,u,t.axes),b=n.slice();n.length===0&&(b=p.map((A,O)=>A===0?1:g[O]/A),t.keepAspectRatioPolicy!=="stretch"&&(g=$$(p,b,t)));let _=Y("output",e.dataType,g.length),w=F("input",e.dataType,p.length),x=L.size(g),S=p.length===g.length&&p.every((A,O)=>A===g[O]),C=t.coordinateTransformMode==="tf_crop_and_resize",T=t.extrapolationValue,z=w.type.value,k=A=>` + ${S?"":` + ${_$(t.coordinateTransformMode,z)}; + ${(()=>{switch(t.mode){case"nearest":return` + ${T$(w,p)}; + ${w$(t.nearestMode,o,z)}; + ${S$(w,_,p,g,b.length,m.length,C)}; + `;case"linear":return` + ${C$(_,p,g,b.length,m.length)}; + ${(()=>{if(p.length===2||p.length===4)return`${I$(w,_,p,C,T)}`;if(p.length===3||p.length===5)return`${k$(w,_,p,C,T)}`;throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.")})()}; + `;case"cubic":return` + ${(()=>{if(p.length===2||p.length===4)return`${A$(w,_,p,g,b,m,t.cubicCoeffA,C,t.extrapolationValue,t.excludeOutside)}`;throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode.")})()}; + `;default:throw Error("Invalid resize mode")}})()}; + `} + 
${A.registerUniform("output_size","u32").registerUniform("scales","f32",b.length).registerUniform("roi","f32",m.length).declareVariables(w,_)} + ${A.mainStart()} + ${A.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + ${S?"output[global_idx] = input[global_idx];":` + let output_indices = ${_.offsetToIndices("global_idx")}; + var input_indices: ${w.type.indices}; + ${(()=>{switch(t.mode){case"nearest":return`input_indices = calculateInputIndicesFromOutputIndices(output_indices); + if (checkInputIndices(input_indices)) { + output[global_idx] = ${w.getByIndices("input_indices")}; + } else { + output[global_idx] = ${t.extrapolationValue}; + }`;case"linear":return`output[global_idx] = ${p.length===2||p.length===4?"bilinearInterpolation":"trilinearInterpolation"}(output_indices);`;case"cubic":return"output[global_idx] = bicubicInterpolation(output_indices);";default:throw Error(`Unsupported resize mode: ${t.mode}`)}})()}; +`} + }`;return{name:"Resize",shaderCache:{hint:`${t.cacheKey}|${o}|${b.length>0?t.mode==="cubic"?b:b.length:""}|${u.length>0?u:""}|${m.length>0?m:""}|${S}|${t.mode==="nearest"?p.length:p}`,inputDependencies:["rank"]},getShaderSource:k,getRunData:()=>({outputs:[{dims:g,dataType:e.dataType}],dispatchGroup:{x:Math.ceil(x/64)},programUniforms:[{type:12,data:x},{type:1,data:b},{type:1,data:m},...te(p,g)]})}},P$=e=>{let t=e.customDataBuffer;return new Uint32Array(t,t.byteOffset,1)[0]},xh=(e,t)=>{let o=[],n=[],u=[],c=P$(e);if(t.antialias!==0)throw Error("Only default value (0) for Antialias attribute is supported");b$(e.inputs,t,c,o,n,u),e.compute(E$(e.inputs[0],t,c,o,n,u),{inputs:[0]})},$h=e=>{let t=e.antialias,o=e.axes,n=e.coordinateTransformMode,u=e.cubicCoeffA,c=e.excludeOutside!==0,p=e.extrapolationValue,m=e.keepAspectRatioPolicy,g=e.mode,b=e.nearestMode===""?"simple":e.nearestMode;return 
pe({antialias:t,axes:o,coordinateTransformMode:n,cubicCoeffA:u,excludeOutside:c,extrapolationValue:p,keepAspectRatioPolicy:m,mode:g,nearestMode:b})}});var O$,z$,Sh,Th=X(()=>{"use strict";ce();be();we();O$=e=>{if(!e||e.length<3)throw new Error("layerNorm requires at least 3 inputs.");let t=e[0],o=e[1],n=e[2];if(t.dataType!==o.dataType||t.dataType!==n.dataType)throw new Error("All inputs must have the same data type");if(t.dims.length!==3&&t.dims.length!==2)throw new Error("Input must be 2D or 3D");if(o.dims.length!==3&&o.dims.length!==2)throw new Error("Skip must be 2D or 3D");let u=t.dims[t.dims.length-1],c=t.dims[t.dims.length-2];if(o.dims[o.dims.length-1]!==u)throw new Error("Skip must have the same hidden size as input");if(o.dims[o.dims.length-2]!==c)throw new Error("Skip must have the same sequence length as input");if(n.dims.length!==1)throw new Error("Gamma must be 1D");if(n.dims[n.dims.length-1]!==u)throw new Error("Gamma must have the same hidden size as input");if(e.length>3){let p=e[3];if(p.dims.length!==1)throw new Error("Beta must be 1D");if(p.dims[p.dims.length-1]!==u)throw new Error("Beta must have the same hidden size as input")}if(e.length>4){let p=e[4];if(p.dims.length!==1)throw new Error("Bias must be 1D");if(p.dims[p.dims.length-1]!==u)throw new Error("Bias must have the same hidden size as input")}},z$=(e,t,o,n)=>{let u=t.simplified,c=e[0].dims,p=L.size(c),m=c,g=p,b=c.slice(-1)[0],_=n?c.slice(0,-1).concat(1):[],w=!u&&e.length>3,x=e.length>4,S=n&&o>1,C=n&&o>2,T=o>3,z=64,k=Ae(b),A=[{type:12,data:g},{type:12,data:k},{type:12,data:b},{type:1,data:t.epsilon}],O=W=>{let 
N=[{name:"output_size",type:"u32"},{name:"components",type:"u32"},{name:"hidden_size",type:"u32"},{name:"epsilon",type:"f32"}],q=[F("x",e[0].dataType,e[0].dims,k),F("skip",e[1].dataType,e[1].dims,k),F("gamma",e[2].dataType,e[2].dims,k)];w&&q.push(F("beta",e[3].dataType,e[3].dims,k)),x&&q.push(F("bias",e[4].dataType,e[4].dims,k)),q.push(Y("output",e[0].dataType,m,k)),S&&q.push(Y("mean_output",1,_)),C&&q.push(Y("inv_std_output",1,_)),T&&q.push(Y("input_skip_bias_sum",e[0].dataType,m,k));let K=Be(e[0].dataType),Q=Be(1,k);return` + + ${W.registerUniforms(N).declareVariables(...q)} + var sum_shared : array<${Q}, ${z}>; + var sum_squared_shared : array<${Q}, ${z}>; + + ${W.mainStart([z,1,1])} + let ix = local_id.x; + let iy = global_id.x / ${z}; + + let hidden_size_vectorized: u32 = uniforms.hidden_size / uniforms.components; + var stride = hidden_size_vectorized / ${z}; + let offset = ix * stride + iy * hidden_size_vectorized; + let offset1d = stride * ix; + if (ix == ${z-1}) { + stride = hidden_size_vectorized - stride * ix; + } + for (var i: u32 = 0; i < stride; i++) { + let skip_value = skip[offset + i]; + let bias_value = ${x?"bias[offset1d + i]":K+"(0.0)"}; + let input_value = x[offset + i]; + let value = input_value + skip_value + bias_value; + ${T?"input_skip_bias_sum[offset + i] = value;":""} + output[offset + i] = value; + let f32_value = ${qt(K,k,"value")}; + sum_shared[ix] += f32_value; + sum_squared_shared[ix] += f32_value * f32_value; + } + workgroupBarrier(); + + var reduce_size : u32 = ${z}; + for (var curr_size = reduce_size >> 1; curr_size > 0; curr_size = reduce_size >> 1) { + reduce_size = curr_size + (reduce_size & 1); + if (ix < curr_size) { + sum_shared[ix] += sum_shared[ix + reduce_size]; + sum_squared_shared[ix] += sum_squared_shared[ix + reduce_size]; + } + workgroupBarrier(); + } + + let sum = sum_shared[0]; + let square_sum = sum_squared_shared[0]; + let mean = ${ut("sum",k)} / f32(uniforms.hidden_size); + let inv_std_dev = 
inverseSqrt(${ut("square_sum",k)} / f32(uniforms.hidden_size) ${u?"":"- mean * mean"} + uniforms.epsilon); + ${S?"mean_output[global_idx] = mean;":""} + ${C?"inv_std_output[global_idx] = inv_std_dev;":""} + + for (var i: u32 = 0; i < stride; i++) { + output[offset + i] = (output[offset + i] ${u?"":`- ${K}(mean)`}) * + ${K}(inv_std_dev) * gamma[offset1d + i] + ${w?"+ beta[offset1d + i]":""}; + } + }`},B=[{dims:m,dataType:e[0].dataType}];return o>1&&B.push({dims:_,dataType:1}),o>2&&B.push({dims:_,dataType:1}),o>3&&B.push({dims:c,dataType:e[0].dataType}),{name:"SkipLayerNormalization",shaderCache:{hint:`${k};${S};${C};${T}`,inputDependencies:e.map((W,N)=>"type")},getShaderSource:O,getRunData:()=>({outputs:B,dispatchGroup:{x:Math.ceil(g/b)},programUniforms:A})}},Sh=(e,t)=>{O$(e.inputs);let n=[0];e.outputCount>1&&n.push(-3),e.outputCount>2&&n.push(-3),e.outputCount>3&&n.push(3),e.compute(z$(e.inputs,t,e.outputCount,!1),{outputs:n})}});var B$,Sn,D$,Ih,j$,M$,Ah,kh,Eh=X(()=>{"use strict";ce();be();We();we();B$=(e,t)=>{if(!e||e.length<1)throw new Error("too few inputs");if(t.axes.length!==0){if(t.axes.length!==t.starts.length||t.axes.length!==t.ends.length)throw new Error("axes, starts and ends must have the same length")}else if(t.starts.length!==t.ends.length)throw new Error("starts and ends must have the same length");e.slice(1).forEach((o,n)=>{if(e[n+1].dataType!==6&&e[n+1].dataType!==7)throw new Error(`Input ${n} must be an array of int32 or int64`)})},Sn=(e,t)=>{let o=[];if(e.length>t)if(e[t].dataType===7)e[t].getBigInt64Array().forEach(n=>o.push(Number(n)));else if(e[t].dataType===6)e[t].getInt32Array().forEach(n=>o.push(Number(n)));else throw new Error(`Input ${t} must be an array of int32 or int64`);return o},D$=(e,t)=>{if(e.length>1){let o=Sn(e,1),n=Sn(e,2),u=Sn(e,3);return u.length===0&&(u=[...Array(e[0].dims.length).keys()]),pe({starts:o,ends:n,axes:u})}else return t},Ih=(e,t,o,n,u)=>{let c=e;return 
e<0&&(c+=o[n[t]]),u[t]<0?Math.max(0,Math.min(c,o[n[t]]-1)):Math.max(0,Math.min(c,o[n[t]]))},j$=(e,t,o)=>`fn calculateInputIndices(output_indices: ${t.type.indices}) -> ${e.type.indices} { + var input_indices: ${e.type.indices}; + var carry = 0u; + for (var i = ${o.length}; i >= 0; i--) { + let input_shape_i = ${ae("uniforms.input_shape","i",o.length)}; + let steps_i = ${ae("uniforms.steps","i",o.length)}; + let signs_i = ${ae("uniforms.signs","i",o.length)}; + let starts_i = ${ae("uniforms.starts","i",o.length)}; + var output_index = ${t.indicesGet("output_indices","i")}; + var input_index = output_index * steps_i + starts_i + carry; + carry = input_index / input_shape_i; + input_index = input_index % input_shape_i; + if (signs_i < 0) { + input_index = input_shape_i - input_index - 1u + starts_i; + } + ${e.indicesSet("input_indices","i","input_index")}; + } + return input_indices; + }`,M$=(e,t)=>{let o=e[0].dims,n=L.size(o),u=t.axes.length>0?L.normalizeAxes(t.axes,o.length):[...Array(o.length).keys()],c=Sn(e,4);c.forEach(k=>k!==0||(()=>{throw new Error("step cannot be 0")})),c.length===0&&(c=Array(u.length).fill(1));let p=t.starts.map((k,A)=>Ih(k,A,o,u,c)),m=t.ends.map((k,A)=>Ih(k,A,o,u,c));if(u.length!==p.length||u.length!==m.length)throw new Error("start, ends and axes should have the same number of elements");if(u.length!==o.length)for(let k=0;kMath.sign(k));c.forEach((k,A,O)=>{if(k<0){let B=(m[A]-p[A])/k,W=p[A],N=W+B*c[A];p[A]=N,m[A]=W,O[A]=-k}});let b=o.slice(0);u.forEach((k,A)=>{b[k]=Math.ceil((m[k]-p[k])/c[k])});let _={dims:b,dataType:e[0].dataType},w=Y("output",e[0].dataType,b.length),x=F("input",e[0].dataType,e[0].dims.length),S=L.size(b),C=[{name:"outputSize",type:"u32"},{name:"starts",type:"u32",length:p.length},{name:"signs",type:"i32",length:g.length},{name:"steps",type:"u32",length:c.length}],T=[{type:12,data:S},{type:12,data:p},{type:6,data:g},{type:12,data:c},...te(e[0].dims,b)],z=k=>` + ${k.registerUniforms(C).declareVariables(x,w)} + ${j$(x,w,o)} 
+ ${k.mainStart()} + ${k.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let output_indices = ${w.offsetToIndices("global_idx")}; + let input_indices = calculateInputIndices(output_indices); + ${w.setByOffset("global_idx",x.getByIndices("input_indices"))} + }`;return{name:"Slice",shaderCache:{hint:`${g.length}_${p.length}_${c.length}`,inputDependencies:["rank"]},getShaderSource:z,getRunData:()=>({outputs:[_],dispatchGroup:{x:Math.ceil(n/64)},programUniforms:T})}},Ah=(e,t)=>{B$(e.inputs,t);let o=D$(e.inputs,t);e.compute(M$(e.inputs,o),{inputs:[0]})},kh=e=>{let t=e.starts,o=e.ends,n=e.axes;return pe({starts:t,ends:o,axes:n})}});var R$,U$,Ph,Oh,zh=X(()=>{"use strict";ce();be();We();Tt();we();R$=e=>{if(!e||e.length!==1)throw new Error("Softmax op requires 1 input.")},U$=(e,t)=>{let o=e.inputs[0],n=o.dims,u=L.size(n),c=n.length,p=L.normalizeAxis(t.axis,c),m=pK),b[p]=c-1,b[c-1]=p,g=e.compute(Fe(o,b),{inputs:[o],outputs:[-1]})[0]):g=o;let _=g.dims,w=_[c-1],x=u/w,S=Ae(w),C=w/S,T=64;x===1&&(T=256);let z=(q,K)=>K===4?`max(max(${q}.x, ${q}.y), max(${q}.z, ${q}.w))`:K===2?`max(${q}.x, ${q}.y)`:K===3?`max(max(${q}.x, ${q}.y), ${q}.z)`:q,k=F("x",g.dataType,g.dims,S),A=Y("result",g.dataType,g.dims,S),O=k.type.value,B=Be(g.dataType)==="f32"?`var threadMax = ${O}(-3.402823e+38f);`:`var threadMax = ${O}(-65504.0h);`,W=q=>` + var rowMaxShared : ${O}; + var rowSumShared : ${O}; + var threadShared : array<${O}, ${T}>; + + fn getValue(row: i32, col: i32, row_stride: i32) -> ${O} { + let index = row * row_stride + col; + return x[index]; + } + + fn setValue(row: i32, col: i32, row_stride: i32, value: ${O}) { + let index = row * row_stride + col; + result[index] = value; + } + ${q.registerUniform("packedCols","i32").declareVariables(k,A)} + ${q.mainStart(T)} + let gindex = i32(global_idx); + let lindex = i32(local_idx); + const wg = ${T}; + let row = gindex / wg; + let cols = uniforms.packedCols; + let row_stride : i32 = uniforms.packedCols; + + // find the rows max + ${B} 
+ for (var col = lindex; col < cols; col += wg) { + let value = getValue(row, col, row_stride); + threadMax = max(threadMax, value); + } + if (lindex < cols) { + threadShared[lindex] = threadMax; + } + workgroupBarrier(); + + var reduceSize = min(cols, wg); + for (var currSize = reduceSize >> 1; currSize > 0; currSize = reduceSize >> 1) { + reduceSize = currSize + (reduceSize & 1); + if (lindex < currSize) { + threadShared[lindex] = max(threadShared[lindex], threadShared[lindex + reduceSize]); + } + workgroupBarrier(); + } + if (lindex == 0) { + rowMaxShared = ${O}(${z("threadShared[0]",S)}); + } + workgroupBarrier(); + + // find the rows sum + var threadSum = ${O}(0.0); + for (var col = lindex; col < cols; col += wg) { + let subExp = exp(getValue(row, col, row_stride) - rowMaxShared); + threadSum += subExp; + } + threadShared[lindex] = threadSum; + workgroupBarrier(); + + for (var currSize = wg >> 1; currSize > 0; currSize = currSize >> 1) { + if (lindex < currSize) { + threadShared[lindex] = threadShared[lindex] + threadShared[lindex + currSize]; + } + workgroupBarrier(); + } + if (lindex == 0) { + rowSumShared = ${O}(${ut("threadShared[0]",S)}); + } + workgroupBarrier(); + + // calculate final value for each element in the row + for (var col = lindex; col < cols; col += wg) { + let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared; + setValue(row, col, row_stride, value); + } + }`,N=e.compute({name:"Softmax",shaderCache:{hint:`${S};${T}`,inputDependencies:["type"]},getRunData:()=>({outputs:[{dims:_,dataType:g.dataType}],dispatchGroup:{x},programUniforms:[{type:6,data:C}]}),getShaderSource:W},{inputs:[g],outputs:[m?-1:0]})[0];m&&e.compute(Fe(N,b),{inputs:[N]})},Ph=(e,t)=>{R$(e.inputs),U$(e,t)},Oh=e=>pe({axis:e.axis})});var Bh,N$,V$,W$,Dh,jh=X(()=>{"use strict";ce();be();we();Bh=e=>Array.from(e.getBigInt64Array(),Number),N$=e=>{if(!e||e.length!==2)throw new Error("Tile requires 2 
inputs.");if(e[0].dataType!==1&&e[0].dataType!==10&&e[0].dataType!==6&&e[0].dataType!==12)throw new Error("Tile only support float, float16, int32, and uint32 data types");if(e[1].dataType!==7)throw new Error("Tile `repeats` input should be of int64 data type");if(e[1].dims.length!==1)throw new Error("Tile `repeats` input should be 1-D");if(Bh(e[1]).length!==e[0].dims.length)throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor")},V$=(e,t)=>{let o=[];for(let n=0;n{let o=e[0].dims,n=t??Bh(e[1]),u=V$(o,n),c=L.size(u),p=e[0].dataType,m=F("input",p,o.length),g=Y("output",p,u.length),b=_=>` + const inputShape = ${m.indices(...o)}; + ${_.registerUniform("output_size","u32").declareVariables(m,g)} + ${_.mainStart()} + ${_.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let output_indices = ${g.offsetToIndices("global_idx")}; + var input_indices: ${m.type.indices}; + for (var i = 0; i < ${o.length}; i++) { + let input_dim_i = ${m.indicesGet("uniforms.input_shape","i")}; + let input_dim_value = ${g.indicesGet("output_indices","i")} % input_dim_i; + + ${m.indicesSet("input_indices","i","input_dim_value")} + } + ${g.setByOffset("global_idx",m.getByIndices("input_indices"))} + }`;return{name:"Tile",shaderCache:{hint:`${n}`,inputDependencies:["rank"]},getRunData:()=>({outputs:[{dims:u,dataType:e[0].dataType}],dispatchGroup:{x:Math.ceil(c/64)},programUniforms:[{type:12,data:c},...te(e[0].dims,u)]}),getShaderSource:b}},Dh=e=>{N$(e.inputs),e.compute(W$(e.inputs),{inputs:[0]})}});var L$,G$,Mh,Rh=X(()=>{"use strict";ce();be();we();L$=(e,t,o,n,u)=>{let c=Y("output_data",u,o.length,4),p=F("a_data",t[1].dataType,t[1].dims.length,4),m=F("b_data",t[2].dataType,t[2].dims.length,4),g=F("c_data",t[0].dataType,t[0].dims.length,4),b,_=(w,x,S)=>`select(${x}, ${w}, ${S})`;if(!n)b=c.setByOffset("global_idx",_(p.getByOffset("global_idx"),m.getByOffset("global_idx"),g.getByOffset("global_idx")));else{let w=(x,S,C="")=>{let 
T=`a_data[index_a${S}][component_a${S}]`,z=`b_data[index_b${S}][component_b${S}]`,k=`bool(c_data[index_c${S}] & (0xffu << (component_c${S} * 8)))`;return` + let output_indices${S} = ${c.offsetToIndices(`global_idx * 4u + ${S}u`)}; + let offset_a${S} = ${p.broadcastedIndicesToOffset(`output_indices${S}`,c)}; + let offset_b${S} = ${m.broadcastedIndicesToOffset(`output_indices${S}`,c)}; + let offset_c${S} = ${g.broadcastedIndicesToOffset(`output_indices${S}`,c)}; + let index_a${S} = offset_a${S} / 4u; + let index_b${S} = offset_b${S} / 4u; + let index_c${S} = offset_c${S} / 4u; + let component_a${S} = offset_a${S} % 4u; + let component_b${S} = offset_b${S} % 4u; + let component_c${S} = offset_c${S} % 4u; + ${x}[${S}] = ${C}(${_(T,z,k)}); + `};u===9?b=` + var data = vec4(0); + ${w("data",0,"u32")} + ${w("data",1,"u32")} + ${w("data",2,"u32")} + ${w("data",3,"u32")} + output_data[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`:b=` + ${w("output_data[global_idx]",0)} + ${w("output_data[global_idx]",1)} + ${w("output_data[global_idx]",2)} + ${w("output_data[global_idx]",3)} + `}return` + ${e.registerUniform("vec_size","u32").declareVariables(g,p,m,c)} + ${e.mainStart()} + ${e.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${b} + }`},G$=e=>{let t=e[1].dims,o=e[2].dims,n=e[0].dims,u=e[1].dataType,c=!(L.areEqual(t,o)&&L.areEqual(o,n)),p=t,m=L.size(t);if(c){let b=bt.calcShape(bt.calcShape(t,o,!1),n,!1);if(!b)throw new Error("Can't perform where op on the given tensors");p=b,m=L.size(p)}let g=Math.ceil(m/4);return{name:"Where",shaderCache:{inputDependencies:["rank","rank","rank"]},getShaderSource:b=>L$(b,e,p,c,u),getRunData:()=>({outputs:[{dims:p,dataType:u}],dispatchGroup:{x:Math.ceil(m/64/4)},programUniforms:[{type:12,data:g},...te(n,t,o,p)]})}},Mh=e=>{e.compute(G$(e.inputs))}});var Uh,Nh=X(()=>{"use 
strict";_p();fn();xp();Cp();df();vf();Cf();Nf();qf();Zf();Xf();im();sm();lm();pm();hm();bm();vm();Cm();Im();jm();Um();Vm();Lm();Fm();to();Km();ch();mh();gh();_h();cn();Ch();io();Th();Eh();zh();no();jh();Tt();hn();Rh();Uh=new Map([["Abs",[Sp]],["Acos",[Tp]],["Acosh",[Ip]],["Add",[cf]],["ArgMax",[bp,Wi]],["ArgMin",[yp,Wi]],["Asin",[Ap]],["Asinh",[kp]],["Atan",[Ep]],["Atanh",[Pp]],["Attention",[wp]],["AveragePool",[rh,th]],["BatchNormalization",[vp]],["BiasAdd",[$p]],["BiasSplitGelu",[lf]],["Cast",[zp,Op]],["Ceil",[Dp]],["Clip",[Bp]],["Concat",[xf,$f]],["Conv",[Qi,Zi]],["ConvTranspose",[Ff,Gf]],["Cos",[jp]],["Cosh",[Mp]],["CumSum",[Kf,Jf]],["DepthToSpace",[Qf,Yf]],["DequantizeLinear",[ph,fh]],["Div",[pf]],["Einsum",[rm,nm]],["Elu",[Rp,br]],["Equal",[ff]],["Erf",[Up]],["Exp",[Np]],["Expand",[am]],["FastGelu",[um]],["Floor",[Vp]],["FusedConv",[Qi,Zi]],["Gather",[cm,dm]],["GatherElements",[wm,_m]],["GatherBlockQuantized",[gm,ym]],["GatherND",[fm,mm]],["Gelu",[Wp]],["Gemm",[$m,xm]],["GlobalAveragePool",[oh,ih]],["GlobalMaxPool",[dh,lh]],["Greater",[yf]],["GreaterOrEqual",[_f]],["GridSample",[Sm,Tm]],["GroupQueryAttention",[Dm]],["HardSigmoid",[Zp,Jp]],["InstanceNormalization",[Rm]],["LayerNormalization",[Nm]],["LeakyRelu",[Lp,br]],["Less",[bf]],["LessOrEqual",[wf]],["Log",[af]],["MatMul",[Wm]],["MatMulNBits",[Gm,Hm]],["MaxPool",[sh,uh]],["Mul",[mf]],["MultiHeadAttention",[Em,km]],["Neg",[Hp]],["Not",[Gp]],["Pad",[qm]],["Pow",[hf]],["QuickGelu",[sf,br]],["Range",[hh]],["Reciprocal",[Fp]],["ReduceMin",[cp]],["ReduceMean",[ap]],["ReduceMax",[dp]],["ReduceSum",[fp]],["ReduceProd",[pp]],["ReduceL1",[sp]],["ReduceL2",[up]],["ReduceLogSum",[hp]],["ReduceLogSumExp",[lp]],["ReduceSumSquare",[mp]],["Relu",[qp]],["Resize",[xh,$h]],["RotaryEmbedding",[zm]],["ScatterND",[bh,yh]],["Sigmoid",[Kp]],["Sin",[Qp]],["Sinh",[Yp]],["Slice",[Ah,kh]],["SkipLayerNormalization",[Sh]],["Split",[Pm,Om]],["Sqrt",[Xp]],["Softmax",[Ph,Oh]],["Sub",[gf]],["Tan",[ef]],["Tanh",[rf]],["ThresholdedRelu",[of,b
r]],["Tile",[Dh]],["Transpose",[qc,Kc]],["Where",[Mh]]])});var Tn,Vh=X(()=>{"use strict";nt();yt();we();Tn=class{constructor(t){this.backend=t;this.repo=new Map,this.attributesBound=!1}getArtifact(t){return this.repo.get(t)}setArtifact(t,o){this.repo.set(t,o)}run(t,o,n,u,c){Xe(t.programInfo.name);let p=this.backend.device,m=this.backend.getComputePassEncoder();this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2);let g=[];for(let _ of o)g.push({binding:g.length,resource:{buffer:_.buffer}});for(let _ of n)g.push({binding:g.length,resource:{buffer:_.buffer}});c&&g.push({binding:g.length,resource:c});let b=p.createBindGroup({layout:t.computePipeline.getBindGroupLayout(0),entries:g,label:t.programInfo.name});if(this.backend.sessionStatus==="capturing"){let _={kernelId:this.backend.currentKernelId,computePipeline:t.computePipeline,bindGroup:b,dispatchGroup:u};this.backend.capturedCommandList.get(this.backend.currentSessionId).push(_)}m.setPipeline(t.computePipeline),m.setBindGroup(0,b),m.dispatchWorkgroups(...u),this.backend.writeTimestamp(this.backend.pendingDispatchNumber*2+1),this.backend.pendingDispatchNumber++,(this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber||this.backend.queryType==="at-passes")&&this.backend.endComputePass(),this.backend.pendingDispatchNumber>=this.backend.maxDispatchNumber&&this.backend.flush(),Ze(t.programInfo.name)}dispose(){}build(t,o){Xe(t.name);let n=this.backend.device,u=[];[{feature:"shader-f16",extension:"f16"},{feature:"subgroups",extension:"subgroups"}].forEach(w=>{n.features.has(w.feature)&&u.push(`enable ${w.extension};`)});let p=Hc(o,this.backend.device.limits),m=t.getShaderSource(p),g=`${u.join(` +`)} +${p.additionalImplementations} +${m}`,b=n.createShaderModule({code:g,label:t.name});$e("verbose",()=>`[WebGPU] ${t.name} shader code: ${g}`);let _=n.createComputePipeline({compute:{module:b,entryPoint:"main"},layout:"auto",label:t.name});return 
Ze(t.name),{programInfo:t,computePipeline:_,uniformVariablesInfo:p.variablesInfo}}normalizeDispatchGroupSize(t){let o=typeof t=="number"?t:t.x,n=typeof t=="number"?1:t.y||1,u=typeof t=="number"?1:t.z||1,c=this.backend.device.limits.maxComputeWorkgroupsPerDimension;if(o<=c&&n<=c&&u<=c)return[o,n,u];let p=o*n*u,m=Math.ceil(Math.sqrt(p));if(m>c){if(m=Math.ceil(Math.cbrt(p)),m>c)throw new Error("Total dispatch size exceeds WebGPU maximum.");return[m,m,m]}else return[m,m,1]}}});var Wh={};Xt(Wh,{WebGpuBackend:()=>ao});var H$,F$,oo,ao,Lh=X(()=>{"use strict";nt();ce();yt();Ei();Lc();Nh();Vh();H$=(e,t)=>{if(t.length!==e.length)throw new Error(`inputDependencies length ${t.length} is not equal to inputTensors length ${e.length}.`);let o=[];for(let n=0;n{let n=e.name;return e.shaderCache?.hint&&(n+="["+e.shaderCache.hint+"]"),n+=":"+o+`:${H$(t,e.shaderCache?.inputDependencies??new Array(t.length).fill("dims"))}`,n},oo=class{constructor(t){t&&(this.architecture=t.architecture,this.vendor=t.vendor)}isArchitecture(t){return this.architecture===t}isVendor(t){return this.vendor===t}},ao=class{constructor(){this.currentSessionId=null;this.currentKernelId=null;this.commandEncoder=null;this.computePassEncoder=null;this.maxDispatchNumber=16;this.pendingDispatchNumber=0;this.pendingKernels=[];this.pendingQueries=new Map;this.sessionStatus="default";this.capturedCommandList=new Map;this.capturedPendingKernels=new Map;this.sessionExternalDataMapping=new Map}get currentKernelCustomData(){if(this.currentKernelId===null)throw new Error("currentKernelCustomData(): currentKernelId is null. 
(should not happen)");let t=this.kernelCustomData.get(this.currentKernelId);return t||(t={},this.kernelCustomData.set(this.currentKernelId,t)),t}async initialize(t,o){this.env=t;let n=[],u={requiredLimits:{maxComputeWorkgroupStorageSize:o.limits.maxComputeWorkgroupStorageSize,maxComputeWorkgroupsPerDimension:o.limits.maxComputeWorkgroupsPerDimension,maxStorageBufferBindingSize:o.limits.maxStorageBufferBindingSize,maxBufferSize:o.limits.maxBufferSize,maxComputeInvocationsPerWorkgroup:o.limits.maxComputeInvocationsPerWorkgroup,maxComputeWorkgroupSizeX:o.limits.maxComputeWorkgroupSizeX,maxComputeWorkgroupSizeY:o.limits.maxComputeWorkgroupSizeY,maxComputeWorkgroupSizeZ:o.limits.maxComputeWorkgroupSizeZ},requiredFeatures:n},c=p=>o.features.has(p)&&n.push(p)&&!0;c("chromium-experimental-timestamp-query-inside-passes")||c("timestamp-query"),c("shader-f16"),c("subgroups"),this.device=await o.requestDevice(u),this.adapterInfo=new oo(o.info||await o.requestAdapterInfo()),this.gpuDataManager=Wc(this),this.programManager=new Tn(this),this.kernels=new Map,this.kernelPersistentData=new Map,this.kernelCustomData=new Map,en(t.logLevel,!!t.debug),this.device.onuncapturederror=p=>{p.error instanceof GPUValidationError&&console.error(`An uncaught WebGPU validation error was raised: ${p.error.message}`)},Object.defineProperty(this.env.webgpu,"device",{value:this.device,writable:!1,enumerable:!0,configurable:!1}),Object.defineProperty(this.env.webgpu,"adapter",{value:o,writable:!1,enumerable:!0,configurable:!1}),this.setQueryType()}dispose(){typeof this.querySet<"u"&&this.querySet.destroy(),this.gpuDataManager.dispose()}getCommandEncoder(){return this.commandEncoder||(this.commandEncoder=this.device.createCommandEncoder()),this.commandEncoder}getComputePassEncoder(){if(!this.computePassEncoder){let 
t=this.getCommandEncoder(),o={};this.queryType==="at-passes"&&(o.timestampWrites={querySet:this.querySet,beginningOfPassWriteIndex:this.pendingDispatchNumber*2,endOfPassWriteIndex:this.pendingDispatchNumber*2+1}),this.computePassEncoder=t.beginComputePass(o)}return this.computePassEncoder}endComputePass(){this.computePassEncoder&&(this.computePassEncoder.end(),this.computePassEncoder=null)}flush(){if(!this.commandEncoder)return;Xe(),this.endComputePass();let t;this.queryType!=="none"&&(this.commandEncoder.resolveQuerySet(this.querySet,0,this.pendingDispatchNumber*2,this.queryResolveBuffer,0),t=this.device.createBuffer({size:this.pendingDispatchNumber*2*8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.pendingQueries.set(t,this.pendingKernels),this.pendingKernels=[],this.commandEncoder.copyBufferToBuffer(this.queryResolveBuffer,0,t,0,this.pendingDispatchNumber*2*8)),this.device.queue.submit([this.commandEncoder.finish()]),this.gpuDataManager.refreshPendingBuffers(),this.commandEncoder=null,this.pendingDispatchNumber=0,this.queryType!=="none"&&t.mapAsync(GPUMapMode.READ).then(()=>{let o=new BigUint64Array(t.getMappedRange()),n=this.pendingQueries.get(t);for(let u=0;u"u"&&(this.queryTimeBase=S);let T=Number(S-this.queryTimeBase),z=Number(C-this.queryTimeBase);if(!Number.isSafeInteger(T)||!Number.isSafeInteger(z))throw new RangeError("incorrect timestamp range");if(this.env.webgpu.profiling?.ondata)this.env.webgpu.profiling.ondata({version:1,inputsMetadata:w.map(k=>({dims:k.dims,dataType:gt(k.dataType)})),outputsMetadata:x.map(k=>({dims:k.dims,dataType:gt(k.dataType)})),kernelId:p,kernelType:g,kernelName:b,programName:_,startTime:T,endTime:z});else{let k="";w.forEach((O,B)=>{k+=`input[${B}]: [${O.dims}] | ${gt(O.dataType)}, `});let A="";x.forEach((O,B)=>{A+=`output[${B}]: [${O.dims}] | ${gt(O.dataType)}, `}),console.log(`[profiling] kernel "${p}|${g}|${b}|${_}" ${k}${A}execution time: ${z-T} 
ns`)}Mr("GPU",`${_}::${S}::${C}`)}t.unmap(),this.pendingQueries.delete(t)}),Ze()}run(t,o,n,u,c,p){Xe(t.name);let m=[];for(let O=0;OB):n;if(w.length!==g.length)throw new Error(`Output size ${w.length} must be equal to ${g.length}.`);let x=[],S=[];for(let O=0;O=p)throw new Error(`Invalid output index: ${w[O]}`);if(w[O]===-3)continue;let B=w[O]===-1,W=w[O]===-2,N=B||W?c(g[O].dataType,g[O].dims):u(w[O],g[O].dataType,g[O].dims);if(x.push(N),N.data===0)continue;let q=this.gpuDataManager.get(N.data);if(!q)throw new Error(`no GPU data for output: ${N.data}`);if(B&&this.temporaryData.push(q),W){let K=this.kernelPersistentData.get(this.currentKernelId);K||(K=[],this.kernelPersistentData.set(this.currentKernelId,K)),K.push(q)}S.push(q)}if(m.length!==o.length||S.length!==x.length){if(S.length===0)return Ze(t.name),x;throw new Error(`Program ${t.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`)}let C;if(_){let O=0,B=[];_.forEach(K=>{let Q=typeof K.data=="number"?[K.data]:K.data;if(Q.length===0)return;let ne=K.type===10?2:4,se,ue;K.type===10?(ue=Q.length>4?16:Q.length>2?8:Q.length*ne,se=Q.length>4?16:ne*Q.length):(ue=Q.length<=2?Q.length*ne:16,se=16),O=Math.ceil(O/ue)*ue,B.push(O);let ge=K.type===10?8:4;O+=Q.length>4?Math.ceil(Q.length/ge)*se:Q.length*ne});let W=16;O=Math.ceil(O/W)*W;let N=new ArrayBuffer(O);_.forEach((K,Q)=>{let ne=B[Q],se=typeof K.data=="number"?[K.data]:K.data;if(K.type===6)new Int32Array(N,ne,se.length).set(se);else if(K.type===12)new Uint32Array(N,ne,se.length).set(se);else if(K.type===10)new Uint16Array(N,ne,se.length).set(se);else if(K.type===1)new Float32Array(N,ne,se.length).set(se);else throw new Error(`Unsupported uniform type: ${gt(K.type)}`)});let q=this.gpuDataManager.create(O,GPUBufferUsage.COPY_DST|GPUBufferUsage.UNIFORM);this.device.queue.writeBuffer(q.buffer,0,N,0,O),this.gpuDataManager.release(q.id),C={offset:0,size:O,buffer:q.buffer}}let 
T=this.programManager.normalizeDispatchGroupSize(b),z=T[1]===1&&T[2]===1,k=F$(t,o,z),A=this.programManager.getArtifact(k);if(A||(A=this.programManager.build(t,T),this.programManager.setArtifact(k,A),$e("info",()=>`[artifact] key: ${k}, programName: ${t.name}`)),_&&A.uniformVariablesInfo){if(_.length!==A.uniformVariablesInfo.length)throw new Error(`Uniform variables count mismatch: expect ${A.uniformVariablesInfo.length}, got ${_.length} in program "${A.programInfo.name}".`);for(let O=0;O<_.length;O++){let B=_[O],W=B.type,N=typeof B.data=="number"?1:B.data.length,[q,K]=A.uniformVariablesInfo[O];if(W!==q||N!==K)throw new Error(`Uniform variable ${O} mismatch: expect type ${q} with size ${K}, got type ${W} with size ${N} in program "${A.programInfo.name}".`)}}if($e("info",()=>`[ProgramManager] run "${t.name}" (key=${k}) with ${T[0]}x${T[1]}x${T[2]}`),this.queryType!=="none"||this.sessionStatus==="capturing"){let O={kernelId:this.currentKernelId,programName:A.programInfo.name,inputTensorViews:o,outputTensorViews:x};this.pendingKernels.push(O),this.sessionStatus==="capturing"&&this.capturedPendingKernels.get(this.currentSessionId).push(O)}return this.programManager.run(A,m,S,T,C),Ze(t.name),x}upload(t,o){this.gpuDataManager.upload(t,o)}memcpy(t,o){this.gpuDataManager.memcpy(t,o)}async download(t,o){await this.gpuDataManager.download(t,o)}alloc(t){return this.gpuDataManager.create(t).id}free(t){return this.gpuDataManager.release(t)}createKernel(t,o,n,u){let c=Uh.get(t);if(!c)throw new Error(`kernel not implemented: ${t}`);let p={kernelType:t,kernelName:u,kernelEntry:c[0],attributes:[c[1],n]};this.kernels.set(o,p)}releaseKernel(t){let o=this.kernelPersistentData.get(t);if(o){for(let n of o)this.gpuDataManager.release(n.id);this.kernelPersistentData.delete(t)}this.kernelCustomData.delete(t),this.kernels.delete(t)}computeKernel(t,o,n){let u=this.kernels.get(t);if(!u)throw new Error(`kernel not created: ${t}`);let 
c=u.kernelType,p=u.kernelName,m=u.kernelEntry,g=u.attributes;if(this.currentKernelId!==null)throw new Error(`kernel "[${c}] ${p}" is not allowed to be called recursively`);this.currentKernelId=t,g[0]&&(g[1]=g[0](g[1]),g[0]=void 0),$e("info",()=>`[WebGPU] Start to run kernel "[${c}] ${p}"...`);let b=this.env.debug;this.temporaryData=[];try{return b&&this.device.pushErrorScope("validation"),m(o,g[1]),0}catch(_){return n.push(Promise.resolve(`[WebGPU] Kernel "[${c}] ${p}" failed. ${_}`)),1}finally{b&&n.push(this.device.popErrorScope().then(_=>_?`GPU validation error for kernel "[${c}] ${p}": ${_.message}`:null));for(let _ of this.temporaryData)this.gpuDataManager.release(_.id);this.temporaryData=[],this.currentKernelId=null}}registerBuffer(t,o,n,u){let c=this.sessionExternalDataMapping.get(t);c||(c=new Map,this.sessionExternalDataMapping.set(t,c));let p=c.get(o),m=this.gpuDataManager.registerExternalBuffer(n,u,p);return c.set(o,[m,n]),m}unregisterBuffers(t){let o=this.sessionExternalDataMapping.get(t);o&&(o.forEach(n=>this.gpuDataManager.unregisterExternalBuffer(n[0])),this.sessionExternalDataMapping.delete(t))}getBuffer(t){let o=this.gpuDataManager.get(t);if(!o)throw new Error(`no GPU data for buffer: ${t}`);return o.buffer}createDownloader(t,o,n){return async()=>{let u=await ji(this,t,o);return rn(u.buffer,n)}}writeTimestamp(t){this.queryType==="inside-passes"&&this.computePassEncoder.writeTimestamp(this.querySet,t)}setQueryType(){this.queryType="none",(this.env.webgpu.profiling?.mode==="default"||(typeof this.env.trace>"u"?this.env.wasm.trace:this.env.trace))&&(this.device.features.has("chromium-experimental-timestamp-query-inside-passes")?this.queryType="inside-passes":this.device.features.has("timestamp-query")&&(this.queryType="at-passes"),this.queryType!=="none"&&typeof 
this.querySet>"u"&&(this.querySet=this.device.createQuerySet({type:"timestamp",count:this.maxDispatchNumber*2}),this.queryResolveBuffer=this.device.createBuffer({size:this.maxDispatchNumber*2*8,usage:GPUBufferUsage.COPY_SRC|GPUBufferUsage.QUERY_RESOLVE})))}captureBegin(){$e("info","captureBegin"),this.capturedCommandList.get(this.currentSessionId)||this.capturedCommandList.set(this.currentSessionId,[]),this.capturedPendingKernels.get(this.currentSessionId)||this.capturedPendingKernels.set(this.currentSessionId,[]),this.flush(),this.sessionStatus="capturing"}captureEnd(){$e("info","captureEnd"),this.flush(),this.sessionStatus="default"}replay(){$e("info","replay"),this.sessionStatus="replaying";let t=this.capturedCommandList.get(this.currentSessionId),o=this.capturedPendingKernels.get(this.currentSessionId),n=t.length;this.pendingKernels=[];for(let u=0;u=this.maxDispatchNumber||this.queryType==="at-passes")&&this.endComputePass(),this.pendingDispatchNumber>=this.maxDispatchNumber&&this.flush()}this.flush(),this.sessionStatus="default"}onCreateSession(){this.gpuDataManager.onCreateSession()}onReleaseSession(t){this.unregisterBuffers(t),this.capturedCommandList.has(t)&&this.capturedCommandList.delete(t),this.capturedPendingKernels.has(t)&&this.capturedPendingKernels.delete(t),this.gpuDataManager.onReleaseSession(t)}onRunStart(t){this.currentSessionId=t,this.setQueryType()}}});var Gh={};Xt(Gh,{init:()=>q$});var xr,so,q$,Hh=X(()=>{"use strict";ce();yt();be();Rc();xr=class e{constructor(t,o,n,u){this.module=t;this.dataType=o;this.data=n;this.dims=u}getFloat32Array(){if(this.dataType!==1)throw new Error("Invalid data type");let t=L.size(this.dims);return t===0?new Float32Array:new Float32Array(this.module.HEAP8.buffer,this.data,t)}getBigInt64Array(){if(this.dataType!==7)throw new Error("Invalid data type");let t=L.size(this.dims);return t===0?new BigInt64Array:new BigInt64Array(this.module.HEAP8.buffer,this.data,t)}getInt32Array(){if(this.dataType!==6)throw new 
Error("Invalid data type");let t=L.size(this.dims);return t===0?new Int32Array:new Int32Array(this.module.HEAP8.buffer,this.data,t)}getUint16Array(){if(this.dataType!==10&&this.dataType!==4)throw new Error("Invalid data type");let t=L.size(this.dims);return t===0?new Uint16Array:new Uint16Array(this.module.HEAP8.buffer,this.data,t)}reshape(t){if(L.size(t)!==L.size(this.dims))throw new Error("Invalid new shape");return new e(this.module,this.dataType,this.data,t)}},so=class{constructor(t,o,n){this.module=t;this.backend=o;this.customDataOffset=0;this.customDataSize=0;this.adapterInfo=o.adapterInfo;let u=t.PTR_SIZE,c=n/t.PTR_SIZE,p=u===4?"i32":"i64";this.opKernelContext=Number(t.getValue(u*c++,p));let m=Number(t.getValue(u*c++,p));this.outputCount=Number(t.getValue(u*c++,p)),this.customDataOffset=Number(t.getValue(u*c++,"*")),this.customDataSize=Number(t.getValue(u*c++,p));let g=[];for(let b=0;btypeof m=="number"?this.inputs[m]:m)??this.inputs,u=o?.outputs??[],c=(m,g,b)=>new xr(this.module,g,this.output(m,b),b),p=(m,g)=>{let b=Bt(m,g);if(!b)throw new Error(`Unsupported data type: ${m}`);let _=b>0?this.backend.gpuDataManager.create(b).id:0;return new xr(this.module,m,_,g)};return this.backend.run(t,n,u,c,p,this.outputCount)}output(t,o){let n=this.module.stackSave();try{let u=this.module.PTR_SIZE,c=u===4?"i32":"i64",p=this.module.stackAlloc((1+o.length)*u);this.module.setValue(p,o.length,c);for(let m=0;m{let u=t.jsepInit;if(!u)throw new Error("Failed to initialize JSEP. 
The WebAssembly module is not built with JSEP support.");if(e==="webgpu"){let c=(Lh(),pr(Wh)).WebGpuBackend,p=new c;await p.initialize(o,n),u("webgpu",[p,m=>p.alloc(Number(m)),m=>p.free(m),(m,g,b,_=!1)=>{if(_)$e("verbose",()=>`[WebGPU] jsepCopyGpuToGpu: src=${Number(m)}, dst=${Number(g)}, size=${Number(b)}`),p.memcpy(Number(m),Number(g));else{$e("verbose",()=>`[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(m)}, gpuDataId=${Number(g)}, size=${Number(b)}`);let w=t.HEAPU8.subarray(Number(m>>>0),Number(m>>>0)+Number(b));p.upload(Number(g),w)}},async(m,g,b)=>{$e("verbose",()=>`[WebGPU] jsepCopyGpuToCpu: gpuDataId=${m}, dataOffset=${g}, size=${b}`),await p.download(Number(m),()=>t.HEAPU8.subarray(Number(g)>>>0,Number(g+b)>>>0))},(m,g,b)=>p.createKernel(m,Number(g),b,t.UTF8ToString(t._JsepGetNodeName(Number(g)))),m=>p.releaseKernel(m),(m,g,b,_)=>{$e("verbose",()=>`[WebGPU] jsepRun: sessionHandle=${b}, kernel=${m}, contextDataOffset=${g}`);let w=new so(t,p,Number(g));return p.computeKernel(Number(m),w,_)},()=>p.captureBegin(),()=>p.captureEnd(),()=>p.replay()])}else{let c=new sn(o);u("webnn",[c,()=>c.reserveTensorId(),p=>c.releaseTensorId(p),async(p,m,g,b,_)=>c.ensureTensor(p,m,g,b,_),(p,m)=>{c.uploadTensor(p,m)},async(p,m)=>c.downloadTensor(p,m)])}}});var K$,Wr,Lr,Kt,J$,Fh,mr,Gr,Hr,qh,Fr,qr,Kr,vi=X(()=>{"use strict";Sc();Ic();ce();Ot();Zr();Ai();K$=(e,t)=>{Oe()._OrtInit(e,t)!==0&&Ee("Can't initialize onnxruntime.")},Wr=async e=>{K$(e.wasm.numThreads,gr(e.logLevel))},Lr=async(e,t)=>{Oe().asyncInit?.();{let o=(Hh(),pr(Gh)).init;if(t==="webgpu"){if(typeof navigator>"u"||!navigator.gpu)throw new Error("WebGPU is not supported in current environment");let n=e.webgpu.adapter;if(n){if(typeof n.limits!="object"||typeof n.features!="object"||typeof n.requestDevice!="function")throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. 
It must be a GPUAdapter object.")}else{let u=e.webgpu.powerPreference;if(u!==void 0&&u!=="low-power"&&u!=="high-performance")throw new Error(`Invalid powerPreference setting: "${u}"`);let c=e.webgpu.forceFallbackAdapter;if(c!==void 0&&typeof c!="boolean")throw new Error(`Invalid forceFallbackAdapter setting: "${c}"`);if(n=await navigator.gpu.requestAdapter({powerPreference:u,forceFallbackAdapter:c}),!n)throw new Error('Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.')}await o("webgpu",Oe(),e,n)}if(t==="webnn"){if(typeof navigator>"u"||!navigator.ml)throw new Error("WebNN is not supported in current environment");await o("webnn",Oe(),e)}}},Kt=new Map,J$=e=>{let t=Oe(),o=t.stackSave();try{let n=t.PTR_SIZE,u=t.stackAlloc(2*n);t._OrtGetInputOutputCount(e,u,u+n)!==0&&Ee("Can't get session input/output count.");let p=n===4?"i32":"i64";return[Number(t.getValue(u,p)),Number(t.getValue(u+n,p))]}finally{t.stackRestore(o)}},Fh=(e,t)=>{let o=Oe(),n=o.stackSave(),u=0;try{let c=o.PTR_SIZE,p=o.stackAlloc(2*c);o._OrtGetInputOutputMetadata(e,t,p,p+c)!==0&&Ee("Can't get session input/output metadata.");let g=Number(o.getValue(p,"*"));u=Number(o.getValue(p+c,"*"));let b=o.HEAP32[u/4];if(b===0)return[g,0];let _=o.HEAPU32[u/4+1],w=[];for(let x=0;x<_;x++){let S=Number(o.getValue(u+8+x*c,"*"));w.push(S!==0?o.UTF8ToString(S):Number(o.getValue(u+8+(x+_)*c,"*")))}return[g,b,w]}finally{o.stackRestore(n),u!==0&&o._OrtFree(u)}},mr=e=>{let t=Oe(),o=t._malloc(e.byteLength);if(o===0)throw new Error(`Can't create a session. 
failed to allocate a buffer of size ${e.byteLength}.`);return t.HEAPU8.set(e,o),[o,e.byteLength]},Gr=async(e,t)=>{let o,n,u=Oe();Array.isArray(e)?[o,n]=e:e.buffer===u.HEAPU8.buffer?[o,n]=[e.byteOffset,e.byteLength]:[o,n]=mr(e);let c=0,p=0,m=0,g=[],b=[],_=[];try{if([p,g]=await Tc(t),t?.externalData&&u.mountExternalData){let B=[];for(let W of t.externalData){let N=typeof W=="string"?W:W.path;B.push(yr(typeof W=="string"?W:W.data).then(q=>{u.mountExternalData(N,q)}))}await Promise.all(B)}for(let B of t?.executionProviders??[])if((typeof B=="string"?B:B.name)==="webnn"){if(u.shouldTransferToMLTensor=!1,typeof B!="string"){let N=B,q=N?.context,K=N?.gpuDevice,Q=N?.deviceType,ne=N?.powerPreference;q?u.currentContext=q:K?u.currentContext=await u.webnnCreateMLContext(K):u.currentContext=await u.webnnCreateMLContext({deviceType:Q,powerPreference:ne})}else u.currentContext=await u.webnnCreateMLContext();break}c=await u._OrtCreateSession(o,n,p),u.webgpuOnCreateSession?.(c),c===0&&Ee("Can't create a session."),u.jsepOnCreateSession?.(),u.currentContext&&(u.webnnRegisterMLContext(c,u.currentContext),u.currentContext=void 0,u.shouldTransferToMLTensor=!0);let[w,x]=J$(c),S=!!t?.enableGraphCapture,C=[],T=[],z=[],k=[],A=[];for(let B=0;BB==="gpu-buffer"||B==="ml-tensor"||B==="ml-tensor-cpu-output")&&(m=u._OrtCreateBinding(c),m===0&&Ee("Can't create IO binding."),O={handle:m,outputPreferredLocations:A,outputPreferredLocationsEncoded:A.map(B=>B==="ml-tensor-cpu-output"?"ml-tensor":B).map(B=>Ii(B))}),Kt.set(c,[c,b,_,O,S,!1]),[c,C,T,z,k]}catch(w){throw b.forEach(x=>u._OrtFree(x)),_.forEach(x=>u._OrtFree(x)),m!==0&&u._OrtReleaseBinding(m)!==0&&Ee("Can't release IO binding."),c!==0&&u._OrtReleaseSession(c)!==0&&Ee("Can't release session."),w}finally{u._free(o),p!==0&&u._OrtReleaseSessionOptions(p)!==0&&Ee("Can't release session options."),g.forEach(w=>u._free(w)),u.unmountExternalData?.()}},Hr=e=>{let t=Oe(),o=Kt.get(e);if(!o)throw new Error(`cannot release session. 
invalid session id: ${e}`);let[n,u,c,p,m]=o;p&&(m&&t._OrtClearBoundOutputs(p.handle)!==0&&Ee("Can't clear bound outputs."),t._OrtReleaseBinding(p.handle)!==0&&Ee("Can't release IO binding.")),t.jsepOnReleaseSession?.(e),t.webnnOnReleaseSession?.(e),t.webgpuOnReleaseSession?.(e),u.forEach(g=>t._OrtFree(g)),c.forEach(g=>t._OrtFree(g)),t._OrtReleaseSession(n)!==0&&Ee("Can't release session."),Kt.delete(e)},qh=async(e,t,o,n,u,c,p=!1)=>{if(!e){t.push(0);return}let m=Oe(),g=m.PTR_SIZE,b=e[0],_=e[1],w=e[3],x=w,S,C;if(b==="string"&&(w==="gpu-buffer"||w==="ml-tensor"))throw new Error("String tensor is not supported on GPU.");if(p&&w!=="gpu-buffer")throw new Error(`External buffer must be provided for input/output index ${c} when enableGraphCapture is true.`);if(w==="gpu-buffer"){let k=e[2].gpuBuffer;C=Bt(zt(b),_);{let A=m.jsepRegisterBuffer;if(!A)throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');S=A(n,c,k,C)}}else if(w==="ml-tensor"){let k=e[2].mlTensor;C=Bt(zt(b),_);let A=m.webnnRegisterMLTensor;if(!A)throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');S=A(n,k,zt(b),_)}else{let k=e[2];if(Array.isArray(k)){C=g*k.length,S=m._malloc(C),o.push(S);for(let A=0;Am.setValue(z+O*g,A,g===4?"i32":"i64"));let k=m._OrtCreateTensor(zt(b),S,C,z,_.length,Ii(x));k===0&&Ee(`Can't create tensor for input/output. session=${n}, index=${c}.`),t.push(k)}finally{m.stackRestore(T)}},Fr=async(e,t,o,n,u,c)=>{let p=Oe(),m=p.PTR_SIZE,g=Kt.get(e);if(!g)throw new Error(`cannot run inference. 
invalid session id: ${e}`);let b=g[0],_=g[1],w=g[2],x=g[3],S=g[4],C=g[5],T=t.length,z=n.length,k=0,A=[],O=[],B=[],W=[],N=p.stackSave(),q=p.stackAlloc(T*m),K=p.stackAlloc(T*m),Q=p.stackAlloc(z*m),ne=p.stackAlloc(z*m);try{[k,A]=Cc(c);for(let re=0;reVe*Ie,1);le=gt(he);let Je=x?.outputPreferredLocations[n[re]];if(le==="string"){if(Je==="gpu-buffer"||Je==="ml-tensor")throw new Error("String tensor is not supported on GPU.");let Ve=[];for(let Ie=0;Ie0){let Ve=p.jsepGetBuffer;if(!Ve)throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');let Ie=Ve(me),ot=Bt(he,Ne);if(ot===void 0||!Yr(le))throw new Error(`Unsupported data type: ${le}`);ve=!0,ue.push([le,Ce,{gpuBuffer:Ie,download:p.jsepCreateDownloader(Ie,ot,le),dispose:()=>{p._OrtReleaseTensor(Se)!==0&&Ee("Can't release tensor.")}},"gpu-buffer"])}else if(Je==="ml-tensor"&&Ne>0){let Ve=p.webnnEnsureTensor,Ie=p.webnnIsGraphInputOutputTypeSupported;if(!Ve||!Ie)throw new Error('preferredLocation "ml-tensor" is not supported without using WebNN.');if(Bt(he,Ne)===void 0||!Xr(le))throw new Error(`Unsupported data type: ${le}`);if(!Ie(e,le,!1))throw new Error(`preferredLocation "ml-tensor" for ${le} output is not supported by current WebNN Context.`);let At=await Ve(e,me,he,Ce,!1);ve=!0,ue.push([le,Ce,{mlTensor:At,download:p.webnnCreateMLTensorDownloader(me,le),dispose:()=>{p.webnnReleaseTensorId(me),p._OrtReleaseTensor(Se)}},"ml-tensor"])}else if(Je==="ml-tensor-cpu-output"&&Ne>0){let Ve=p.webnnCreateMLTensorDownloader(me,le)(),Ie=ue.length;ve=!0,ge.push((async()=>{let ot=[Ie,await Ve];return p.webnnReleaseTensorId(me),p._OrtReleaseTensor(Se),ot})()),ue.push([le,Ce,[],"cpu"])}else{let Ve=er(le),Ie=new Ve(Ne);new Uint8Array(Ie.buffer,Ie.byteOffset,Ie.byteLength).set(p.HEAPU8.subarray(me,me+Ie.byteLength)),ue.push([le,Ce,Ie,"cpu"])}}finally{p.stackRestore(fe),le==="string"&&me&&p._free(me),ve||p._OrtReleaseTensor(Se)}}x&&!S&&(p._OrtClearBoundOutputs(x.handle)!==0&&Ee("Can't clear bound 
outputs."),Kt.set(e,[b,_,w,x,S,!1]));for(let[re,Se]of await Promise.all(ge))ue[re][2]=Se;return ue}finally{p.webnnOnRunEnd?.(b),p.stackRestore(N),O.forEach(se=>p._OrtReleaseTensor(se)),B.forEach(se=>p._OrtReleaseTensor(se)),W.forEach(se=>p._free(se)),k!==0&&p._OrtReleaseRunOptions(k),A.forEach(se=>p._free(se))}},qr=e=>{let t=Oe(),o=Kt.get(e);if(!o)throw new Error("invalid session id");let n=o[0],u=t._OrtEndProfiling(n);u===0&&Ee("Can't get an profile file name."),t._OrtFree(u)},Kr=e=>{let t=[];for(let o of e){let n=o[2];!Array.isArray(n)&&"buffer"in n&&t.push(n.buffer)}return t}});var Jt,it,$r,An,kn,In,uo,lo,ir,or,Q$,Kh,Jh,Zh,Qh,Yh,Xh,eg,co=X(()=>{"use strict";nt();vi();Ot();Nr();Jt=()=>!!ze.wasm.proxy&&typeof document<"u",$r=!1,An=!1,kn=!1,lo=new Map,ir=(e,t)=>{let o=lo.get(e);o?o.push(t):lo.set(e,[t])},or=()=>{if($r||!An||kn||!it)throw new Error("worker not ready")},Q$=e=>{switch(e.data.type){case"init-wasm":$r=!1,e.data.err?(kn=!0,uo[1](e.data.err)):(An=!0,uo[0]()),In&&(URL.revokeObjectURL(In),In=void 0);break;case"init-ep":case"copy-from":case"create":case"release":case"run":case"end-profiling":{let t=lo.get(e.data.type);e.data.err?t.shift()[1](e.data.err):t.shift()[0](e.data.out);break}default:}},Kh=async()=>{if(!An){if($r)throw new Error("multiple calls to 'initWasm()' detected.");if(kn)throw new Error("previous call to 'initWasm()' failed.");if($r=!0,Jt())return new Promise((e,t)=>{it?.terminate(),vc().then(([o,n])=>{try{it=n,it.onerror=c=>t(c),it.onmessage=Q$,uo=[e,t];let u={type:"init-wasm",in:ze};!u.in.wasm.wasmPaths&&(o||Ci)&&(u.in.wasm.wasmPaths={wasm:new URL("ort-wasm-simd-threaded.jsep.wasm",import.meta.url).href}),it.postMessage(u),In=o}catch(u){t(u)}},t)});try{await Vr(ze.wasm),await Wr(ze),An=!0}catch(e){throw kn=!0,e}finally{$r=!1}}},Jh=async e=>{if(Jt())return or(),new Promise((t,o)=>{ir("init-ep",[t,o]);let n={type:"init-ep",in:{epName:e,env:ze}};it.postMessage(n)});await Lr(ze,e)},Zh=async e=>Jt()?(or(),new 
Promise((t,o)=>{ir("copy-from",[t,o]);let n={type:"copy-from",in:{buffer:e}};it.postMessage(n,[e.buffer])})):mr(e),Qh=async(e,t)=>{if(Jt()){if(t?.preferredOutputLocation)throw new Error('session option "preferredOutputLocation" is not supported for proxy.');return or(),new Promise((o,n)=>{ir("create",[o,n]);let u={type:"create",in:{model:e,options:{...t}}},c=[];e instanceof Uint8Array&&c.push(e.buffer),it.postMessage(u,c)})}else return Gr(e,t)},Yh=async e=>{if(Jt())return or(),new Promise((t,o)=>{ir("release",[t,o]);let n={type:"release",in:e};it.postMessage(n)});Hr(e)},Xh=async(e,t,o,n,u,c)=>{if(Jt()){if(o.some(p=>p[3]!=="cpu"))throw new Error("input tensor on GPU is not supported for proxy.");if(u.some(p=>p))throw new Error("pre-allocated output tensor is not supported for proxy.");return or(),new Promise((p,m)=>{ir("run",[p,m]);let g=o,b={type:"run",in:{sessionId:e,inputIndices:t,inputs:g,outputIndices:n,options:c}};it.postMessage(b,Kr(g))})}else return Fr(e,t,o,n,u,c)},eg=async e=>{if(Jt())return or(),new Promise((t,o)=>{ir("end-profiling",[t,o]);let n={type:"end-profiling",in:e};it.postMessage(n)});qr(e)}});var tg,Y$,En,rg=X(()=>{"use strict";nt();co();ce();Ur();Ai();tg=(e,t)=>{switch(e.location){case"cpu":return[e.type,e.dims,e.data,"cpu"];case"gpu-buffer":return[e.type,e.dims,{gpuBuffer:e.gpuBuffer},"gpu-buffer"];case"ml-tensor":return[e.type,e.dims,{mlTensor:e.mlTensor},"ml-tensor"];default:throw new Error(`invalid data location: ${e.location} for ${t()}`)}},Y$=e=>{switch(e[3]){case"cpu":return new st(e[0],e[2],e[1]);case"gpu-buffer":{let t=e[0];if(!Yr(t))throw new Error(`not supported data type: ${t} for deserializing GPU tensor`);let{gpuBuffer:o,download:n,dispose:u}=e[2];return st.fromGpuBuffer(o,{dataType:t,dims:e[1],download:n,dispose:u})}case"ml-tensor":{let t=e[0];if(!Xr(t))throw new Error(`not supported data type: ${t} for deserializing MLTensor tensor`);let{mlTensor:o,download:n,dispose:u}=e[2];return 
st.fromMLTensor(o,{dataType:t,dims:e[1],download:n,dispose:u})}default:throw new Error(`invalid data location: ${e[3]}`)}},En=class{async fetchModelAndCopyToWasmMemory(t){return Zh(await yr(t))}async loadModel(t,o){Xe();let n;typeof t=="string"?n=await this.fetchModelAndCopyToWasmMemory(t):n=t,[this.sessionId,this.inputNames,this.outputNames,this.inputMetadata,this.outputMetadata]=await Qh(n,o),Ze()}async dispose(){return Yh(this.sessionId)}async run(t,o,n){Xe();let u=[],c=[];Object.entries(t).forEach(x=>{let S=x[0],C=x[1],T=this.inputNames.indexOf(S);if(T===-1)throw new Error(`invalid input '${S}'`);u.push(C),c.push(T)});let p=[],m=[];Object.entries(o).forEach(x=>{let S=x[0],C=x[1],T=this.outputNames.indexOf(S);if(T===-1)throw new Error(`invalid output '${S}'`);p.push(C),m.push(T)});let g=u.map((x,S)=>tg(x,()=>`input "${this.inputNames[c[S]]}"`)),b=p.map((x,S)=>x?tg(x,()=>`output "${this.outputNames[m[S]]}"`):null),_=await Xh(this.sessionId,c,g,m,b,n),w={};for(let x=0;x<_.length;x++)w[this.outputNames[m[x]]]=p[x]??Y$(_[x]);return Ze(),w}startProfiling(){}endProfiling(){eg(this.sessionId)}}});var ig={};Xt(ig,{OnnxruntimeWebAssemblyBackend:()=>Pn,initializeFlags:()=>ng,wasmBackend:()=>X$});var ng,Pn,X$,og=X(()=>{"use strict";nt();co();rg();ng=()=>{(typeof ze.wasm.initTimeout!="number"||ze.wasm.initTimeout<0)&&(ze.wasm.initTimeout=0);let e=ze.wasm.simd;if(typeof e!="boolean"&&e!==void 0&&e!=="fixed"&&e!=="relaxed"&&(console.warn(`Property "env.wasm.simd" is set to unknown value "${e}". 
Reset it to \`false\` and ignore SIMD feature checking.`),ze.wasm.simd=!1),typeof ze.wasm.proxy!="boolean"&&(ze.wasm.proxy=!1),typeof ze.wasm.trace!="boolean"&&(ze.wasm.trace=!1),typeof ze.wasm.numThreads!="number"||!Number.isInteger(ze.wasm.numThreads)||ze.wasm.numThreads<=0)if(typeof self<"u"&&!self.crossOriginIsolated)ze.wasm.numThreads=1;else{let t=typeof navigator>"u"?mi("node:os").cpus().length:navigator.hardwareConcurrency;ze.wasm.numThreads=Math.min(4,Math.ceil((t||1)/2))}},Pn=class{async init(t){ng(),await Kh(),await Jh(t)}async createInferenceSessionHandler(t,o){let n=new En;return await n.loadModel(t,o),n}},X$=new Pn});nt();nt();nt();var uc="1.23.0";var lO=wi;{let e=(og(),pr(ig)).wasmBackend;Wt("webgpu",e,5),Wt("webnn",e,5),Wt("cpu",e,10),Wt("wasm",e,10)}Object.defineProperty(ze.versions,"web",{value:uc,enumerable:!0});export{Cv as InferenceSession,Mr as TRACE,Xe as TRACE_FUNC_BEGIN,Ze as TRACE_FUNC_END,st as Tensor,lO as default,ze as env,Wt as registerBackend}; +/** + * @license + * Copyright 2021 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +/** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +/** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +//# sourceMappingURL=ort.webgpu.bundle.min.mjs.map