Ex0bit Claude Opus 4.6 committed on
Commit
53772ae
·
1 Parent(s): c174d98

Add vision/multimodal support across all Qwen3.5 models

Browse files

- Vision encoder (ViT) with 2D RoPE, BF16 weights, GELU saturation fix
- Image preprocessing: CLIP normalization, smart_resize, Conv3D patch embed
- Spatial merge (2x2 → MLP → LLM hidden), mRoPE position computation
- Tested: 2B (155 tok/s), 4B (77 tok/s), 9B (54 tok/s) — zero text-only regression
- GELU NaN fix: clamp at ±10 to prevent intermediate overflow in tanh approximation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

assets/{gpu-ops-DKsrMEcC.js → gpu-ops-BbLjsC0p.js} RENAMED
@@ -1,4 +1,4 @@
1
- (function(){const a=document.createElement("link").relList;if(a&&a.supports&&a.supports("modulepreload"))return;for(const r of document.querySelectorAll('link[rel="modulepreload"]'))u(r);new MutationObserver(r=>{for(const e of r)if(e.type==="childList")for(const i of e.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&u(i)}).observe(document,{childList:!0,subtree:!0});function t(r){const e={};return r.integrity&&(e.integrity=r.integrity),r.referrerPolicy&&(e.referrerPolicy=r.referrerPolicy),r.crossOrigin==="use-credentials"?e.credentials="include":r.crossOrigin==="anonymous"?e.credentials="omit":e.credentials="same-origin",e}function u(r){if(r.ep)return;r.ep=!0;const e=t(r);fetch(r.href,e)}})();class v{constructor(){this.device=null,this.adapter=null,this.adapterInfo=null,this.pipelineCache=new Map,this.bufferCache=new Map,this.bindGroupCache=new Map}async init(){if(!navigator.gpu)throw new Error("WebGPU not supported");if(this.adapter=await navigator.gpu.requestAdapter({powerPreference:"high-performance"}),!this.adapter)throw new Error("No WebGPU adapter found");this.adapterInfo=await this.adapter.requestAdapterInfo?.()??{};const a={},t={maxBufferSize:4*1024*1024*1024,maxStorageBufferBindingSize:4*1024*1024*1024,maxComputeWorkgroupStorageSize:32768,maxComputeInvocationsPerWorkgroup:256,maxComputeWorkgroupSizeX:256,maxStorageBuffersPerShaderStage:10};for(const[r,e]of Object.entries(t))this.adapter.limits[r]!==void 0&&(a[r]=Math.min(e,this.adapter.limits[r]));const u=[];return this.adapter.features.has("shader-f16")&&u.push("shader-f16"),this.adapter.features.has("subgroups")&&u.push("subgroups"),this.device=await this.adapter.requestDevice({requiredLimits:a,requiredFeatures:u}),this.hasF16=this.device.features.has("shader-f16"),this.hasSubgroups=this.device.features.has("subgroups"),this.device.lost.then(r=>console.error("WebGPU device lost:",r)),this}createBuffer(a,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST){const 
r=Math.ceil(t/4)*4,e=this.device.createBuffer({size:r,usage:u,label:a});return this.bufferCache.set(a,e),e}createBufferFromData(a,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){const r=this.createBuffer(a,t.byteLength,u);return this.device.queue.writeBuffer(r,0,t),r}createReadbackBuffer(a,t){const u=Math.ceil(t/4)*4;return this.device.createBuffer({size:u,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:a+"_readback"})}getOrCreatePipeline(a,t,u="main"){if(this.pipelineCache.has(a))return this.pipelineCache.get(a);const r=this.device.createShaderModule({code:t,label:a}),e=this.device.createComputePipeline({layout:"auto",compute:{module:r,entryPoint:u},label:a});return this.pipelineCache.set(a,e),e}initTimestamps(){this.device.features.has("timestamp-query")&&(this._tsQuerySet=this.device.createQuerySet({type:"timestamp",count:2}),this._tsResolveBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC}),this._tsReadBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this._tsEnabled=!0,this._tsResults=[])}beginBatch(){this._encoder=this.device.createCommandEncoder(),this._passCount=0,this.singlePassMode&&(this._singlePass=this._encoder.beginComputePass()),this._tsEnabled&&this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:0}}).end()}endBatch(){this._singlePass&&(this._singlePass.end(),this._singlePass=null),this._tsEnabled&&this._encoder&&(this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:1}}).end(),this._encoder.resolveQuerySet(this._tsQuerySet,0,2,this._tsResolveBuf,0),this._encoder.copyBufferToBuffer(this._tsResolveBuf,0,this._tsReadBuf,0,16)),this._encoder&&(this.device.queue.submit([this._encoder.finish()]),this._encoder=null)}async readTimestamp(){if(!this._tsEnabled)return null;await this._tsReadBuf.mapAsync(GPUMapMode.READ);const a=new 
BigInt64Array(this._tsReadBuf.getMappedRange().slice(0));this._tsReadBuf.unmap();const u=Number(a[1]-a[0])/1e6;return this._tsResults.push(u),u}copyBuffer(a,t,u,r=0,e=0){if(this._singlePass){this._singlePass.end(),this._encoder.copyBufferToBuffer(a,r,t,e,u),this._singlePass=this._encoder.beginComputePass();return}const i=this._encoder||this.device.createCommandEncoder();i.copyBufferToBuffer(a,r,t,e,u),this._encoder||this.device.queue.submit([i.finish()])}startRecording(){this._recording=[]}stopRecording(){const a=this._recording;return this._recording=null,a}replay(a,t){if(t)for(const r of t)this.device.queue.writeBuffer(r.buffer,r.offset,r.data,r.dataOffset,r.size);const u=this._encoder;for(let r=0;r<a.length;r++){const e=a[r];if(e.multi){const i=u.beginComputePass(),s=e.ops;for(let o=0;o<s.length;o++){const n=s[o];i.setPipeline(n.pipeline),i.setBindGroup(0,n.bindGroup),i.dispatchWorkgroups(n.wgX,n.wgY)}i.end()}else{const i=u.beginComputePass();i.setPipeline(e.pipeline),i.setBindGroup(0,e.bindGroup),i.dispatchWorkgroups(e.wgX,e.wgY),i.end()}}}dispatch(a,t,u,r=1,e=1){if(this._recording&&this._recording.push({pipeline:a,bindGroup:t[0],wgX:u,wgY:r}),this._singlePass){const o=this._singlePass;this._passCount!==void 0&&this._passCount++,o.setPipeline(a);for(let n=0;n<t.length;n++)o.setBindGroup(n,t[n]);o.dispatchWorkgroups(u,r,e);return}const i=this._encoder||this.device.createCommandEncoder(),s=i.beginComputePass();this._passCount!==void 0&&this._passCount++,s.setPipeline(a);for(let o=0;o<t.length;o++)s.setBindGroup(o,t[o]);s.dispatchWorkgroups(u,r,e),s.end(),this._encoder||this.device.queue.submit([i.finish()])}dispatchMulti(a){if(this._recording&&this._recording.push({multi:!0,ops:a.map(r=>({pipeline:r.pipeline,bindGroup:r.bindGroups[0],wgX:r.workgroupsX,wgY:r.workgroupsY||1}))}),this._singlePass){this._passCount!==void 0&&this._passCount++;for(const r of a){this._singlePass.setPipeline(r.pipeline);for(let 
e=0;e<r.bindGroups.length;e++)this._singlePass.setBindGroup(e,r.bindGroups[e]);this._singlePass.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}return}const t=this._encoder||this.device.createCommandEncoder(),u=t.beginComputePass();this._passCount!==void 0&&this._passCount++;for(const r of a){u.setPipeline(r.pipeline);for(let e=0;e<r.bindGroups.length;e++)u.setBindGroup(e,r.bindGroups[e]);u.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}u.end(),this._encoder||this.device.queue.submit([t.finish()])}async readBuffer(a,t){const u=this.createReadbackBuffer("_readback",t),r=this.device.createCommandEncoder();r.copyBufferToBuffer(a,0,u,0,t),this.device.queue.submit([r.finish()]),await u.mapAsync(GPUMapMode.READ);const e=new Float32Array(u.getMappedRange().slice(0));return u.unmap(),u.destroy(),e}createBindGroup(a,t,u){return this.device.createBindGroup({layout:a.getBindGroupLayout(t),entries:u.map((r,e)=>({binding:e,resource:{buffer:r}}))})}createBindGroupWithOffsets(a,t,u){return this.device.createBindGroup({layout:a.getBindGroupLayout(t),entries:u.map((r,e)=>({binding:e,resource:r.buffer?{buffer:r.buffer,offset:r.offset||0,size:r.size}:{buffer:r}}))})}getCachedBindGroup(a,t,u,r){let e=this.bindGroupCache.get(a);return e||(e=this.createBindGroup(t,u,r),this.bindGroupCache.set(a,e)),e}destroy(){for(const a of this.bufferCache.values())a.destroy();this.bufferCache.clear(),this.bindGroupCache.clear(),this.device?.destroy()}}const Hr=Object.freeze(Object.defineProperty({__proto__:null,GPUContext:v},Symbol.toStringTag,{value:"Module"})),m="modulepreload",k=function(p,a){return new URL(p,a).href},w={},Mr=function(a,t,u){let r=Promise.resolve();if(t&&t.length>0){let n=function(g){return Promise.all(g.map(l=>Promise.resolve(l).then(f=>({status:"fulfilled",value:f}),f=>({status:"rejected",reason:f}))))};const 
i=document.getElementsByTagName("link"),s=document.querySelector("meta[property=csp-nonce]"),o=s?.nonce||s?.getAttribute("nonce");r=n(t.map(g=>{if(g=k(g,u),g in w)return;w[g]=!0;const l=g.endsWith(".css"),f=l?'[rel="stylesheet"]':"";if(u)for(let _=i.length-1;_>=0;_--){const c=i[_];if(c.href===g&&(!l||c.rel==="stylesheet"))return}else if(document.querySelector(`link[href="${g}"]${f}`))return;const d=document.createElement("link");if(d.rel=l?"stylesheet":m,l||(d.as="script"),d.crossOrigin="",d.href=g,o&&d.setAttribute("nonce",o),document.head.appendChild(d),l)return new Promise((_,c)=>{d.addEventListener("load",_),d.addEventListener("error",()=>c(new Error(`Unable to preload CSS for ${g}`)))})}))}function e(i){const s=new Event("vite:preloadError",{cancelable:!0});if(s.payload=i,window.dispatchEvent(s),!s.defaultPrevented)throw i}return r.then(i=>{for(const s of i||[])s.status==="rejected"&&e(s.reason);return a().catch(e)})},h=`
2
  struct Params { K: u32, N: u32, group_size: u32, }
3
 
4
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -49,7 +49,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
49
  }
50
 
51
  output[col] = sum;
52
- }`,x=`
53
  enable f16;
54
 
55
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -103,7 +103,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
103
  }
104
 
105
  output[col] = sum;
106
- }`,q=`
107
  struct Params { K: u32, N: u32, group_size: u32, num_splits: u32, }
108
 
109
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -160,7 +160,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u, @builtin(workgroup_id) wgid:
160
  }
161
 
162
  partials[split_id * N + col] = sum;
163
- }`,y=`
164
  struct Params { N: u32, num_splits: u32, }
165
 
166
  @group(0) @binding(0) var<storage, read> partials: array<f32>;
@@ -176,7 +176,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
176
  sum += partials[s * params.N + col];
177
  }
178
  output[col] = sum;
179
- }`,F=`
180
  struct Params { K: u32, N: u32, }
181
 
182
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -203,7 +203,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
203
  sum += input[k] * w0 + input[k + 1u] * w1 + input[k + 2u] * w2 + input[k + 3u] * w3;
204
  }
205
  output[col] = sum;
206
- }`,N=`
207
  struct Params { N: u32, eps: f32, }
208
 
209
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -244,7 +244,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
244
  let w = unpack_bf16(weight[i / 2u], i % 2u);
245
  output[i] = input[i] * rms * (1.0 + w);
246
  }
247
- }`,P=`
248
  struct Params { N: u32, }
249
 
250
  @group(0) @binding(0) var<storage, read> gate: array<f32>;
@@ -258,7 +258,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
258
  if (i >= params.N) { return; }
259
  let x = gate[i];
260
  output[i] = (x / (1.0 + exp(-x))) * up[i];
261
- }`,S=`
262
  struct Params { N: u32, }
263
 
264
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
@@ -270,7 +270,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
270
  let i = gid.x;
271
  if (i >= params.N) { return; }
272
  a[i] = a[i] + b[i];
273
- }`,E=`
274
  struct Params { N: u32, num_heads: u32, head_dim: u32, }
275
 
276
  @group(0) @binding(0) var<storage, read> src: array<f32>;
@@ -287,7 +287,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
287
  let d = i % hd;
288
  dst_a[i] = src[head * hd * 2u + d];
289
  dst_b[i] = src[head * hd * 2u + hd + d];
290
- }`,R=`
291
  struct Params { N: u32, }
292
 
293
  @group(0) @binding(0) var<storage, read> x: array<f32>;
@@ -300,7 +300,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
300
  let i = gid.x;
301
  if (i >= params.N) { return; }
302
  output[i] = x[i] / (1.0 + exp(-gate[i]));
303
- }`,K=`
304
  struct Params { token_id: u32, dim: u32, }
305
 
306
  @group(0) @binding(0) var<storage, read> embeddings: array<u32>;
@@ -318,7 +318,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
318
  if (i >= params.dim) { return; }
319
  let flat = params.token_id * params.dim + i;
320
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
321
- }`,A=`
322
  struct ArgmaxResult { idx: u32, val: f32, }
323
  struct Params { dim: u32, }
324
 
@@ -338,7 +338,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
338
  if (i >= params.dim) { return; }
339
  let flat = argmax_result.idx * params.dim + i;
340
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
341
- }`,B=`
342
  struct Params { N: u32, }
343
  struct Result { idx: u32, val: f32, }
344
 
@@ -381,7 +381,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
381
  result.idx = s_idx[0];
382
  result.val = s_val[0];
383
  }
384
- }`,D=`
385
  struct Params { N: u32, }
386
 
387
  @group(0) @binding(0) var<storage, read> logits: array<f32>;
@@ -406,7 +406,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
406
  // Output (idx, val) pair — 256 candidates total
407
  result[tid * 2u] = best_idx;
408
  result[tid * 2u + 1u] = bitcast<u32>(best_val);
409
- }`,z=`
410
  struct ArgmaxResult { idx: u32, val: f32, }
411
  struct Params { recent_count: u32, history_slot: u32, }
412
 
@@ -420,7 +420,7 @@ fn main() {
420
  let tok = argmax_result.idx;
421
  recent_tokens[params.recent_count] = tok;
422
  token_history[params.history_slot] = tok;
423
- }`,T=`
424
  struct Params { position: u32, num_kv_heads: u32, head_dim: u32, }
425
 
426
  @group(0) @binding(0) var<storage, read> k_proj: array<f32>;
@@ -437,7 +437,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
437
  let offset = params.position * total + i;
438
  k_cache[offset] = k_proj[i];
439
  v_cache[offset] = v_proj[i];
440
- }`,H=`
441
  struct Params {
442
  seq_len: u32,
443
  head_dim: u32,
@@ -531,7 +531,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
531
  output[base + hd + 1u] = running_max;
532
  }
533
  }
534
- }`,M=`
535
  struct Params {
536
  head_dim: u32,
537
  num_splits: u32,
@@ -583,7 +583,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
583
  }
584
 
585
  output[h * hd + tid] = acc / ws;
586
- }`,G=`
587
  struct Params {
588
  num_heads: u32,
589
  key_dim: u32,
@@ -669,7 +669,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
669
  }
670
  output[h * vd + vi] = o_val;
671
  }
672
- }`,O=`
673
  struct Params { num_heads: u32, head_dim: u32, eps: f32, }
674
 
675
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
@@ -749,7 +749,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
749
  let w = unpack_bf16(weight[i / 2u], i % 2u);
750
  x[off + i] = x[off + i] * rms * w;
751
  }
752
- }`,I=`
753
  struct Params { channels: u32, }
754
 
755
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
@@ -785,7 +785,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
785
  hist[c] = h1;
786
  hist[ch + c] = h2;
787
  hist[2u * ch + c] = cur;
788
- }`,C=`
789
  struct Params {
790
  num_heads: u32,
791
  key_dim: u32,
@@ -1002,7 +1002,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
1002
  }
1003
  workgroupBarrier();
1004
  }
1005
- }`,L=`
1006
  struct Params { K: u32, N: u32, group_size: u32, }
1007
 
1008
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -1056,7 +1056,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1056
  }
1057
 
1058
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1059
- }`,Q=`
1060
  enable f16;
1061
 
1062
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1113,7 +1113,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1113
  }
1114
 
1115
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1116
- }`,j=`
1117
  struct Params { K: u32, N: u32, group_size: u32, }
1118
 
1119
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -1185,7 +1185,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u,
1185
  let u = partial[32u+tid] + partial[32u+tid+1u] + partial[32u+tid+2u] + partial[32u+tid+3u];
1186
  output[col] = (g / (1.0 + exp(-g))) * u;
1187
  }
1188
- }`,V=`
1189
  enable f16;
1190
 
1191
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1260,7 +1260,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u,
1260
  let u = partial[32u+tid] + partial[32u+tid+1u] + partial[32u+tid+2u] + partial[32u+tid+3u];
1261
  output[col] = (g / (1.0 + exp(-g))) * u;
1262
  }
1263
- }`,W=`
1264
  struct Params { N: u32, eps: f32, }
1265
 
1266
  @group(0) @binding(0) var<storage, read_write> hidden: array<f32>;
@@ -1304,7 +1304,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
1304
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1305
  output[i] = hidden[i] * rms * (1.0 + w);
1306
  }
1307
- }`,Y=`
1308
  struct Params { N: u32, eps: f32, }
1309
 
1310
  @group(0) @binding(0) var<storage, read> hidden: array<f32>;
@@ -1346,7 +1346,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
1346
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1347
  output[i] = wg_vals[i] * rms * (1.0 + w);
1348
  }
1349
- }`,X=`
1350
  struct Params { N: u32, eps: f32, }
1351
 
1352
  @group(0) @binding(0) var<storage, read> input_a: array<f32>;
@@ -1391,7 +1391,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
1391
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1392
  normed[i] = wg_vals[i] * rms * (1.0 + w);
1393
  }
1394
- }`,$=`
1395
  struct Params { K: u32, N: u32, group_size: u32, }
1396
 
1397
  @group(0) @binding(0) var<storage, read> x: array<f32>;
@@ -1437,7 +1437,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1437
  }
1438
 
1439
  output[col] = sum;
1440
- }`,Z=`
1441
  enable f16;
1442
 
1443
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1486,7 +1486,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1486
  }
1487
 
1488
  output[col] = sum;
1489
- }`,J=`
1490
  struct Params { K: u32, N: u32, group_size: u32, }
1491
 
1492
  @group(0) @binding(0) var<storage, read> a: array<f32>;
@@ -1532,7 +1532,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1532
  }
1533
 
1534
  output[col] = sum;
1535
- }`,rr=`
1536
  enable f16;
1537
 
1538
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1581,7 +1581,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
1581
  }
1582
 
1583
  output[col] = sum;
1584
- }`,ar=`
1585
  struct Params { K: u32, N: u32, group_size: u32, }
1586
 
1587
  @group(0) @binding(0) var<storage, read> a: array<f32>;
@@ -1635,7 +1635,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1635
  if (lane == 0u && col < N) {
1636
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1637
  }
1638
- }`,er=`
1639
  enable f16;
1640
 
1641
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1692,7 +1692,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1692
  if (lane == 0u && col < N) {
1693
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1694
  }
1695
- }`,ur=`
1696
  struct Params { K: u32, N: u32, group_size: u32, }
1697
 
1698
  @group(0) @binding(0) var<storage, read> x: array<f32>;
@@ -1746,7 +1746,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1746
  if (lane == 0u && col < N) {
1747
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1748
  }
1749
- }`,tr=`
1750
  enable f16;
1751
 
1752
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1803,7 +1803,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1803
  if (lane == 0u && col < N) {
1804
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1805
  }
1806
- }`,ir=`
1807
  struct Params {
1808
  K: u32, // hidden_size
1809
  N: u32, // intermediate_size
@@ -1914,7 +1914,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u,
1914
  }
1915
 
1916
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1917
- }`,sr=`
1918
  struct Params {
1919
  K: u32,
1920
  N: u32,
@@ -2002,7 +2002,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u,
2002
  }
2003
 
2004
  output[col] = sum;
2005
- }`,or=`
2006
  struct Params {
2007
  K: u32,
2008
  N: u32,
@@ -2087,7 +2087,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u,
2087
  }
2088
 
2089
  output[col] = sum;
2090
- }`,nr=`
2091
  struct Params {
2092
  K: u32, N: u32, group_size: u32, eps: f32,
2093
  norm_weight: array<vec4<u32>, 640>,
@@ -2169,7 +2169,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u,
2169
  }
2170
  }
2171
  output[col] = sum;
2172
- }`,gr=`
2173
  struct Params {
2174
  K: u32, N: u32, group_size: u32, eps: f32,
2175
  norm_weight: array<vec4<u32>, 640>,
@@ -2244,7 +2244,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u,
2244
  }
2245
  }
2246
  output[col] = sum;
2247
- }`,dr=`
2248
  struct Params { num_tokens: u32, penalty: f32, presence: f32, _pad: u32, }
2249
 
2250
  @group(0) @binding(0) var<storage, read_write> logits: array<f32>;
@@ -2268,7 +2268,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
2268
  // Presence penalty (additive)
2269
  val -= params.presence;
2270
  logits[tok] = val;
2271
- }`,lr=`
2272
  struct Params {
2273
  temperature: f32,
2274
  top_k: u32,
@@ -2404,8 +2404,8 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
2404
  result.idx = wg_idx[selected];
2405
  result.val = wg_val[selected];
2406
  }
2407
- }`;function b(p=320,a=1e7,t=33,u=30,r=128){return`
2408
- const ROPE_THETA: f32 = ${a};
2409
  const MROPE_S1_LIMIT: u32 = ${t}u;
2410
  const MROPE_S2_LIMIT: u32 = ${u}u;
2411
  const PARTIAL_DIM: u32 = ${r}u;
@@ -2421,7 +2421,7 @@ struct Params {
2421
  position_w: u32, // mRoPE width dimension (= position for text tokens)
2422
  // Packed BF16 norm weights: [Q norm weights | K norm weights]
2423
  // Q: numHeads*headDim BF16 values, K: numKVHeads*headDim BF16 values
2424
- qk_norm_weight: array<vec4<u32>, ${p}>,
2425
  }
2426
 
2427
  @group(0) @binding(0) var<storage, read> q_proj_full: array<f32>;
@@ -2562,7 +2562,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
2562
  v_cache[cache_off + i] = v_proj[off + i];
2563
  }
2564
  }
2565
- }`}const pr=b(320),fr=`
2566
  struct Params { K: u32, N: u32, group_size: u32, }
2567
 
2568
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -2630,7 +2630,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2630
  if (lane == 0u && col < N) {
2631
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
2632
  }
2633
- }`,_r=`
2634
  enable f16;
2635
 
2636
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -2698,7 +2698,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2698
  if (lane == 0u && col < N) {
2699
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
2700
  }
2701
- }`,cr=`
2702
  enable f16;
2703
  enable subgroups;
2704
 
@@ -2774,7 +2774,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2774
  if (lane == 0u && col < N) {
2775
  partials[split_id * N + col] = s;
2776
  }
2777
- }`,wr=`
2778
  struct Params { H_half: u32, num_tokens: u32, }
2779
  @group(0) @binding(0) var<storage, read> src: array<u32>;
2780
  @group(0) @binding(1) var<storage, read> indices: array<u32>;
@@ -2789,7 +2789,7 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
2789
  let src_row = indices[row_out];
2790
  dst[row_out * p.H_half + col] = src[src_row * p.H_half + col];
2791
  }
2792
- `,br=`
2793
  struct Params { K: u32, N: u32, group_size: u32, }
2794
 
2795
  @group(0) @binding(0) var<storage, read> bf16_weight: array<u32>;
@@ -2853,7 +2853,7 @@ fn main(@builtin(workgroup_id) wg_id: vec3u,
2853
  qweight[packed_row * N + col] = packed;
2854
  }
2855
  }
2856
- }`,vr=`
2857
  struct Params { count: u32, }
2858
 
2859
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -2867,7 +2867,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
2867
  let a = input[idx * 2u];
2868
  let b = input[idx * 2u + 1u];
2869
  output[idx] = pack2x16float(vec2<f32>(a, b));
2870
- }`,mr=`
2871
  struct Params { N: u32, eps: f32, }
2872
 
2873
  @group(0) @binding(0) var<storage, read_write> hidden: array<f32>;
@@ -2909,7 +2909,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2909
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2910
  output[off + i] = hidden[off + i] * rms * (1.0 + w);
2911
  }
2912
- }`,kr=`
2913
  struct Params { N: u32, eps: f32, }
2914
 
2915
  @group(0) @binding(0) var<storage, read> hidden: array<f32>;
@@ -2952,7 +2952,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2952
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2953
  output[off + i] = wg_vals[i] * rms * (1.0 + w);
2954
  }
2955
- }`,hr=`
2956
  struct Params { N: u32, eps: f32, }
2957
 
2958
  @group(0) @binding(0) var<storage, read> input_a: array<f32>;
@@ -2998,7 +2998,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
2998
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2999
  normed[off + i] = wg_vals[i] * rms * (1.0 + w);
3000
  }
3001
- }`,xr=`
3002
  enable f16;
3003
 
3004
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -3060,7 +3060,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3060
 
3061
  output[col] = (gate0 / (1.0 + exp(-gate0))) * up0;
3062
  output[N + col] = (gate1 / (1.0 + exp(-gate1))) * up1;
3063
- }`,qr=`
3064
  enable f16;
3065
 
3066
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -3113,7 +3113,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3113
 
3114
  output[col] = sum0;
3115
  output[N + col] = sum1;
3116
- }`,yr=`
3117
  enable f16;
3118
 
3119
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -3166,7 +3166,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3166
 
3167
  output[col] = sum0;
3168
  output[N + col] = sum1;
3169
- }`,Fr=`
3170
  struct Params { K: u32, N: u32, group_size: u32, }
3171
 
3172
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -3221,7 +3221,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3221
 
3222
  output[col] = sum0;
3223
  output[N + col] = sum1;
3224
- }`,Nr=`
3225
  enable f16;
3226
 
3227
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -3279,7 +3279,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3279
 
3280
  output[col] = sum0;
3281
  output[N + col] = sum1;
3282
- }`,Pr=`
3283
  enable f16;
3284
 
3285
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -3354,7 +3354,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3354
  output[col] = partial0[tid] + partial0[tid+1u] + partial0[tid+2u] + partial0[tid+3u];
3355
  output[N + col] = partial1[tid] + partial1[tid+1u] + partial1[tid+2u] + partial1[tid+3u];
3356
  }
3357
- }`,Sr=`
3358
  struct Params { N: u32, eps: f32, }
3359
 
3360
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -3411,7 +3411,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3411
  let b = unpack_bf16(bias[i >> 1u], i);
3412
  output[base + i] = normalized * w + b;
3413
  }
3414
- }`,Er=`
3415
  struct Params { K: u32, N: u32, }
3416
 
3417
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -3447,7 +3447,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3447
  let bp = bias[col >> 1u];
3448
  let b = bitcast<f32>(select((bp & 0xFFFFu) << 16u, bp & 0xFFFF0000u, (col & 1u) == 1u));
3449
  output[token * N + col] = sum + b;
3450
- }`,Rr=`
3451
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3452
  @group(0) @binding(1) var<uniform> len: u32;
3453
 
@@ -3459,9 +3459,13 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3459
  let i = gid.x;
3460
  if (i >= len) { return; }
3461
  let x = data[i];
 
 
 
 
3462
  let inner = SQRT_2_OVER_PI * (x + COEFF * x * x * x);
3463
  data[i] = 0.5 * x * (1.0 + tanh(inner));
3464
- }`,Kr=`
3465
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3466
  @group(0) @binding(1) var<uniform> len: u32;
3467
 
@@ -3473,9 +3477,11 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3473
  let i = gid.x;
3474
  if (i >= len) { return; }
3475
  let x = data[i];
 
 
3476
  let inner = SQRT_2_OVER_PI * (x + COEFF * x * x * x);
3477
  data[i] = 0.5 * x * (1.0 + tanh(inner));
3478
- }`,Ar=`
3479
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, }
3480
 
3481
  @group(0) @binding(0) var<storage, read> q_in: array<f32>;
@@ -3509,7 +3515,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3509
 
3510
  q_out[idx] = q_in[idx] * c + sign * q_in[partner_idx] * s;
3511
  k_out[idx] = k_in[idx] * c + sign * k_in[partner_idx] * s;
3512
- }`,Br=`
3513
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, scale: f32, }
3514
 
3515
  @group(0) @binding(0) var<storage, read> q: array<f32>;
@@ -3591,7 +3597,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3591
  output[out_base + d] = acc[d] * inv_sum;
3592
  }
3593
  }
3594
- }`,Dr=`
3595
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
3596
  @group(0) @binding(1) var<storage, read> b: array<f32>;
3597
  @group(0) @binding(2) var<uniform> len: u32;
@@ -3601,7 +3607,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3601
  let i = gid.x;
3602
  if (i >= len) { return; }
3603
  a[i] += b[i];
3604
- }`,zr=`
3605
  struct Params { H: u32, }
3606
 
3607
  @group(0) @binding(0) var<storage, read> vision: array<f32>;
@@ -3619,4 +3625,4 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3619
  for (var i = tid; i < H; i += 256u) {
3620
  embeds[pos * H + i] = vision[vit_idx * H + i];
3621
  }
3622
- }`,Tr={gptq_matvec:h,gptq_matvec_f16:x,gptq_matvec_4t:fr,gptq_matvec_4t_f16:_r,gptq_splitk:q,reduce_splitk:y,bf16_matvec:F,rmsnorm:N,silu_mul:P,add:S,embedding:K,embed_from_argmax:A,argmax:B,topk_extract:D,kv_cache_store:T,gqa_attention_head:H,gqa_reduce:M,deltanet_recurrent:G,head_rmsnorm:O,head_rmsnorm_nogated:U,causal_conv1d:I,split:E,sigmoid_mul:R,fused_gate_up_silu:L,fused_gate_up_silu_f16:Q,fused_gate_up_silu_4t:j,fused_gate_up_silu_4t_f16:V,add_rmsnorm:W,add_rmsnorm_ro:Y,three_way_add_rmsnorm:X,norm_gptq_lite:nr,norm_gptq_lite_noadd:gr,fused_sigmoid_gptq:$,fused_sigmoid_gptq_f16:Z,fused_sigmoid_gptq_4t:ur,fused_sigmoid_gptq_4t_f16:tr,fused_silu_gptq:J,fused_silu_gptq_f16:rr,fused_silu_gptq_4t:ar,fused_silu_gptq_4t_f16:er,fused_addnorm_gate_up_silu:ir,rep_penalty:dr,gpu_sample:lr,append_token:z,fused_norm_gptq:sr,fused_norm_gptq_noadd:or,fused_conv_deltanet_norm:C,fused_split_qknorm_kvstore:pr,gptq_matvec_4t_f16_sk:cr,gather_rows_bf16:wr,quantize_bf16_to_int4:br,pack_f32_to_f16_pairs:vr,gptq_matmul_b2:Fr,gptq_matmul_b2_f16:Nr,gptq_matmul_b2_4t_f16:Pr,add_rmsnorm_b2:mr,add_rmsnorm_ro_b2:kr,three_way_add_rmsnorm_b2:hr,fused_gate_up_silu_b2_f16:xr,fused_silu_gptq_b2_f16:qr,fused_sigmoid_gptq_b2_f16:yr,vit_layernorm:Sr,vit_bf16_matvec_bias:Er,vit_gelu_tanh:Rr,vit_gelu:Kr,vit_rope:Ar,vit_attention:Br,vit_add:Dr,vit_scatter_embed:zr},Gr=Object.freeze(Object.defineProperty({__proto__:null,SHADERS:Tr,SHADER_FUSED_SPLIT_QKNORM_KVSTORE_FN:b},Symbol.toStringTag,{value:"Module"}));export{v as G,Tr as S,Mr as _,b as a,Gr as b,Hr as g};
 
1
+ (function(){const e=document.createElement("link").relList;if(e&&e.supports&&e.supports("modulepreload"))return;for(const r of document.querySelectorAll('link[rel="modulepreload"]'))u(r);new MutationObserver(r=>{for(const a of r)if(a.type==="childList")for(const i of a.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&u(i)}).observe(document,{childList:!0,subtree:!0});function t(r){const a={};return r.integrity&&(a.integrity=r.integrity),r.referrerPolicy&&(a.referrerPolicy=r.referrerPolicy),r.crossOrigin==="use-credentials"?a.credentials="include":r.crossOrigin==="anonymous"?a.credentials="omit":a.credentials="same-origin",a}function u(r){if(r.ep)return;r.ep=!0;const a=t(r);fetch(r.href,a)}})();class b{constructor(){this.device=null,this.adapter=null,this.adapterInfo=null,this.pipelineCache=new Map,this.bufferCache=new Map,this.bindGroupCache=new Map}async init(){if(!navigator.gpu)throw new Error("WebGPU not supported");if(this.adapter=await navigator.gpu.requestAdapter({powerPreference:"high-performance"}),!this.adapter)throw new Error("No WebGPU adapter found");this.adapterInfo=await this.adapter.requestAdapterInfo?.()??{};const e={},t={maxBufferSize:4*1024*1024*1024,maxStorageBufferBindingSize:4*1024*1024*1024,maxComputeWorkgroupStorageSize:32768,maxComputeInvocationsPerWorkgroup:256,maxComputeWorkgroupSizeX:256,maxStorageBuffersPerShaderStage:10};for(const[r,a]of Object.entries(t))this.adapter.limits[r]!==void 0&&(e[r]=Math.min(a,this.adapter.limits[r]));const u=[];return this.adapter.features.has("shader-f16")&&u.push("shader-f16"),this.adapter.features.has("subgroups")&&u.push("subgroups"),this.device=await this.adapter.requestDevice({requiredLimits:e,requiredFeatures:u}),this.hasF16=this.device.features.has("shader-f16"),this.hasSubgroups=this.device.features.has("subgroups"),this.device.lost.then(r=>console.error("WebGPU device lost:",r)),this}createBuffer(e,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST){const 
r=Math.ceil(t/4)*4,a=this.device.createBuffer({size:r,usage:u,label:e});return this.bufferCache.set(e,a),a}createBufferFromData(e,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){const r=this.createBuffer(e,t.byteLength,u);return this.device.queue.writeBuffer(r,0,t),r}createReadbackBuffer(e,t){const u=Math.ceil(t/4)*4;return this.device.createBuffer({size:u,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:e+"_readback"})}getOrCreatePipeline(e,t,u="main"){if(this.pipelineCache.has(e))return this.pipelineCache.get(e);const r=this.device.createShaderModule({code:t,label:e}),a=this.device.createComputePipeline({layout:"auto",compute:{module:r,entryPoint:u},label:e});return this.pipelineCache.set(e,a),a}initTimestamps(){this.device.features.has("timestamp-query")&&(this._tsQuerySet=this.device.createQuerySet({type:"timestamp",count:2}),this._tsResolveBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC}),this._tsReadBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this._tsEnabled=!0,this._tsResults=[])}beginBatch(){this._encoder=this.device.createCommandEncoder(),this._passCount=0,this.singlePassMode&&(this._singlePass=this._encoder.beginComputePass()),this._tsEnabled&&this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:0}}).end()}endBatch(){this._singlePass&&(this._singlePass.end(),this._singlePass=null),this._tsEnabled&&this._encoder&&(this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:1}}).end(),this._encoder.resolveQuerySet(this._tsQuerySet,0,2,this._tsResolveBuf,0),this._encoder.copyBufferToBuffer(this._tsResolveBuf,0,this._tsReadBuf,0,16)),this._encoder&&(this.device.queue.submit([this._encoder.finish()]),this._encoder=null)}async readTimestamp(){if(!this._tsEnabled)return null;await this._tsReadBuf.mapAsync(GPUMapMode.READ);const e=new 
BigInt64Array(this._tsReadBuf.getMappedRange().slice(0));this._tsReadBuf.unmap();const u=Number(e[1]-e[0])/1e6;return this._tsResults.push(u),u}copyBuffer(e,t,u,r=0,a=0){if(this._singlePass){this._singlePass.end(),this._encoder.copyBufferToBuffer(e,r,t,a,u),this._singlePass=this._encoder.beginComputePass();return}const i=this._encoder||this.device.createCommandEncoder();i.copyBufferToBuffer(e,r,t,a,u),this._encoder||this.device.queue.submit([i.finish()])}startRecording(){this._recording=[]}stopRecording(){const e=this._recording;return this._recording=null,e}replay(e,t){if(t)for(const r of t)this.device.queue.writeBuffer(r.buffer,r.offset,r.data,r.dataOffset,r.size);const u=this._encoder;for(let r=0;r<e.length;r++){const a=e[r];if(a.multi){const i=u.beginComputePass(),o=a.ops;for(let n=0;n<o.length;n++){const s=o[n];i.setPipeline(s.pipeline),i.setBindGroup(0,s.bindGroup),i.dispatchWorkgroups(s.wgX,s.wgY)}i.end()}else{const i=u.beginComputePass();i.setPipeline(a.pipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(a.wgX,a.wgY),i.end()}}}dispatch(e,t,u,r=1,a=1){if(this._recording&&this._recording.push({pipeline:e,bindGroup:t[0],wgX:u,wgY:r}),this._singlePass){const n=this._singlePass;this._passCount!==void 0&&this._passCount++,n.setPipeline(e);for(let s=0;s<t.length;s++)n.setBindGroup(s,t[s]);n.dispatchWorkgroups(u,r,a);return}const i=this._encoder||this.device.createCommandEncoder(),o=i.beginComputePass();this._passCount!==void 0&&this._passCount++,o.setPipeline(e);for(let n=0;n<t.length;n++)o.setBindGroup(n,t[n]);o.dispatchWorkgroups(u,r,a),o.end(),this._encoder||this.device.queue.submit([i.finish()])}dispatchMulti(e){if(this._recording&&this._recording.push({multi:!0,ops:e.map(r=>({pipeline:r.pipeline,bindGroup:r.bindGroups[0],wgX:r.workgroupsX,wgY:r.workgroupsY||1}))}),this._singlePass){this._passCount!==void 0&&this._passCount++;for(const r of e){this._singlePass.setPipeline(r.pipeline);for(let 
a=0;a<r.bindGroups.length;a++)this._singlePass.setBindGroup(a,r.bindGroups[a]);this._singlePass.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}return}const t=this._encoder||this.device.createCommandEncoder(),u=t.beginComputePass();this._passCount!==void 0&&this._passCount++;for(const r of e){u.setPipeline(r.pipeline);for(let a=0;a<r.bindGroups.length;a++)u.setBindGroup(a,r.bindGroups[a]);u.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}u.end(),this._encoder||this.device.queue.submit([t.finish()])}async readBuffer(e,t){const u=this.createReadbackBuffer("_readback",t),r=this.device.createCommandEncoder();r.copyBufferToBuffer(e,0,u,0,t),this.device.queue.submit([r.finish()]),await u.mapAsync(GPUMapMode.READ);const a=new Float32Array(u.getMappedRange().slice(0));return u.unmap(),u.destroy(),a}createBindGroup(e,t,u){return this.device.createBindGroup({layout:e.getBindGroupLayout(t),entries:u.map((r,a)=>({binding:a,resource:{buffer:r}}))})}createBindGroupWithOffsets(e,t,u){return this.device.createBindGroup({layout:e.getBindGroupLayout(t),entries:u.map((r,a)=>({binding:a,resource:r.buffer?{buffer:r.buffer,offset:r.offset||0,size:r.size}:{buffer:r}}))})}getCachedBindGroup(e,t,u,r){let a=this.bindGroupCache.get(e);return a||(a=this.createBindGroup(t,u,r),this.bindGroupCache.set(e,a)),a}destroy(){for(const e of this.bufferCache.values())e.destroy();this.bufferCache.clear(),this.bindGroupCache.clear(),this.device?.destroy()}}const Tr=Object.freeze(Object.defineProperty({__proto__:null,GPUContext:b},Symbol.toStringTag,{value:"Module"})),v="modulepreload",m=function(d){return"/"+d},f={},Hr=function(e,t,u){let r=Promise.resolve();if(t&&t.length>0){let n=function(s){return Promise.all(s.map(l=>Promise.resolve(l).then(p=>({status:"fulfilled",value:p}),p=>({status:"rejected",reason:p}))))};document.getElementsByTagName("link");const 
i=document.querySelector("meta[property=csp-nonce]"),o=i?.nonce||i?.getAttribute("nonce");r=n(t.map(s=>{if(s=m(s),s in f)return;f[s]=!0;const l=s.endsWith(".css"),p=l?'[rel="stylesheet"]':"";if(document.querySelector(`link[href="${s}"]${p}`))return;const g=document.createElement("link");if(g.rel=l?"stylesheet":v,l||(g.as="script"),g.crossOrigin="",g.href=s,o&&g.setAttribute("nonce",o),document.head.appendChild(g),l)return new Promise((c,w)=>{g.addEventListener("load",c),g.addEventListener("error",()=>w(new Error(`Unable to preload CSS for ${s}`)))})}))}function a(i){const o=new Event("vite:preloadError",{cancelable:!0});if(o.payload=i,window.dispatchEvent(o),!o.defaultPrevented)throw i}return r.then(i=>{for(const o of i||[])o.status==="rejected"&&a(o.reason);return e().catch(a)})},k=`
2
  struct Params { K: u32, N: u32, group_size: u32, }
3
 
4
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
49
  }
50
 
51
  output[col] = sum;
52
+ }`,h=`
53
  enable f16;
54
 
55
  struct Params { K: u32, N: u32, group_size: u32, }
 
103
  }
104
 
105
  output[col] = sum;
106
+ }`,x=`
107
  struct Params { K: u32, N: u32, group_size: u32, num_splits: u32, }
108
 
109
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
160
  }
161
 
162
  partials[split_id * N + col] = sum;
163
+ }`,q=`
164
  struct Params { N: u32, num_splits: u32, }
165
 
166
  @group(0) @binding(0) var<storage, read> partials: array<f32>;
 
176
  sum += partials[s * params.N + col];
177
  }
178
  output[col] = sum;
179
+ }`,y=`
180
  struct Params { K: u32, N: u32, }
181
 
182
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
203
  sum += input[k] * w0 + input[k + 1u] * w1 + input[k + 2u] * w2 + input[k + 3u] * w3;
204
  }
205
  output[col] = sum;
206
+ }`,F=`
207
  struct Params { N: u32, eps: f32, }
208
 
209
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
244
  let w = unpack_bf16(weight[i / 2u], i % 2u);
245
  output[i] = input[i] * rms * (1.0 + w);
246
  }
247
+ }`,N=`
248
  struct Params { N: u32, }
249
 
250
  @group(0) @binding(0) var<storage, read> gate: array<f32>;
 
258
  if (i >= params.N) { return; }
259
  let x = gate[i];
260
  output[i] = (x / (1.0 + exp(-x))) * up[i];
261
+ }`,P=`
262
  struct Params { N: u32, }
263
 
264
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
 
270
  let i = gid.x;
271
  if (i >= params.N) { return; }
272
  a[i] = a[i] + b[i];
273
+ }`,S=`
274
  struct Params { N: u32, num_heads: u32, head_dim: u32, }
275
 
276
  @group(0) @binding(0) var<storage, read> src: array<f32>;
 
287
  let d = i % hd;
288
  dst_a[i] = src[head * hd * 2u + d];
289
  dst_b[i] = src[head * hd * 2u + hd + d];
290
+ }`,E=`
291
  struct Params { N: u32, }
292
 
293
  @group(0) @binding(0) var<storage, read> x: array<f32>;
 
300
  let i = gid.x;
301
  if (i >= params.N) { return; }
302
  output[i] = x[i] / (1.0 + exp(-gate[i]));
303
+ }`,R=`
304
  struct Params { token_id: u32, dim: u32, }
305
 
306
  @group(0) @binding(0) var<storage, read> embeddings: array<u32>;
 
318
  if (i >= params.dim) { return; }
319
  let flat = params.token_id * params.dim + i;
320
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
321
+ }`,K=`
322
  struct ArgmaxResult { idx: u32, val: f32, }
323
  struct Params { dim: u32, }
324
 
 
338
  if (i >= params.dim) { return; }
339
  let flat = argmax_result.idx * params.dim + i;
340
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
341
+ }`,A=`
342
  struct Params { N: u32, }
343
  struct Result { idx: u32, val: f32, }
344
 
 
381
  result.idx = s_idx[0];
382
  result.val = s_val[0];
383
  }
384
+ }`,B=`
385
  struct Params { N: u32, }
386
 
387
  @group(0) @binding(0) var<storage, read> logits: array<f32>;
 
406
  // Output (idx, val) pair — 256 candidates total
407
  result[tid * 2u] = best_idx;
408
  result[tid * 2u + 1u] = bitcast<u32>(best_val);
409
+ }`,D=`
410
  struct ArgmaxResult { idx: u32, val: f32, }
411
  struct Params { recent_count: u32, history_slot: u32, }
412
 
 
420
  let tok = argmax_result.idx;
421
  recent_tokens[params.recent_count] = tok;
422
  token_history[params.history_slot] = tok;
423
+ }`,z=`
424
  struct Params { position: u32, num_kv_heads: u32, head_dim: u32, }
425
 
426
  @group(0) @binding(0) var<storage, read> k_proj: array<f32>;
 
437
  let offset = params.position * total + i;
438
  k_cache[offset] = k_proj[i];
439
  v_cache[offset] = v_proj[i];
440
+ }`,T=`
441
  struct Params {
442
  seq_len: u32,
443
  head_dim: u32,
 
531
  output[base + hd + 1u] = running_max;
532
  }
533
  }
534
+ }`,H=`
535
  struct Params {
536
  head_dim: u32,
537
  num_splits: u32,
 
583
  }
584
 
585
  output[h * hd + tid] = acc / ws;
586
+ }`,M=`
587
  struct Params {
588
  num_heads: u32,
589
  key_dim: u32,
 
669
  }
670
  output[h * vd + vi] = o_val;
671
  }
672
+ }`,G=`
673
  struct Params { num_heads: u32, head_dim: u32, eps: f32, }
674
 
675
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
 
749
  let w = unpack_bf16(weight[i / 2u], i % 2u);
750
  x[off + i] = x[off + i] * rms * w;
751
  }
752
+ }`,O=`
753
  struct Params { channels: u32, }
754
 
755
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
 
785
  hist[c] = h1;
786
  hist[ch + c] = h2;
787
  hist[2u * ch + c] = cur;
788
+ }`,I=`
789
  struct Params {
790
  num_heads: u32,
791
  key_dim: u32,
 
1002
  }
1003
  workgroupBarrier();
1004
  }
1005
+ }`,C=`
1006
  struct Params { K: u32, N: u32, group_size: u32, }
1007
 
1008
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
1056
  }
1057
 
1058
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1059
+ }`,L=`
1060
  enable f16;
1061
 
1062
  struct Params { K: u32, N: u32, group_size: u32, }
 
1113
  }
1114
 
1115
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1116
+ }`,Q=`
1117
  struct Params { K: u32, N: u32, group_size: u32, }
1118
 
1119
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
1185
  let u = partial[32u+tid] + partial[32u+tid+1u] + partial[32u+tid+2u] + partial[32u+tid+3u];
1186
  output[col] = (g / (1.0 + exp(-g))) * u;
1187
  }
1188
+ }`,j=`
1189
  enable f16;
1190
 
1191
  struct Params { K: u32, N: u32, group_size: u32, }
 
1260
  let u = partial[32u+tid] + partial[32u+tid+1u] + partial[32u+tid+2u] + partial[32u+tid+3u];
1261
  output[col] = (g / (1.0 + exp(-g))) * u;
1262
  }
1263
+ }`,V=`
1264
  struct Params { N: u32, eps: f32, }
1265
 
1266
  @group(0) @binding(0) var<storage, read_write> hidden: array<f32>;
 
1304
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1305
  output[i] = hidden[i] * rms * (1.0 + w);
1306
  }
1307
+ }`,W=`
1308
  struct Params { N: u32, eps: f32, }
1309
 
1310
  @group(0) @binding(0) var<storage, read> hidden: array<f32>;
 
1346
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1347
  output[i] = wg_vals[i] * rms * (1.0 + w);
1348
  }
1349
+ }`,Y=`
1350
  struct Params { N: u32, eps: f32, }
1351
 
1352
  @group(0) @binding(0) var<storage, read> input_a: array<f32>;
 
1391
  let w = unpack_bf16(weight[i / 2u], i % 2u);
1392
  normed[i] = wg_vals[i] * rms * (1.0 + w);
1393
  }
1394
+ }`,X=`
1395
  struct Params { K: u32, N: u32, group_size: u32, }
1396
 
1397
  @group(0) @binding(0) var<storage, read> x: array<f32>;
 
1437
  }
1438
 
1439
  output[col] = sum;
1440
+ }`,$=`
1441
  enable f16;
1442
 
1443
  struct Params { K: u32, N: u32, group_size: u32, }
 
1486
  }
1487
 
1488
  output[col] = sum;
1489
+ }`,Z=`
1490
  struct Params { K: u32, N: u32, group_size: u32, }
1491
 
1492
  @group(0) @binding(0) var<storage, read> a: array<f32>;
 
1532
  }
1533
 
1534
  output[col] = sum;
1535
+ }`,J=`
1536
  enable f16;
1537
 
1538
  struct Params { K: u32, N: u32, group_size: u32, }
 
1581
  }
1582
 
1583
  output[col] = sum;
1584
+ }`,rr=`
1585
  struct Params { K: u32, N: u32, group_size: u32, }
1586
 
1587
  @group(0) @binding(0) var<storage, read> a: array<f32>;
 
1635
  if (lane == 0u && col < N) {
1636
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1637
  }
1638
+ }`,ar=`
1639
  enable f16;
1640
 
1641
  struct Params { K: u32, N: u32, group_size: u32, }
 
1692
  if (lane == 0u && col < N) {
1693
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1694
  }
1695
+ }`,er=`
1696
  struct Params { K: u32, N: u32, group_size: u32, }
1697
 
1698
  @group(0) @binding(0) var<storage, read> x: array<f32>;
 
1746
  if (lane == 0u && col < N) {
1747
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1748
  }
1749
+ }`,ur=`
1750
  enable f16;
1751
 
1752
  struct Params { K: u32, N: u32, group_size: u32, }
 
1803
  if (lane == 0u && col < N) {
1804
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1805
  }
1806
+ }`,tr=`
1807
  struct Params {
1808
  K: u32, // hidden_size
1809
  N: u32, // intermediate_size
 
1914
  }
1915
 
1916
  output[col] = (gate_sum / (1.0 + exp(-gate_sum))) * up_sum;
1917
+ }`,ir=`
1918
  struct Params {
1919
  K: u32,
1920
  N: u32,
 
2002
  }
2003
 
2004
  output[col] = sum;
2005
+ }`,sr=`
2006
  struct Params {
2007
  K: u32,
2008
  N: u32,
 
2087
  }
2088
 
2089
  output[col] = sum;
2090
+ }`,or=`
2091
  struct Params {
2092
  K: u32, N: u32, group_size: u32, eps: f32,
2093
  norm_weight: array<vec4<u32>, 640>,
 
2169
  }
2170
  }
2171
  output[col] = sum;
2172
+ }`,nr=`
2173
  struct Params {
2174
  K: u32, N: u32, group_size: u32, eps: f32,
2175
  norm_weight: array<vec4<u32>, 640>,
 
2244
  }
2245
  }
2246
  output[col] = sum;
2247
+ }`,gr=`
2248
  struct Params { num_tokens: u32, penalty: f32, presence: f32, _pad: u32, }
2249
 
2250
  @group(0) @binding(0) var<storage, read_write> logits: array<f32>;
 
2268
  // Presence penalty (additive)
2269
  val -= params.presence;
2270
  logits[tok] = val;
2271
+ }`,dr=`
2272
  struct Params {
2273
  temperature: f32,
2274
  top_k: u32,
 
2404
  result.idx = wg_idx[selected];
2405
  result.val = wg_val[selected];
2406
  }
2407
+ }`;function _(d=320,e=1e7,t=33,u=30,r=128){return`
2408
+ const ROPE_THETA: f32 = ${e};
2409
  const MROPE_S1_LIMIT: u32 = ${t}u;
2410
  const MROPE_S2_LIMIT: u32 = ${u}u;
2411
  const PARTIAL_DIM: u32 = ${r}u;
 
2421
  position_w: u32, // mRoPE width dimension (= position for text tokens)
2422
  // Packed BF16 norm weights: [Q norm weights | K norm weights]
2423
  // Q: numHeads*headDim BF16 values, K: numKVHeads*headDim BF16 values
2424
+ qk_norm_weight: array<vec4<u32>, ${d}>,
2425
  }
2426
 
2427
  @group(0) @binding(0) var<storage, read> q_proj_full: array<f32>;
 
2562
  v_cache[cache_off + i] = v_proj[off + i];
2563
  }
2564
  }
2565
+ }`}const lr=_(320),pr=`
2566
  struct Params { K: u32, N: u32, group_size: u32, }
2567
 
2568
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
2630
  if (lane == 0u && col < N) {
2631
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
2632
  }
2633
+ }`,fr=`
2634
  enable f16;
2635
 
2636
  struct Params { K: u32, N: u32, group_size: u32, }
 
2698
  if (lane == 0u && col < N) {
2699
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
2700
  }
2701
+ }`,_r=`
2702
  enable f16;
2703
  enable subgroups;
2704
 
 
2774
  if (lane == 0u && col < N) {
2775
  partials[split_id * N + col] = s;
2776
  }
2777
+ }`,cr=`
2778
  struct Params { H_half: u32, num_tokens: u32, }
2779
  @group(0) @binding(0) var<storage, read> src: array<u32>;
2780
  @group(0) @binding(1) var<storage, read> indices: array<u32>;
 
2789
  let src_row = indices[row_out];
2790
  dst[row_out * p.H_half + col] = src[src_row * p.H_half + col];
2791
  }
2792
+ `,wr=`
2793
  struct Params { K: u32, N: u32, group_size: u32, }
2794
 
2795
  @group(0) @binding(0) var<storage, read> bf16_weight: array<u32>;
 
2853
  qweight[packed_row * N + col] = packed;
2854
  }
2855
  }
2856
+ }`,br=`
2857
  struct Params { count: u32, }
2858
 
2859
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
2867
  let a = input[idx * 2u];
2868
  let b = input[idx * 2u + 1u];
2869
  output[idx] = pack2x16float(vec2<f32>(a, b));
2870
+ }`,vr=`
2871
  struct Params { N: u32, eps: f32, }
2872
 
2873
  @group(0) @binding(0) var<storage, read_write> hidden: array<f32>;
 
2909
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2910
  output[off + i] = hidden[off + i] * rms * (1.0 + w);
2911
  }
2912
+ }`,mr=`
2913
  struct Params { N: u32, eps: f32, }
2914
 
2915
  @group(0) @binding(0) var<storage, read> hidden: array<f32>;
 
2952
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2953
  output[off + i] = wg_vals[i] * rms * (1.0 + w);
2954
  }
2955
+ }`,kr=`
2956
  struct Params { N: u32, eps: f32, }
2957
 
2958
  @group(0) @binding(0) var<storage, read> input_a: array<f32>;
 
2998
  let w = unpack_bf16(weight[i / 2u], i % 2u);
2999
  normed[off + i] = wg_vals[i] * rms * (1.0 + w);
3000
  }
3001
+ }`,hr=`
3002
  enable f16;
3003
 
3004
  struct Params { K: u32, N: u32, group_size: u32, }
 
3060
 
3061
  output[col] = (gate0 / (1.0 + exp(-gate0))) * up0;
3062
  output[N + col] = (gate1 / (1.0 + exp(-gate1))) * up1;
3063
+ }`,xr=`
3064
  enable f16;
3065
 
3066
  struct Params { K: u32, N: u32, group_size: u32, }
 
3113
 
3114
  output[col] = sum0;
3115
  output[N + col] = sum1;
3116
+ }`,qr=`
3117
  enable f16;
3118
 
3119
  struct Params { K: u32, N: u32, group_size: u32, }
 
3166
 
3167
  output[col] = sum0;
3168
  output[N + col] = sum1;
3169
+ }`,yr=`
3170
  struct Params { K: u32, N: u32, group_size: u32, }
3171
 
3172
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
3221
 
3222
  output[col] = sum0;
3223
  output[N + col] = sum1;
3224
+ }`,Fr=`
3225
  enable f16;
3226
 
3227
  struct Params { K: u32, N: u32, group_size: u32, }
 
3279
 
3280
  output[col] = sum0;
3281
  output[N + col] = sum1;
3282
+ }`,Nr=`
3283
  enable f16;
3284
 
3285
  struct Params { K: u32, N: u32, group_size: u32, }
 
3354
  output[col] = partial0[tid] + partial0[tid+1u] + partial0[tid+2u] + partial0[tid+3u];
3355
  output[N + col] = partial1[tid] + partial1[tid+1u] + partial1[tid+2u] + partial1[tid+3u];
3356
  }
3357
+ }`,Pr=`
3358
  struct Params { N: u32, eps: f32, }
3359
 
3360
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
3411
  let b = unpack_bf16(bias[i >> 1u], i);
3412
  output[base + i] = normalized * w + b;
3413
  }
3414
+ }`,Sr=`
3415
  struct Params { K: u32, N: u32, }
3416
 
3417
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
3447
  let bp = bias[col >> 1u];
3448
  let b = bitcast<f32>(select((bp & 0xFFFFu) << 16u, bp & 0xFFFF0000u, (col & 1u) == 1u));
3449
  output[token * N + col] = sum + b;
3450
+ }`,Er=`
3451
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3452
  @group(0) @binding(1) var<uniform> len: u32;
3453
 
 
3459
  let i = gid.x;
3460
  if (i >= len) { return; }
3461
  let x = data[i];
3462
+ // For |x| > 10, GELU saturates: positive → x, negative → 0
3463
+ // Avoids intermediate overflow in x^3 for large values
3464
+ if (x > 10.0) { return; } // data[i] = x (already in place)
3465
+ if (x < -10.0) { data[i] = 0.0; return; }
3466
  let inner = SQRT_2_OVER_PI * (x + COEFF * x * x * x);
3467
  data[i] = 0.5 * x * (1.0 + tanh(inner));
3468
+ }`,Rr=`
3469
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3470
  @group(0) @binding(1) var<uniform> len: u32;
3471
 
 
3477
  let i = gid.x;
3478
  if (i >= len) { return; }
3479
  let x = data[i];
3480
+ if (x > 10.0) { return; }
3481
+ if (x < -10.0) { data[i] = 0.0; return; }
3482
  let inner = SQRT_2_OVER_PI * (x + COEFF * x * x * x);
3483
  data[i] = 0.5 * x * (1.0 + tanh(inner));
3484
+ }`,Kr=`
3485
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, }
3486
 
3487
  @group(0) @binding(0) var<storage, read> q_in: array<f32>;
 
3515
 
3516
  q_out[idx] = q_in[idx] * c + sign * q_in[partner_idx] * s;
3517
  k_out[idx] = k_in[idx] * c + sign * k_in[partner_idx] * s;
3518
+ }`,Ar=`
3519
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, scale: f32, }
3520
 
3521
  @group(0) @binding(0) var<storage, read> q: array<f32>;
 
3597
  output[out_base + d] = acc[d] * inv_sum;
3598
  }
3599
  }
3600
+ }`,Br=`
3601
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
3602
  @group(0) @binding(1) var<storage, read> b: array<f32>;
3603
  @group(0) @binding(2) var<uniform> len: u32;
 
3607
  let i = gid.x;
3608
  if (i >= len) { return; }
3609
  a[i] += b[i];
3610
+ }`,Dr=`
3611
  struct Params { H: u32, }
3612
 
3613
  @group(0) @binding(0) var<storage, read> vision: array<f32>;
 
3625
  for (var i = tid; i < H; i += 256u) {
3626
  embeds[pos * H + i] = vision[vit_idx * H + i];
3627
  }
3628
+ }`,zr={gptq_matvec:k,gptq_matvec_f16:h,gptq_matvec_4t:pr,gptq_matvec_4t_f16:fr,gptq_splitk:x,reduce_splitk:q,bf16_matvec:y,rmsnorm:F,silu_mul:N,add:P,embedding:R,embed_from_argmax:K,argmax:A,topk_extract:B,kv_cache_store:z,gqa_attention_head:T,gqa_reduce:H,deltanet_recurrent:M,head_rmsnorm:G,head_rmsnorm_nogated:U,causal_conv1d:O,split:S,sigmoid_mul:E,fused_gate_up_silu:C,fused_gate_up_silu_f16:L,fused_gate_up_silu_4t:Q,fused_gate_up_silu_4t_f16:j,add_rmsnorm:V,add_rmsnorm_ro:W,three_way_add_rmsnorm:Y,norm_gptq_lite:or,norm_gptq_lite_noadd:nr,fused_sigmoid_gptq:X,fused_sigmoid_gptq_f16:$,fused_sigmoid_gptq_4t:er,fused_sigmoid_gptq_4t_f16:ur,fused_silu_gptq:Z,fused_silu_gptq_f16:J,fused_silu_gptq_4t:rr,fused_silu_gptq_4t_f16:ar,fused_addnorm_gate_up_silu:tr,rep_penalty:gr,gpu_sample:dr,append_token:D,fused_norm_gptq:ir,fused_norm_gptq_noadd:sr,fused_conv_deltanet_norm:I,fused_split_qknorm_kvstore:lr,gptq_matvec_4t_f16_sk:_r,gather_rows_bf16:cr,quantize_bf16_to_int4:wr,pack_f32_to_f16_pairs:br,gptq_matmul_b2:yr,gptq_matmul_b2_f16:Fr,gptq_matmul_b2_4t_f16:Nr,add_rmsnorm_b2:vr,add_rmsnorm_ro_b2:mr,three_way_add_rmsnorm_b2:kr,fused_gate_up_silu_b2_f16:hr,fused_silu_gptq_b2_f16:xr,fused_sigmoid_gptq_b2_f16:qr,vit_layernorm:Pr,vit_bf16_matvec_bias:Sr,vit_gelu_tanh:Er,vit_gelu:Rr,vit_rope:Kr,vit_attention:Ar,vit_add:Br,vit_scatter_embed:Dr},Mr=Object.freeze(Object.defineProperty({__proto__:null,SHADERS:zr,SHADER_FUSED_SPLIT_QKNORM_KVSTORE_FN:_},Symbol.toStringTag,{value:"Module"}));export{b as G,zr as S,Hr as _,_ as a,Mr as b,Tr as g};
assets/{main-CKQMLD5b.js → main-Y3tn35iX.js} RENAMED
@@ -1,4 +1,4 @@
1
- import{G as Ed}from"./gpu-ops-DKsrMEcC.js";import{Qwen35Model as Pd}from"./qwen35-model-BwnUri7A.js";import{loadConfig as Sd,loadQuantConfig as Cd,loadModelWeights as Fd}from"./safetensors-loader-CwGm5mJX.js";class Un{}let ti=class{static create(){throw new Error("ONNX not available")}};class si{}const ni={},Ad={Tensor:Un,InferenceSession:ti,OrtEnv:si,env:ni},Ld=Object.freeze(Object.defineProperty({__proto__:null,InferenceSession:ti,OrtEnv:si,Tensor:Un,default:Ad,env:ni},Symbol.toStringTag,{value:"Module"}));var bn={},Id=Object.defineProperty,Wt=(e,t)=>{for(var s in t)Id(e,s,{get:t[s],enumerable:!0})},Le={},Ze={},Od={},Nd="4.0.0-next.6",Wn=typeof self<"u",Vt=!ii(Le),ri=!ii(Ze),$s=Wn&&"caches"in self,zd=typeof globalThis.Deno<"u",Zs=zd&&$s&&!Vt,ai=typeof process<"u",oi=ai&&process?.release?.name==="node"&&!Zs,Hn=typeof window<"u"&&typeof window.document<"u",Qn=Wn&&["DedicatedWorkerGlobalScope","ServiceWorkerGlobalScope","SharedWorkerGlobalScope"].includes(self.constructor?.name),Dd=Hn||Qn||Zs,Bd=oi||typeof navigator<"u"&&"gpu"in navigator,Vd=typeof navigator<"u"&&"ml"in navigator,Gd=typeof crypto<"u"&&typeof crypto.getRandomValues=="function",$d=typeof chrome<"u"&&typeof chrome.runtime<"u"&&typeof chrome.runtime.id=="string",Rd=typeof ServiceWorkerGlobalScope<"u"&&Wn&&self instanceof ServiceWorkerGlobalScope,jd=()=>{if(typeof navigator>"u")return!1;const e=navigator.userAgent,s=(navigator.vendor||"").indexOf("Apple")>-1,n=!e.match(/CriOS|FxiOS|EdgiOS|OPiOS|mercury|brave/i)&&!e.includes("Chrome")&&!e.includes("Android");return s&&n},qd=jd(),K=Object.freeze({IS_BROWSER_ENV:Hn,IS_WEBWORKER_ENV:Qn,IS_WEB_ENV:Dd,IS_SERVICE_WORKER_ENV:Rd,IS_DENO_WEB_RUNTIME:Zs,IS_WEB_CACHE_AVAILABLE:$s,IS_WEBGPU_AVAILABLE:Bd,IS_WEBNN_AVAILABLE:Vd,IS_SAFARI:qd,IS_PROCESS_AVAILABLE:ai,IS_NODE_ENV:oi,IS_FS_AVAILABLE:Vt,IS_PATH_AVAILABLE:ri,IS_CRYPTO_AVAILABLE:Gd,IS_CHROME_AVAILABLE:$d}),Xn=Vt&&ri,Rs="./";if(Xn){const 
e=Object(import.meta).url;e?Rs=Ze.dirname(Ze.dirname(Od.fileURLToPath(e))):typeof __dirname<"u"&&(Rs=Ze.dirname(__dirname))}var Ud=Xn?Ze.join(Rs,"/.cache/"):null,lo="/models/",Wd=Xn?Ze.join(Rs,lo):lo,Hd=typeof globalThis.fetch=="function"?globalThis.fetch.bind(globalThis):void 0,$e=Object.freeze({DEBUG:10,INFO:20,WARNING:30,ERROR:40,NONE:50}),co=$e.WARNING,ee={version:Nd,backends:{onnx:{}},get logLevel(){return co},set logLevel(e){co=e,ee.backends.onnx?.setLogLevel?.(e)},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!(Hn||Qn||Zs),localModelPath:Wd,useFS:Vt,useBrowserCache:$s,useFSCache:Vt,cacheDir:Ud,useCustomCache:!1,customCache:null,useWasmCache:$s||Vt,cacheKey:"transformers-cache",fetch:Hd};function ii(e){return Object.keys(e).length===0}function Dt(e,t){e&&e(t)}function Qd(e){return Number.isInteger(e)||typeof e=="bigint"}function uo(e){return e==null||e===-1}function _o(e){const t=[];let s=e;for(;Array.isArray(s);)t.push(s.length),s=s[0];return t}function et(...e){return Array.prototype.concat.apply([],e)}function js(e,t){return Math.abs((e+t)%(2*t)-t)}function Se(e,t){return Object.assign({},...t.map(s=>{if(e[s]!==void 0)return{[s]:e[s]}}))}function Xd(e,t){let s=0;for(const n of e)n===t&&++s;return s}var J={error(...e){ee.logLevel<=$e.ERROR&&console.error(...e)},warn(...e){ee.logLevel<=$e.WARNING&&console.warn(...e)},info(...e){ee.logLevel<=$e.INFO&&console.log(...e)},debug(...e){ee.logLevel<=$e.DEBUG&&console.log(...e)},log(...e){this.info(...e)}},Kd=class{constructor(e){this.trie=this._build_trie(e)}_build_trie(e){const t=Object.create(null);for(const s of e){let n=t;for(let r=0;r<s.length;++r){const a=s[r];n=n[a]??=Object.create(null)}n.end=s}return t}split(e){const t=[],s=e.length;let n=0,r=0;for(;r<s;){let a=this.trie,o=null,i=r;for(;i<s&&(a=a[e[i]]);)a.end&&(o=a.end),++i;o?(r>n&&t.push(e.slice(n,r)),t.push(o),r+=o.length,n=r):++r}return 
n<s&&t.push(e.slice(n)),t}},ho=Kd,Jd=class{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??!this.special}},Yd=Jd,li=(()=>{const e=[...Array.from({length:94},(r,a)=>a+33),...Array.from({length:12},(r,a)=>a+161),...Array.from({length:82},(r,a)=>a+174)],t=e.slice();let s=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+s),s+=1);const n=t.map(r=>String.fromCharCode(r));return Object.fromEntries(e.map((r,a)=>[r,n[a]]))})(),Zd=e=>Object.fromEntries(Object.entries(e).map(([t,s])=>[s,t])),eu=Zd(li),po=".,!?…。,、।۔،",tu=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],["(?i:[sdmt]|ll|ve|re)","(?:[sS]|[dD]|[mM]|[tT]|[lL][lL]|[vV][eE]|[rR][eE])"],["[^\\r\\n\\p{L}\\p{N}]?+","[^\\r\\n\\p{L}\\p{N}]?"],["[^\\s\\p{L}\\p{N}]++","[^\\s\\p{L}\\p{N}]+"],["(?>\\p{Nd}{510})","(?:\\p{Nd}{510})"],["\\p{Nd}{3}+","(?:\\p{Nd}{3})+"],["\\G",""],[` ?[^(\\s|[${po}])]+`,` ?[^\\s${po}]+`]]),qs="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",Kn=e=>e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n't/g,"n't").replace(/ 'm/g,"'m").replace(/ 's/g,"'s").replace(/ 've/g,"'ve").replace(/ 're/g,"'re"),en=(e,t=!0)=>{if(e.Regex!==void 0){let s=e.Regex.replace(/\\([#&~])/g,"$1");s=s.replace(/\\A/g,"^").replace(/\\z/g,"$").replace(/\\Z/g,"(?=\\r?\\n?$)");for(const[n,r]of tu)s=s.replaceAll(n,r);try{return new RegExp(s,"gu")}catch(n){if(!(n instanceof SyntaxError)||!n.message.toLowerCase().includes("invalid property name"))throw n;let r=!1;const a=s.replace(/(\\[pP])\{([^}=]+)\}/g,(o,i,l)=>{try{return new RegExp(`\\p{${l}}`,"u"),`${i}{${l}}`}catch{return r=!0,`${i}{Script=${l}}`}});if(!r)throw n;try{return new RegExp(a,"gu")}catch{throw n}}}else if(e.String!==void 0){const s=su(e.String);return new 
RegExp(t?s:`(${s})`,"gu")}else return console.warn("Unknown pattern type:",e),null},su=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),nu=(e,t,s)=>{const n=[];let r=0;for(;r<e.length;){if(n.push(e[r]),(t.get(e[r])??s)!==s){++r;continue}for(;++r<e.length&&(t.get(e[r])??s)===s;)t.get(n.at(-1))!==s&&(n[n.length-1]+=e[r])}return n},ru=e=>e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103,au=e=>Number.isInteger(e)||typeof e=="bigint",ou=e=>{let t=0;for(const s of e)++t;return t},iu=e=>ci(e.toLowerCase()),Re=(...e)=>Array.prototype.concat.apply([],e),Jn=e=>new Map(Object.entries(e)),lu=(e,t)=>{const s=[];let n=0;for(const r of e.matchAll(t)){const a=r[0];n<r.index&&s.push(e.slice(n,r.index)),a.length>0&&s.push(a),n=r.index+a.length}return n<e.length&&s.push(e.slice(n)),s},ci=e=>e.replace(new RegExp("\\p{M}","gu"),""),fo=(e,t,s=[])=>{if(!e||Array.isArray(e)||typeof e!="object")return`${t} must be a valid object`;for(const n of s)if(!(n in e))return`${t} must contain a "${n}" property`;return null},cu=e=>e.match(/\S+/g)||[],du=class{constructor(){const e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}},ws=du,uu=class extends ws{constructor(e){super(),this.config=e}_call(e){return this.normalize(e)}},it=uu,_u=class extends it{tokenize_chinese_chars(e){const t=[];for(let s=0;s<e.length;++s){const n=e[s],r=n.charCodeAt(0);ru(r)?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}strip_accents(e){return e.normalize("NFD").replace(new RegExp("\\p{Mn}","gu"),"")}is_control(e){switch(e){case" ":case`
2
  `:case"\r":return!1;default:return new RegExp("^\\p{Cc}|\\p{Cf}|\\p{Co}|\\p{Cs}$","u").test(e)}}clean_text(e){const t=[];for(const s of e){const n=s.charCodeAt(0);n===0||n===65533||this.is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this.clean_text(e)),this.config.handle_chinese_chars&&(e=this.tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),this.config.strip_accents!==!1&&(e=this.strip_accents(e))):this.config.strip_accents&&(e=this.strip_accents(e)),e}},hu=_u,pu=class extends it{constructor(e){super(e),this.charsmap=e.precompiled_charsmap??null}normalize(e){return e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,""),e=e.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," "),e.includes("~")?e=e.split("~").map(s=>s.normalize("NFKC")).join("~"):e=e.normalize("NFKC"),e}},fu=pu,mu=class extends it{constructor(e){super(e),this.normalizers=(e.normalizers??[]).map(t=>di(t))}normalize(e){return this.normalizers.reduce((t,s)=>s?s.normalize(t):t,e)}},gu=mu,wu=class extends it{normalize(e){const t=en(this.config.pattern??{});return t===null?e:e.replaceAll(t,this.config.content??"")}},vu=wu,Mu=class extends it{constructor(){super(...arguments),this.form="NFC"}normalize(e){return e=e.normalize(this.form),e}},tn=Mu,xu=class extends tn{constructor(){super(...arguments),this.form="NFC"}},yu=xu,bu=class extends tn{constructor(){super(...arguments),this.form="NFD"}},ku=bu,Tu=class extends tn{constructor(){super(...arguments),this.form="NFKC"}},Eu=Tu,Pu=class extends tn{constructor(){super(...arguments),this.form="NFKD"}},Su=Pu,Cu=class extends it{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}},Fu=Cu,Au=class extends it{normalize(e){return ci(e)}},Lu=Au,Iu=class extends it{normalize(e){return 
e.toLowerCase()}},Ou=Iu,Nu=class extends it{normalize(e){return e=this.config.prepend+e,e}},zu=Nu;function Du(e){if(e===null)return null;switch(e.type){case"BertNormalizer":return new hu(e);case"Precompiled":return new fu(e);case"Sequence":return new gu(e);case"Replace":return new vu(e);case"NFC":return new yu(e);case"NFD":return new ku(e);case"NFKC":return new Eu(e);case"NFKD":return new Su(e);case"Strip":return new Fu(e);case"StripAccents":return new Lu(e);case"Lowercase":return new Ou(e);case"Prepend":return new zu(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}var di=Du,Bu=class extends ws{pre_tokenize(e,t){return(Array.isArray(e)?e.map(s=>this.pre_tokenize_text(s,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}},qe=Bu,Vu=class extends qe{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space??!1,this.trim_offsets=this.config.trim_offsets??!1,this.use_regex=this.config.use_regex??!0,this.pattern=new RegExp("'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+","gu"),this.byte_encoder=li,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(n=>Array.from(this.text_encoder.encode(n),r=>this.byte_encoder[r]).join(""))}},Gu=Vu,$u=class extends qe{pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}},Ru=$u,ju=class extends qe{constructor(e){super(),this.replacement=e.replacement??"▁",this.str_rep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,t){const{section_index:s=void 0}=t??{};let n=e.replaceAll(" ",this.str_rep);return!n.startsWith(this.replacement)&&(this.prepend_scheme==="always"||this.prepend_scheme==="first"&&s===0)&&(n=this.str_rep+n),[n]}},qu=ju,Uu=class extends 
qe{constructor(e){super(),this.config=e,this.pattern=en(this.config.pattern??{},this.config.invert??!0)}pre_tokenize_text(e){return this.pattern===null?[]:this.config.invert?e.match(this.pattern)||[]:this.config.behavior?.toLowerCase()==="removed"?e.split(this.pattern).filter(t=>t):lu(e,this.pattern)}},Wu=Uu,Hu=class extends qe{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${qs}]+|[${qs}]+`,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Qu=Hu,Xu=class extends qe{constructor(e){super(),this.config=e;const t=`[^\\d]+|\\d${this.config.individual_digits?"":"+"}`;this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Ku=Xu,Ju=class extends qe{constructor(){super(),this.pattern=new RegExp(`[^\\s${qs}]+|[${qs}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}},Yu=Ju,Zu=class extends qe{constructor(e){super(),this.config=e,this.pattern=en(this.config.pattern??{}),this.content=this.config.content??""}pre_tokenize_text(e){return this.pattern===null?[e]:[e.replaceAll(this.pattern,this.config.content??"")]}},e_=Zu,t_=class extends qe{constructor(e){super(),this.tokenizers=(e.pretokenizers??[]).map(t=>ui(t))}pre_tokenize_text(e,t){return this.tokenizers.reduce((s,n)=>n?n.pre_tokenize(s,t):s,[e])}},s_=t_,n_=class extends qe{pre_tokenize_text(e){return cu(e)}},r_=n_,a_=class extends qe{constructor(e){super(),this.config=e,this._length=e.length}pre_tokenize_text(e){const t=[];for(let s=0;s<e.length;s+=this._length)t.push(e.slice(s,s+this._length));return t}},o_=a_;function i_(e){if(e===null)return null;switch(e.type){case"BertPreTokenizer":return new Yu;case"Sequence":return new s_(e);case"Whitespace":return new Ru;case"WhitespaceSplit":return new r_;case"Metaspace":return new qu(e);case"ByteLevel":return new Gu(e);case"Split":return new Wu(e);case"Punctuation":return new Qu(e);case"Digits":return new Ku(e);case"Replace":return new e_(e);case"FixedLength":return new o_(e);default:throw new 
Error(`Unknown PreTokenizer type: ${e.type}`)}}var ui=i_,l_=class extends ws{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=nu(t,this.tokens_to_ids,this.unk_token_id)),t}},sn=l_,c_=class extends sn{constructor(e){super(e),this.max_input_chars_per_word=100,this.tokens_to_ids=Jn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[t,s]of this.tokens_to_ids)this.vocab[s]=t}encode(e){const t=[];for(const s of e){const n=[...s];if(n.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,a=0;const o=[];for(;a<n.length;){let i=n.length,l=null;for(;a<i;){let c=n.slice(a,i).join("");if(a>0&&(c=this.config.continuing_subword_prefix+c),this.tokens_to_ids.has(c)){l=c;break}--i}if(l===null){r=!0;break}o.push(l),a=i}r?t.push(this.unk_token):t.push(...o)}return t}},mo=c_,go=class _i{constructor(t,s){this.is_leaf=t,this.children=s}static default(){return new _i(!1,new Map)}},d_=class{constructor(){this.root=go.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const s of e){let n=t.children.get(s);n===void 0&&(n=go.default(),t.children.set(s,n)),t=n}t.is_leaf=!0}*common_prefix_search(e){let t=this.root;if(t===void 0)return;let s="";for(const n of e){if(s+=n,t=t.children.get(n),t===void 0)return;t.is_leaf&&(yield s)}}},u_=d_,kn=class hi{constructor(t,s,n,r,a){this.token_id=t,this.node_id=s,this.pos=n,this.length=r,this.score=a,this.prev=null,this.backtrace_score=0}clone(){const t=new hi(this.token_id,this.node_id,this.pos,this.length,this.score);return 
t.prev=this.prev,t.backtrace_score=this.backtrace_score,t}},__=class{constructor(e,t,s){this.chars=Array.from(e),this.len=this.chars.length,this.bos_token_id=t,this.eos_token_id=s,this.nodes=[],this.begin_nodes=Array.from({length:this.len+1},()=>[]),this.end_nodes=Array.from({length:this.len+1},()=>[]);const n=new kn(this.bos_token_id??0,0,0,0,0),r=new kn(this.eos_token_id??0,1,this.len,0,0);this.nodes.push(n.clone()),this.nodes.push(r.clone()),this.begin_nodes[this.len].push(r),this.end_nodes[0].push(n)}insert(e,t,s,n){const r=this.nodes.length,a=new kn(n,r,e,t,s);this.begin_nodes[e].push(a),this.end_nodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(this.begin_nodes[t].length==0)return[];for(let o of this.begin_nodes[t]){o.prev=null;let i=0,l=null;for(let c of this.end_nodes[t]){const d=c.backtrace_score+o.score;(l===null||d>i)&&(l=c.clone(),i=d)}if(l!==null)o.prev=l,o.backtrace_score=i;else return[]}++t}const s=[],r=this.begin_nodes[e][0].prev;if(r===null)return[];let a=r.clone();for(;a.prev!==null;)s.push(a.clone()),a=a.clone().prev.clone();return s.reverse(),s}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map(t=>this.piece(t))}token_ids(){return this.viterbi().map(t=>t.token_id)}},h_=__;function p_(e){if(e.length===0)throw new Error("Array must not be empty");let t=e[0],s=0;for(let n=1;n<e.length;++n)e[n]<t&&(t=e[n],s=n);return[t,s]}var f_=class extends sn{constructor(e,t){super(e);const s=e.vocab.length;this.vocab=new Array(s),this.scores=new Array(s);for(let n=0;n<s;++n)[this.vocab[n],this.scores[n]]=e.vocab[n];this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map((n,r)=>[n,r])),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.min_score=p_(this.scores)[0],this.unk_score=this.min_score-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new u_,this.trie.extend(this.vocab),this.fuse_unk=!0}populate_nodes(e){const t=e.chars,s=1;let n=0;for(;n<t.length;){let r=!1;const a=t.slice(n).join(""),o=this.trie.common_prefix_search(a);for(const i of o){const l=this.tokens_to_ids.get(i),c=this.scores[l],d=ou(i);e.insert(n,d,c,l),!r&&d===s&&(r=!0)}r||e.insert(n,s,this.unk_score,this.unk_token_id),n+=s}}tokenize(e){const t=new h_(e,this.bos_token_id,this.eos_token_id);return this.populate_nodes(t),t.tokens()}encode(e){const t=[];for(const s of e){const n=this.tokenize(s);t.push(...n)}return t}},wo=f_,m_=class{constructor(e=(s,n)=>s>n,t=1/0){this._heap=[],this._comparator=e,this._max_size=t}get size(){return this._heap.length}is_empty(){return this.size===0}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._max_size)this._heap.push(t),this._sift_up();else{const s=this._smallest();this._comparator(t,this._heap[s])&&(this._heap[s]=t,this._sift_up_from(s))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._sift_down(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._sift_down(),t}_parent(e){return(e+1>>>1)-1}_left(e){return(e<<1)+1}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const s=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=s}_sift_up(){this._sift_up_from(this.size-1)}_sift_up_from(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_sift_down(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const 
t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}},g_=m_,w_=class{constructor(e){this.capacity=e,this.cache=new Map}get(e){if(!this.cache.has(e))return;const t=this.cache.get(e);return this.cache.delete(e),this.cache.set(e,t),t}put(e,t){this.cache.has(e)&&this.cache.delete(e),this.cache.set(e,t),this.cache.size>this.capacity&&this.cache.delete(this.cache.keys().next().value)}clear(){this.cache.clear()}},v_=w_,M_=class extends sn{constructor(e){super(e),this.tokens_to_ids=Jn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[s,n]of this.tokens_to_ids)this.vocab[n]=s;const t=Array.isArray(e.merges[0]);this.merges=t?e.merges:e.merges.map(s=>s.split(" ",2)),this.bpe_ranks=new Map(this.merges.map((s,n)=>[JSON.stringify(s),n])),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.max_length_to_cache=256,this.cache_capacity=1e4,this.cache=new v_(this.cache_capacity)}clear_cache(){this.cache.clear()}bpe(e){if(e.length===0)return[];const t=this.cache.get(e);if(t!==void 0)return t;const s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){const r=new g_((i,l)=>i.score<l.score);let a={token:s[0],bias:0,prev:null,next:null},o=a;for(let i=1;i<s.length;++i){const l={bias:i/s.length,token:s[i],prev:o,next:null};o.next=l,this.add_node(r,o),o=l}for(;!r.is_empty();){const i=r.pop();if(i.deleted||!i.next||i.next.deleted)continue;if(i.deleted=!0,i.next.deleted=!0,i.prev){const c={...i.prev};i.prev.deleted=!0,i.prev=c,c.prev?c.prev.next=c:a=c}const 
l={token:i.token+i.next.token,bias:i.bias,prev:i.prev,next:i.next.next};l.prev?(l.prev.next=l,this.add_node(r,l.prev)):a=l,l.next&&(l.next.prev=l,this.add_node(r,l))}for(let i=a;i!==null;i=i.next)n.push(i.token)}else n=s;if(this.continuing_subword_suffix)for(let r=0;r<n.length-1;++r)n[r]+=this.continuing_subword_suffix;return e.length<this.max_length_to_cache&&this.cache.put(e,n),n}add_node(e,t){const s=this.bpe_ranks.get(JSON.stringify([t.token,t.next.token]));s!==void 0&&(t.score=s+t.bias,e.push(t))}encode(e){const t=[];for(const s of e){if(this.ignore_merges&&this.tokens_to_ids.has(s)){t.push(s);continue}const n=this.bpe(s);for(const r of n)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const a=Array.from(this.text_encoder.encode(r)).map(o=>`<0x${o.toString(16).toUpperCase().padStart(2,"0")}>`);a.every(o=>this.tokens_to_ids.has(o))?t.push(...a):t.push(this.unk_token)}else t.push(this.unk_token)}return t}},vo=M_,x_=class extends sn{constructor(e,t){super(e);const s=e.vocab;this.tokens_to_ids=Jn(t.target_lang?s[t.target_lang]:s),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[n,r]of this.tokens_to_ids)this.vocab[r]=n}encode(e){return e}},y_=x_;function b_(e,t){switch(e.type){case"WordPiece":return new mo(e);case"Unigram":return new wo(e,t.eos_token);case"BPE":return new vo(e);default:if(e.vocab)return Array.isArray(e.vocab)?new wo(e,t.eos_token):Object.hasOwn(e,"continuing_subword_prefix")&&Object.hasOwn(e,"unk_token")?Object.hasOwn(e,"merges")?new vo(e):new mo(e):new y_(e,{target_lang:t.target_lang,bos_token:t.bos_token,eos_token:t.eos_token,pad_token:t.pad_token,unk_token:t.unk_token});throw new 
Error(`Unknown TokenizerModel type: ${e?.type}`)}}var k_=b_,T_=class extends ws{constructor(e){super(),this.config=e}_call(e,...t){return this.post_process(e,...t)}},vs=T_,E_=class extends vs{post_process(e,t=null,s=!0){const n=t===null?this.config.single:this.config.pair;let r=[],a=[];for(const o of n)"SpecialToken"in o?s&&(r.push(o.SpecialToken.id),a.push(o.SpecialToken.type_id)):"Sequence"in o&&(o.Sequence.id==="A"?(r=Re(r,e),a=Re(a,new Array(e.length).fill(o.Sequence.type_id))):o.Sequence.id==="B"&&(r=Re(r,t),a=Re(a,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:r,token_type_ids:a}}},P_=E_,S_=class extends vs{post_process(e,t=null){return{tokens:e,tokens_pair:t}}},C_=S_,F_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t=null,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},A_=F_,L_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=s?[this.sep[0]]:[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},I_=L_,O_=class extends vs{constructor(e){super(e),this.processors=(e.processors??[]).map(t=>pi(t))}post_process(e,t=null,s=!0){let n={tokens:e,tokens_pair:t};for(const r of this.processors)n=r.post_process(n.tokens,n.tokens_pair,s);return n}},N_=O_;function z_(e){if(e===null)return null;switch(e.type){case"TemplateProcessing":return new P_(e);case"ByteLevel":return new C_(e);case"BertProcessing":return new A_(e);case"RobertaProcessing":return new I_(e);case"Sequence":return new N_(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}var pi=z_,D_=class extends 
ws{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets="trim_offsets"in e?e.trim_offsets:!1}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}},Ue=D_,B_=class extends Ue{constructor(e){super(e),this.byte_decoder=eu,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),s=new Uint8Array([...t].map(n=>this.byte_decoder[n]));return this.text_decoder.decode(s)}decode_chain(e){const t=[];let s=[];for(const n of e)this.added_tokens.find(r=>r.content===n)!==void 0?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}},V_=B_,G_=class extends Ue{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map((t,s)=>{if(s!==0){const n=this.config.prefix;n&&t.startsWith(n)?t=t.replace(n,""):t=" "+t}return this.cleanup&&(t=Kn(t)),t})}},$_=G_,R_=class extends Ue{constructor(e){super(e),this.replacement=e.replacement??"▁"}decode_chain(e){const t=[];for(let s=0;s<e.length;++s){let n=e[s].replaceAll(this.replacement," ");s==0&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}},j_=R_,q_=class extends Ue{constructor(e){super(e),this.suffix=e.suffix??""}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}},U_=q_,W_=class extends Ue{constructor(e){super(e),this.pad_token=e.pad_token??"",this.word_delimiter_token=e.word_delimiter_token??"",this.cleanup=e.cleanup}convert_tokens_to_string(e){if(e.length===0)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let n=t.filter(r=>r!==this.pad_token).join("");return this.cleanup&&(n=Kn(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}},H_=W_,Q_=class extends 
Ue{constructor(e){super(e),this.decoders=(e.decoders??[]).map(t=>fi(t))}decode_chain(e){return this.decoders.reduce((t,s)=>s.decode_chain(t),e)}},X_=Q_,K_=class extends Ue{decode_chain(e){const t=en(this.config.pattern),s=this.config.content??"";return t===null?e:e.map(n=>n.replaceAll(t,s))}},J_=K_,Y_=class extends Ue{decode_chain(e){return[e.join("")]}},Z_=Y_,eh=class extends Ue{constructor(e){super(e),this.content=e.content??"",this.start=e.start??0,this.stop=e.stop??0}decode_chain(e){return e.map(t=>{let s=0;for(let r=0;r<this.start&&t[r]===this.content;++r){s=r+1;continue}let n=t.length;for(let r=0;r<this.stop;++r){const a=t.length-r-1;if(t[a]===this.content){n=a;continue}else break}return t.slice(s,n)})}},th=eh,sh=class extends Ue{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let s=[];for(const n of e){let r=null;if(n.length===6&&n.startsWith("<0x")&&n.endsWith(">")){const a=parseInt(n.slice(3,5),16);isNaN(a)||(r=a)}if(r!==null)s.push(r);else{if(s.length>0){const a=this.text_decoder.decode(Uint8Array.from(s));t.push(a),s=[]}t.push(n)}}if(s.length>0){const n=this.text_decoder.decode(Uint8Array.from(s));t.push(n),s=[]}return t}},nh=sh;function rh(e){if(e===null)return null;switch(e.type){case"ByteLevel":return new V_(e);case"WordPiece":return new $_(e);case"Metaspace":return new j_(e);case"BPEDecoder":return new U_(e);case"CTC":return new H_(e);case"Sequence":return new X_(e);case"Replace":return new J_(e);case"Fuse":return new Z_(e);case"Strip":return new th(e);case"ByteFallback":return new nh(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}var fi=rh,ah=class{constructor(e,t){const s=fo(e,"Tokenizer",["model","decoder","post_processor","pre_tokenizer","normalizer"]);if(s)throw new Error(s);const n=fo(t,"Config");if(n)throw new 
Error(n);this.tokenizer=e,this.config=t,this.normalizer=di(this.tokenizer.normalizer),this.pre_tokenizer=ui(this.tokenizer.pre_tokenizer),this.model=k_(this.tokenizer.model,this.config),this.post_processor=pi(this.tokenizer.post_processor),this.decoder=fi(this.tokenizer.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];const r=[],a=[];this.added_tokens_map=new Map;for(const o of this.tokenizer.added_tokens){const i=new Yd(o);if(this.added_tokens.push(i),this.model.tokens_to_ids.set(i.content,i.id),this.model.vocab[i.id]=i.content,i.special&&(this.special_tokens.push(i.content),this.all_special_ids.push(i.id)),this.added_tokens_map.set(i.content,i),i.normalized&&this.normalizer!==null){const l=this.normalizer(i.content);a.push(l),this.added_tokens_map.set(l,i)}else r.push(i.content)}(this.config.additional_special_tokens??[]).forEach(o=>{this.special_tokens.includes(o)||this.special_tokens.push(o)}),this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.splitter_unnormalized=new ho(r),this.splitter_normalized=new ho(a),this.remove_space=this.config.remove_space,this.clean_up_tokenization_spaces=this.config.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=this.config.do_lowercase_and_remove_accent??!1}encode(e,{text_pair:t=null,add_special_tokens:s=!0,return_token_type_ids:n=null}={}){const{tokens:r,token_type_ids:a}=this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}),o=r.map(l=>this.added_tokens_map.get(l)?.id??this.model.tokens_to_ids.get(l)??this.model.unk_token_id),i={ids:o,tokens:r,attention_mask:new Array(o.length).fill(1)};return n&&a&&(i.token_type_ids=a),i}decode(e,t={}){if(!Array.isArray(e)||e.length===0||!au(e[0]))throw Error("token_ids must be a non-empty array of integers.");let s=e.map(r=>this.model.vocab[Number(r)]??this.model.unk_token);t.skip_special_tokens&&(s=s.filter(r=>!this.special_tokens.includes(r)));let 
n=this.decoder?this.decoder(s):s.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(n=n.replaceAll(this.decoder.end_of_word_suffix," "),t.skip_special_tokens&&(n=n.trim())),(t.clean_up_tokenization_spaces??this.clean_up_tokenization_spaces)&&(n=Kn(n)),n}tokenize(e,{text_pair:t=null,add_special_tokens:s=!1}={}){return this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}).tokens}encode_text(e){if(e===null)return null;const t=this.splitter_unnormalized.split(e);return t.forEach((s,n)=>{const r=this.added_tokens_map.get(s);r&&(r.lstrip&&n>0&&(t[n-1]=t[n-1].trimEnd()),r.rstrip&&n<t.length-1&&(t[n+1]=t[n+1].trimStart()))}),t.flatMap((s,n)=>{if(s.length===0)return[];if(this.added_tokens_map.has(s))return[s];if(this.remove_space===!0&&(s=s.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(s=iu(s)),this.normalizer!==null&&(s=this.normalizer(s)),s.length===0)return[];const r=this.splitter_normalized.split(s);return r.forEach((a,o)=>{const i=this.added_tokens_map.get(a);i&&(i.lstrip&&o>0&&(r[o-1]=r[o-1].trimEnd()),i.rstrip&&o<r.length-1&&(r[o+1]=r[o+1].trimStart()))}),r.flatMap(a=>{if(a.length===0)return[];if(this.added_tokens_map.has(a))return[a];const o=this.pre_tokenizer!==null?this.pre_tokenizer(a,{section_index:n}):[a];return this.model(o)})})}tokenize_helper(e,{text_pair:t=null,add_special_tokens:s=!0}){const n=this.encode_text(e),r=this.encode_text(t||null);return this.post_processor?this.post_processor(n,r,s):{tokens:Re(n??[],r??[])}}token_to_id(e){return this.model.tokens_to_ids.get(e)}id_to_token(e){return this.model.vocab[e]}get_added_tokens_decoder(){const e=new Map;for(const t of this.added_tokens)e.set(t.id,t);return e}get_vocab(e=!0){const t=new Map;for(let s=0;s<this.model.vocab.length;++s){const n=this.model.vocab[s];(e||!this.added_tokens_map.has(n))&&t.set(n,s)}return 
t}},oh=ah,b=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Comment:"Comment"}),Oe=class{constructor(e,t){this.value=e,this.type=t}};function Mo(e){return/\w/.test(e)}function us(e){return/[0-9]/.test(e)}function xo(e){return/\s/.test(e)}var ih=[["{%",b.OpenStatement],["%}",b.CloseStatement],["{{",b.OpenExpression],["}}",b.CloseExpression],["(",b.OpenParen],[")",b.CloseParen],["{",b.OpenCurlyBracket],["}",b.CloseCurlyBracket],["[",b.OpenSquareBracket],["]",b.CloseSquareBracket],[",",b.Comma],[".",b.Dot],[":",b.Colon],["|",b.Pipe],["<=",b.ComparisonBinaryOperator],[">=",b.ComparisonBinaryOperator],["==",b.ComparisonBinaryOperator],["!=",b.ComparisonBinaryOperator],["<",b.ComparisonBinaryOperator],[">",b.ComparisonBinaryOperator],["+",b.AdditiveBinaryOperator],["-",b.AdditiveBinaryOperator],["~",b.AdditiveBinaryOperator],["*",b.MultiplicativeBinaryOperator],["/",b.MultiplicativeBinaryOperator],["%",b.MultiplicativeBinaryOperator],["=",b.Equals]],lh=new Map([["n",`
3
  `],["t"," "],["r","\r"],["b","\b"],["f","\f"],["v","\v"],["'","'"],['"','"'],["\\","\\"]]);function ch(e,t={}){return e.endsWith(`
4
  `)&&(e=e.slice(0,-1)),t.lstrip_blocks&&(e=e.replace(/^[ \t]*({[#%-])/gm,"$1")),t.trim_blocks&&(e=e.replace(/([#%-]})\n/g,"$1")),e.replace(/{%\s*(end)?generation\s*%}/gs,"")}function dh(e,t={}){const s=[],n=ch(e,t);let r=0,a=0;const o=c=>{let d="";for(;c(n[r]);){if(n[r]==="\\"){if(++r,r>=n.length)throw new SyntaxError("Unexpected end of input");const u=n[r++],_=lh.get(u);if(_===void 0)throw new SyntaxError(`Unexpected escaped character: ${u}`);d+=_;continue}if(d+=n[r++],r>=n.length)throw new SyntaxError("Unexpected end of input")}return d},i=()=>{const c=s.at(-1);c&&c.type===b.Text&&(c.value=c.value.trimEnd(),c.value===""&&s.pop())},l=()=>{for(;r<n.length&&xo(n[r]);)++r};e:for(;r<n.length;){const c=s.at(-1)?.type;if(c===void 0||c===b.CloseStatement||c===b.CloseExpression||c===b.Comment){let u="";for(;r<n.length&&!(n[r]==="{"&&(n[r+1]==="%"||n[r+1]==="{"||n[r+1]==="#"));)u+=n[r++];if(u.length>0){s.push(new Oe(u,b.Text));continue}}if(n[r]==="{"&&n[r+1]==="#"){r+=2;const u=n[r]==="-";u&&++r;let _="";for(;n[r]!=="#"||n[r+1]!=="}";){if(r+2>=n.length)throw new SyntaxError("Missing end of comment tag");_+=n[r++]}const h=_.endsWith("-");h&&(_=_.slice(0,-1)),u&&i(),s.push(new Oe(_,b.Comment)),r+=2,h&&l();continue}if(n.slice(r,r+3)==="{%-"){i(),s.push(new Oe("{%",b.OpenStatement)),r+=3;continue}if(n.slice(r,r+3)==="{{-"){i(),s.push(new Oe("{{",b.OpenExpression)),a=0,r+=3;continue}if(o(xo),n.slice(r,r+3)==="-%}"){s.push(new Oe("%}",b.CloseStatement)),r+=3,l();continue}if(n.slice(r,r+3)==="-}}"){s.push(new Oe("}}",b.CloseExpression)),r+=3,l();continue}const d=n[r];if(d==="-"||d==="+"){const u=s.at(-1)?.type;if(u===b.Text||u===void 0)throw new SyntaxError(`Unexpected character: ${d}`);switch(u){case b.Identifier:case b.NumericLiteral:case b.StringLiteral:case b.CloseParen:case b.CloseSquareBracket:break;default:{++r;const _=o(us);s.push(new Oe(`${d}${_}`,_.length>0?b.NumericLiteral:b.UnaryOperator));continue}}}for(const[u,_]of 
ih){if(u==="}}"&&a>0)continue;if(n.slice(r,r+u.length)===u){s.push(new Oe(u,_)),_===b.OpenExpression?a=0:_===b.OpenCurlyBracket?++a:_===b.CloseCurlyBracket&&--a,r+=u.length;continue e}}if(d==="'"||d==='"'){++r;const u=o(_=>_!==d);s.push(new Oe(u,b.StringLiteral)),++r;continue}if(us(d)){let u=o(us);if(n[r]==="."&&us(n[r+1])){++r;const _=o(us);u=`${u}.${_}`}s.push(new Oe(u,b.NumericLiteral));continue}if(Mo(d)){const u=o(Mo);s.push(new Oe(u,b.Identifier));continue}throw new SyntaxError(`Unexpected character: ${d}`)}return s}var We=class{type="Statement"},uh=class extends We{constructor(e){super(),this.body=e}type="Program"},_h=class extends We{constructor(e,t,s){super(),this.test=e,this.body=t,this.alternate=s}type="If"},hh=class extends We{constructor(e,t,s,n){super(),this.loopvar=e,this.iterable=t,this.body=s,this.defaultBlock=n}type="For"},ph=class extends We{type="Break"},fh=class extends We{type="Continue"},mh=class extends We{constructor(e,t,s){super(),this.assignee=e,this.value=t,this.body=s}type="Set"},gh=class extends We{constructor(e,t,s){super(),this.name=e,this.args=t,this.body=s}type="Macro"},wh=class extends We{constructor(e){super(),this.value=e}type="Comment"},Ie=class extends We{type="Expression"},vh=class extends Ie{constructor(e,t,s){super(),this.object=e,this.property=t,this.computed=s}type="MemberExpression"},yo=class extends Ie{constructor(e,t){super(),this.callee=e,this.args=t}type="CallExpression"},Lt=class extends Ie{constructor(e){super(),this.value=e}type="Identifier"},Ht=class extends Ie{constructor(e){super(),this.value=e}type="Literal"},Mh=class extends Ht{type="IntegerLiteral"},xh=class extends Ht{type="FloatLiteral"},bo=class extends Ht{type="StringLiteral"},yh=class extends Ht{type="ArrayLiteral"},ko=class extends Ht{type="TupleLiteral"},bh=class extends Ht{type="ObjectLiteral"},_s=class extends Ie{constructor(e,t,s){super(),this.operator=e,this.left=t,this.right=s}type="BinaryExpression"},kh=class extends 
Ie{constructor(e,t){super(),this.operand=e,this.filter=t}type="FilterExpression"},Th=class extends We{constructor(e,t){super(),this.filter=e,this.body=t}type="FilterStatement"},Eh=class extends Ie{constructor(e,t){super(),this.lhs=e,this.test=t}type="SelectExpression"},Ph=class extends Ie{constructor(e,t,s){super(),this.operand=e,this.negate=t,this.test=s}type="TestExpression"},Sh=class extends Ie{constructor(e,t){super(),this.operator=e,this.argument=t}type="UnaryExpression"},Ch=class extends Ie{constructor(e=void 0,t=void 0,s=void 0){super(),this.start=e,this.stop=t,this.step=s}type="SliceExpression"},Fh=class extends Ie{constructor(e,t){super(),this.key=e,this.value=t}type="KeywordArgumentExpression"},Ah=class extends Ie{constructor(e){super(),this.argument=e}type="SpreadExpression"},Lh=class extends We{constructor(e,t,s){super(),this.call=e,this.callerArgs=t,this.body=s}type="CallStatement"},Ih=class extends Ie{constructor(e,t,s){super(),this.condition=e,this.trueExpr=t,this.falseExpr=s}type="Ternary"};function Oh(e){const t=new uh([]);let s=0;function n(x,y){const E=e[s++];if(!E||E.type!==x)throw new Error(`Parser Error: ${y}. 
${E.type} !== ${x}.`);return E}function r(x){if(!l(x))throw new SyntaxError(`Expected ${x}`);++s}function a(){switch(e[s].type){case b.Comment:return new wh(e[s++].value);case b.Text:return c();case b.OpenStatement:return d();case b.OpenExpression:return u();default:throw new SyntaxError(`Unexpected token type: ${e[s].type}`)}}function o(...x){return s+x.length<=e.length&&x.every((y,E)=>y===e[s+E].type)}function i(...x){return e[s]?.type===b.OpenStatement&&e[s+1]?.type===b.Identifier&&x.includes(e[s+1]?.value)}function l(...x){return s+x.length<=e.length&&x.every((y,E)=>e[s+E].type==="Identifier"&&y===e[s+E].value)}function c(){return new bo(n(b.Text,"Expected text token").value)}function d(){if(n(b.OpenStatement,"Expected opening statement token"),e[s].type!==b.Identifier)throw new SyntaxError(`Unknown statement, got ${e[s].type}`);const x=e[s].value;let y;switch(x){case"set":++s,y=_();break;case"if":++s,y=h(),n(b.OpenStatement,"Expected {% token"),r("endif"),n(b.CloseStatement,"Expected %} token");break;case"macro":++s,y=p(),n(b.OpenStatement,"Expected {% token"),r("endmacro"),n(b.CloseStatement,"Expected %} token");break;case"for":++s,y=m(),n(b.OpenStatement,"Expected {% token"),r("endfor"),n(b.CloseStatement,"Expected %} token");break;case"call":{++s;let E=null;o(b.OpenParen)&&(E=B());const U=O();if(U.type!=="Identifier")throw new SyntaxError("Expected identifier following call statement");const Q=B();n(b.CloseStatement,"Expected closing statement token");const fe=[];for(;!i("endcall");)fe.push(a());n(b.OpenStatement,"Expected '{%'"),r("endcall"),n(b.CloseStatement,"Expected closing statement token");const oe=new yo(U,Q);y=new Lh(oe,E,fe);break}case"break":++s,n(b.CloseStatement,"Expected closing statement token"),y=new ph;break;case"continue":++s,n(b.CloseStatement,"Expected closing statement token"),y=new fh;break;case"filter":{++s;let E=O();E instanceof Lt&&o(b.OpenParen)&&(E=I(E)),n(b.CloseStatement,"Expected closing statement token");const 
U=[];for(;!i("endfilter");)U.push(a());n(b.OpenStatement,"Expected '{%'"),r("endfilter"),n(b.CloseStatement,"Expected '%}'"),y=new Th(E,U);break}default:throw new SyntaxError(`Unknown statement type: ${x}`)}return y}function u(){n(b.OpenExpression,"Expected opening expression token");const x=g();return n(b.CloseExpression,"Expected closing expression token"),x}function _(){const x=f();let y=null;const E=[];if(o(b.Equals))++s,y=f();else{for(n(b.CloseStatement,"Expected %} token");!i("endset");)E.push(a());n(b.OpenStatement,"Expected {% token"),r("endset")}return n(b.CloseStatement,"Expected closing statement token"),new mh(x,y,E)}function h(){const x=g();n(b.CloseStatement,"Expected closing statement token");const y=[],E=[];for(;!i("elif","else","endif");)y.push(a());if(i("elif")){++s,++s;const U=h();E.push(U)}else if(i("else"))for(++s,++s,n(b.CloseStatement,"Expected closing statement token");!i("endif");)E.push(a());return new _h(x,y,E)}function p(){const x=O();if(x.type!=="Identifier")throw new SyntaxError("Expected identifier following macro statement");const y=B();n(b.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endmacro");)E.push(a());return new gh(x,y,E)}function f(x=!1){const y=x?O:g,E=[y()],U=o(b.Comma);for(;U&&(++s,E.push(y()),!!o(b.Comma)););return U?new ko(E):E[0]}function m(){const x=f(!0);if(!(x instanceof Lt||x instanceof ko))throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${x.type} instead`);if(!l("in"))throw new SyntaxError("Expected `in` keyword following loop variable");++s;const y=g();n(b.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endfor","else");)E.push(a());const U=[];if(i("else"))for(++s,++s,n(b.CloseStatement,"Expected closing statement token");!i("endfor");)U.push(a());return new hh(x,y,E,U)}function g(){return v()}function v(){const x=M();if(l("if")){++s;const y=M();if(l("else")){++s;const E=v();return new Ih(y,x,E)}else return new Eh(x,y)}return x}function 
M(){let x=k();for(;l("or");){const y=e[s];++s;const E=k();x=new _s(y,x,E)}return x}function k(){let x=T();for(;l("and");){const y=e[s];++s;const E=T();x=new _s(y,x,E)}return x}function T(){let x;for(;l("not");){const y=e[s];++s;const E=T();x=new Sh(y,E)}return x??S()}function S(){let x=A();for(;;){let y;if(l("not","in"))y=new Oe("not in",b.Identifier),s+=2;else if(l("in"))y=e[s++];else if(o(b.ComparisonBinaryOperator))y=e[s++];else break;const E=A();x=new _s(y,x,E)}return x}function A(){let x=G();for(;o(b.AdditiveBinaryOperator);){const y=e[s];++s;const E=G();x=new _s(y,x,E)}return x}function N(){const x=z(O());return o(b.OpenParen)?I(x):x}function I(x){let y=new yo(x,B());return y=z(y),o(b.OpenParen)&&(y=I(y)),y}function B(){n(b.OpenParen,"Expected opening parenthesis for arguments list");const x=V();return n(b.CloseParen,"Expected closing parenthesis for arguments list"),x}function V(){const x=[];for(;!o(b.CloseParen);){let y;if(e[s].type===b.MultiplicativeBinaryOperator&&e[s].value==="*"){++s;const E=g();y=new Ah(E)}else if(y=g(),o(b.Equals)){if(++s,!(y instanceof Lt))throw new SyntaxError("Expected identifier for keyword argument");const E=g();y=new Fh(y,E)}x.push(y),o(b.Comma)&&++s}return x}function D(){const x=[];let y=!1;for(;!o(b.CloseSquareBracket);)o(b.Colon)?(x.push(void 0),++s,y=!0):(x.push(g()),o(b.Colon)&&(++s,y=!0));if(x.length===0)throw new SyntaxError("Expected at least one argument for member/slice expression");if(y){if(x.length>3)throw new SyntaxError("Expected 0-3 arguments for slice expression");return new Ch(...x)}return x[0]}function z(x){for(;o(b.Dot)||o(b.OpenSquareBracket);){const y=e[s];++s;let E;const U=y.type===b.OpenSquareBracket;if(U)E=D(),n(b.CloseSquareBracket,"Expected closing square bracket");else if(E=O(),E.type!=="Identifier")throw new SyntaxError("Expected identifier following dot operator");x=new vh(x,E,U)}return x}function G(){let x=q();for(;o(b.MultiplicativeBinaryOperator);){const y=e[s++],E=q();x=new _s(y,x,E)}return 
x}function q(){let x=se();for(;l("is");){++s;const y=l("not");y&&++s;const E=O();if(!(E instanceof Lt))throw new SyntaxError("Expected identifier for the test");x=new Ph(x,y,E)}return x}function se(){let x=N();for(;o(b.Pipe);){++s;let y=O();if(!(y instanceof Lt))throw new SyntaxError("Expected identifier for the filter");o(b.OpenParen)&&(y=I(y)),x=new kh(x,y)}return x}function O(){const x=e[s++];switch(x.type){case b.NumericLiteral:{const y=x.value;return y.includes(".")?new xh(Number(y)):new Mh(Number(y))}case b.StringLiteral:{let y=x.value;for(;o(b.StringLiteral);)y+=e[s++].value;return new bo(y)}case b.Identifier:return new Lt(x.value);case b.OpenParen:{const y=f();return n(b.CloseParen,"Expected closing parenthesis, got ${tokens[current].type} instead."),y}case b.OpenSquareBracket:{const y=[];for(;!o(b.CloseSquareBracket);)y.push(g()),o(b.Comma)&&++s;return++s,new yh(y)}case b.OpenCurlyBracket:{const y=new Map;for(;!o(b.CloseCurlyBracket);){const E=g();n(b.Colon,"Expected colon between key and value in object literal");const U=g();y.set(E,U),o(b.Comma)&&++s}return++s,new bh(y)}default:throw new SyntaxError(`Unexpected token: ${x.type}`)}}for(;s<e.length;)t.body.push(a());return t}function Nh(e,t,s=1){if(t===void 0&&(t=e,e=0),s===0)throw new Error("range() step must not be zero");const n=[];if(s>0)for(let r=e;r<t;r+=s)n.push(r);else for(let r=e;r>t;r+=s)n.push(r);return n}function To(e,t,s,n=1){const r=Math.sign(n);r>=0?(t=(t??=0)<0?Math.max(e.length+t,0):Math.min(t,e.length),s=(s??=e.length)<0?Math.max(e.length+s,0):Math.min(s,e.length)):(t=(t??=e.length-1)<0?Math.max(e.length+t,-1):Math.min(t,e.length-1),s=(s??=-1)<-1?Math.max(e.length+s,-1):Math.min(s,e.length-1));const a=[];for(let o=t;r*o<r*s;o+=n)a.push(e[o]);return a}function zh(e){return e.replace(/\b\w/g,t=>t.toUpperCase())}function Dh(e){return Bh(new Date,e)}function Bh(e,t){const s=new Intl.DateTimeFormat(void 0,{month:"long"}),n=new Intl.DateTimeFormat(void 
0,{month:"short"}),r=a=>a<10?"0"+a:a.toString();return t.replace(/%[YmdbBHM%]/g,a=>{switch(a){case"%Y":return e.getFullYear().toString();case"%m":return r(e.getMonth()+1);case"%d":return r(e.getDate());case"%b":return n.format(e);case"%B":return s.format(e);case"%H":return r(e.getHours());case"%M":return r(e.getMinutes());case"%%":return"%";default:return a}})}function Vh(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function Gh(e,t,s,n){if(n===0)return e;let r=n==null||n<0?1/0:n;const a=t.length===0?new RegExp("(?=)","gu"):new RegExp(Vh(t),"gu");return e.replaceAll(a,o=>r>0?(--r,s):o)}var Eo=class extends Error{},Po=class extends Error{},tt=class{type="RuntimeValue";value;builtins=new Map;constructor(e=void 0){this.value=e}__bool__(){return new $(!!this.value)}toString(){return String(this.value)}},W=class extends tt{type="IntegerValue"},pe=class extends tt{type="FloatValue";toString(){return this.value%1===0?this.value.toFixed(1):this.value.toString()}},L=class extends tt{type="StringValue";builtins=new Map([["upper",new le(()=>new L(this.value.toUpperCase()))],["lower",new le(()=>new L(this.value.toLowerCase()))],["strip",new le(()=>new L(this.value.trim()))],["title",new le(()=>new L(zh(this.value)))],["capitalize",new le(()=>new L(this.value.charAt(0).toUpperCase()+this.value.slice(1)))],["length",new W(this.value.length)],["rstrip",new le(()=>new L(this.value.trimEnd()))],["lstrip",new le(()=>new L(this.value.trimStart()))],["startswith",new le(e=>{if(e.length===0)throw new Error("startswith() requires at least one argument");const t=e[0];if(t instanceof L)return new $(this.value.startsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("startswith() tuple elements must be strings");if(this.value.startsWith(s.value))return new $(!0)}return new $(!1)}throw new Error("startswith() argument must be a string or tuple of strings")})],["endswith",new le(e=>{if(e.length===0)throw new Error("endswith() requires at least 
one argument");const t=e[0];if(t instanceof L)return new $(this.value.endsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("endswith() tuple elements must be strings");if(this.value.endsWith(s.value))return new $(!0)}return new $(!1)}throw new Error("endswith() argument must be a string or tuple of strings")})],["split",new le(e=>{const t=e[0]??new de;if(!(t instanceof L||t instanceof de))throw new Error("sep argument must be a string or null");const s=e[1]??new W(-1);if(!(s instanceof W))throw new Error("maxsplit argument must be a number");let n=[];if(t instanceof de){const r=this.value.trimStart();for(const{0:a,index:o}of r.matchAll(/\S+/g)){if(s.value!==-1&&n.length>=s.value&&o!==void 0){n.push(a+r.slice(o+a.length));break}n.push(a)}}else{if(t.value==="")throw new Error("empty separator");n=this.value.split(t.value),s.value!==-1&&n.length>s.value&&n.push(n.splice(s.value).join(t.value))}return new Y(n.map(r=>new L(r)))})],["replace",new le(e=>{if(e.length<2)throw new Error("replace() requires at least two arguments");const t=e[0],s=e[1];if(!(t instanceof L&&s instanceof L))throw new Error("replace() arguments must be strings");let n;if(e.length>2?e[2].type==="KeywordArgumentsValue"?n=e[2].value.get("count")??new de:n=e[2]:n=new de,!(n instanceof W||n instanceof de))throw new Error("replace() count argument must be a number or null");return new L(Gh(this.value,t.value,s.value,n.value))})]])},$=class extends tt{type="BooleanValue"},$h=/[\x7f-\uffff]/g;function So(e){return e.replace($h,t=>"\\u"+t.charCodeAt(0).toString(16).padStart(4,"0"))}function Et(e,t={},s=0,n=!0){const{indent:r=null,ensureAscii:a=!1,separators:o=null,sortKeys:i=!1}=t;let l,c;switch(o?[l,c]=o:r?(l=",",c=": "):(l=", ",c=": "),e.type){case"NullValue":return"null";case"UndefinedValue":return n?"null":"undefined";case"IntegerValue":case"FloatValue":case"BooleanValue":return JSON.stringify(e.value);case"StringValue":{let 
d=JSON.stringify(e.value);return a&&(d=So(d)),d}case"ArrayValue":case"ObjectValue":{const d=r?" ".repeat(r):"",u=`
 
1
+ import{G as Ed}from"./gpu-ops-BbLjsC0p.js";import{Qwen35Model as Pd}from"./qwen35-model-DrnSsmhP.js";import{loadConfig as Sd,loadQuantConfig as Cd,loadModelWeights as Fd}from"./safetensors-loader-CwGm5mJX.js";class Un{}let ti=class{static create(){throw new Error("ONNX not available")}};class si{}const ni={},Ad={Tensor:Un,InferenceSession:ti,OrtEnv:si,env:ni},Ld=Object.freeze(Object.defineProperty({__proto__:null,InferenceSession:ti,OrtEnv:si,Tensor:Un,default:Ad,env:ni},Symbol.toStringTag,{value:"Module"}));var bn={},Id=Object.defineProperty,Wt=(e,t)=>{for(var s in t)Id(e,s,{get:t[s],enumerable:!0})},Le={},Ze={},Od={},Nd="4.0.0-next.6",Wn=typeof self<"u",Vt=!ii(Le),ri=!ii(Ze),$s=Wn&&"caches"in self,zd=typeof globalThis.Deno<"u",Zs=zd&&$s&&!Vt,ai=typeof process<"u",oi=ai&&process?.release?.name==="node"&&!Zs,Hn=typeof window<"u"&&typeof window.document<"u",Qn=Wn&&["DedicatedWorkerGlobalScope","ServiceWorkerGlobalScope","SharedWorkerGlobalScope"].includes(self.constructor?.name),Dd=Hn||Qn||Zs,Bd=oi||typeof navigator<"u"&&"gpu"in navigator,Vd=typeof navigator<"u"&&"ml"in navigator,Gd=typeof crypto<"u"&&typeof crypto.getRandomValues=="function",$d=typeof chrome<"u"&&typeof chrome.runtime<"u"&&typeof chrome.runtime.id=="string",Rd=typeof ServiceWorkerGlobalScope<"u"&&Wn&&self instanceof ServiceWorkerGlobalScope,jd=()=>{if(typeof navigator>"u")return!1;const e=navigator.userAgent,s=(navigator.vendor||"").indexOf("Apple")>-1,n=!e.match(/CriOS|FxiOS|EdgiOS|OPiOS|mercury|brave/i)&&!e.includes("Chrome")&&!e.includes("Android");return s&&n},qd=jd(),K=Object.freeze({IS_BROWSER_ENV:Hn,IS_WEBWORKER_ENV:Qn,IS_WEB_ENV:Dd,IS_SERVICE_WORKER_ENV:Rd,IS_DENO_WEB_RUNTIME:Zs,IS_WEB_CACHE_AVAILABLE:$s,IS_WEBGPU_AVAILABLE:Bd,IS_WEBNN_AVAILABLE:Vd,IS_SAFARI:qd,IS_PROCESS_AVAILABLE:ai,IS_NODE_ENV:oi,IS_FS_AVAILABLE:Vt,IS_PATH_AVAILABLE:ri,IS_CRYPTO_AVAILABLE:Gd,IS_CHROME_AVAILABLE:$d}),Xn=Vt&&ri,Rs="./";if(Xn){const 
e=Object(import.meta).url;e?Rs=Ze.dirname(Ze.dirname(Od.fileURLToPath(e))):typeof __dirname<"u"&&(Rs=Ze.dirname(__dirname))}var Ud=Xn?Ze.join(Rs,"/.cache/"):null,lo="/models/",Wd=Xn?Ze.join(Rs,lo):lo,Hd=typeof globalThis.fetch=="function"?globalThis.fetch.bind(globalThis):void 0,$e=Object.freeze({DEBUG:10,INFO:20,WARNING:30,ERROR:40,NONE:50}),co=$e.WARNING,ee={version:Nd,backends:{onnx:{}},get logLevel(){return co},set logLevel(e){co=e,ee.backends.onnx?.setLogLevel?.(e)},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!(Hn||Qn||Zs),localModelPath:Wd,useFS:Vt,useBrowserCache:$s,useFSCache:Vt,cacheDir:Ud,useCustomCache:!1,customCache:null,useWasmCache:$s||Vt,cacheKey:"transformers-cache",fetch:Hd};function ii(e){return Object.keys(e).length===0}function Dt(e,t){e&&e(t)}function Qd(e){return Number.isInteger(e)||typeof e=="bigint"}function uo(e){return e==null||e===-1}function _o(e){const t=[];let s=e;for(;Array.isArray(s);)t.push(s.length),s=s[0];return t}function et(...e){return Array.prototype.concat.apply([],e)}function js(e,t){return Math.abs((e+t)%(2*t)-t)}function Se(e,t){return Object.assign({},...t.map(s=>{if(e[s]!==void 0)return{[s]:e[s]}}))}function Xd(e,t){let s=0;for(const n of e)n===t&&++s;return s}var J={error(...e){ee.logLevel<=$e.ERROR&&console.error(...e)},warn(...e){ee.logLevel<=$e.WARNING&&console.warn(...e)},info(...e){ee.logLevel<=$e.INFO&&console.log(...e)},debug(...e){ee.logLevel<=$e.DEBUG&&console.log(...e)},log(...e){this.info(...e)}},Kd=class{constructor(e){this.trie=this._build_trie(e)}_build_trie(e){const t=Object.create(null);for(const s of e){let n=t;for(let r=0;r<s.length;++r){const a=s[r];n=n[a]??=Object.create(null)}n.end=s}return t}split(e){const t=[],s=e.length;let n=0,r=0;for(;r<s;){let a=this.trie,o=null,i=r;for(;i<s&&(a=a[e[i]]);)a.end&&(o=a.end),++i;o?(r>n&&t.push(e.slice(n,r)),t.push(o),r+=o.length,n=r):++r}return 
n<s&&t.push(e.slice(n)),t}},ho=Kd,Jd=class{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??!this.special}},Yd=Jd,li=(()=>{const e=[...Array.from({length:94},(r,a)=>a+33),...Array.from({length:12},(r,a)=>a+161),...Array.from({length:82},(r,a)=>a+174)],t=e.slice();let s=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+s),s+=1);const n=t.map(r=>String.fromCharCode(r));return Object.fromEntries(e.map((r,a)=>[r,n[a]]))})(),Zd=e=>Object.fromEntries(Object.entries(e).map(([t,s])=>[s,t])),eu=Zd(li),po=".,!?…。,、।۔،",tu=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],["(?i:[sdmt]|ll|ve|re)","(?:[sS]|[dD]|[mM]|[tT]|[lL][lL]|[vV][eE]|[rR][eE])"],["[^\\r\\n\\p{L}\\p{N}]?+","[^\\r\\n\\p{L}\\p{N}]?"],["[^\\s\\p{L}\\p{N}]++","[^\\s\\p{L}\\p{N}]+"],["(?>\\p{Nd}{510})","(?:\\p{Nd}{510})"],["\\p{Nd}{3}+","(?:\\p{Nd}{3})+"],["\\G",""],[` ?[^(\\s|[${po}])]+`,` ?[^\\s${po}]+`]]),qs="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",Kn=e=>e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n't/g,"n't").replace(/ 'm/g,"'m").replace(/ 's/g,"'s").replace(/ 've/g,"'ve").replace(/ 're/g,"'re"),en=(e,t=!0)=>{if(e.Regex!==void 0){let s=e.Regex.replace(/\\([#&~])/g,"$1");s=s.replace(/\\A/g,"^").replace(/\\z/g,"$").replace(/\\Z/g,"(?=\\r?\\n?$)");for(const[n,r]of tu)s=s.replaceAll(n,r);try{return new RegExp(s,"gu")}catch(n){if(!(n instanceof SyntaxError)||!n.message.toLowerCase().includes("invalid property name"))throw n;let r=!1;const a=s.replace(/(\\[pP])\{([^}=]+)\}/g,(o,i,l)=>{try{return new RegExp(`\\p{${l}}`,"u"),`${i}{${l}}`}catch{return r=!0,`${i}{Script=${l}}`}});if(!r)throw n;try{return new RegExp(a,"gu")}catch{throw n}}}else if(e.String!==void 0){const s=su(e.String);return new 
RegExp(t?s:`(${s})`,"gu")}else return console.warn("Unknown pattern type:",e),null},su=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),nu=(e,t,s)=>{const n=[];let r=0;for(;r<e.length;){if(n.push(e[r]),(t.get(e[r])??s)!==s){++r;continue}for(;++r<e.length&&(t.get(e[r])??s)===s;)t.get(n.at(-1))!==s&&(n[n.length-1]+=e[r])}return n},ru=e=>e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103,au=e=>Number.isInteger(e)||typeof e=="bigint",ou=e=>{let t=0;for(const s of e)++t;return t},iu=e=>ci(e.toLowerCase()),Re=(...e)=>Array.prototype.concat.apply([],e),Jn=e=>new Map(Object.entries(e)),lu=(e,t)=>{const s=[];let n=0;for(const r of e.matchAll(t)){const a=r[0];n<r.index&&s.push(e.slice(n,r.index)),a.length>0&&s.push(a),n=r.index+a.length}return n<e.length&&s.push(e.slice(n)),s},ci=e=>e.replace(new RegExp("\\p{M}","gu"),""),fo=(e,t,s=[])=>{if(!e||Array.isArray(e)||typeof e!="object")return`${t} must be a valid object`;for(const n of s)if(!(n in e))return`${t} must contain a "${n}" property`;return null},cu=e=>e.match(/\S+/g)||[],du=class{constructor(){const e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}},ws=du,uu=class extends ws{constructor(e){super(),this.config=e}_call(e){return this.normalize(e)}},it=uu,_u=class extends it{tokenize_chinese_chars(e){const t=[];for(let s=0;s<e.length;++s){const n=e[s],r=n.charCodeAt(0);ru(r)?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}strip_accents(e){return e.normalize("NFD").replace(new RegExp("\\p{Mn}","gu"),"")}is_control(e){switch(e){case" ":case`
2
  `:case"\r":return!1;default:return new RegExp("^\\p{Cc}|\\p{Cf}|\\p{Co}|\\p{Cs}$","u").test(e)}}clean_text(e){const t=[];for(const s of e){const n=s.charCodeAt(0);n===0||n===65533||this.is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this.clean_text(e)),this.config.handle_chinese_chars&&(e=this.tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),this.config.strip_accents!==!1&&(e=this.strip_accents(e))):this.config.strip_accents&&(e=this.strip_accents(e)),e}},hu=_u,pu=class extends it{constructor(e){super(e),this.charsmap=e.precompiled_charsmap??null}normalize(e){return e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,""),e=e.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," "),e.includes("~")?e=e.split("~").map(s=>s.normalize("NFKC")).join("~"):e=e.normalize("NFKC"),e}},fu=pu,mu=class extends it{constructor(e){super(e),this.normalizers=(e.normalizers??[]).map(t=>di(t))}normalize(e){return this.normalizers.reduce((t,s)=>s?s.normalize(t):t,e)}},gu=mu,wu=class extends it{normalize(e){const t=en(this.config.pattern??{});return t===null?e:e.replaceAll(t,this.config.content??"")}},vu=wu,Mu=class extends it{constructor(){super(...arguments),this.form="NFC"}normalize(e){return e=e.normalize(this.form),e}},tn=Mu,xu=class extends tn{constructor(){super(...arguments),this.form="NFC"}},yu=xu,bu=class extends tn{constructor(){super(...arguments),this.form="NFD"}},ku=bu,Tu=class extends tn{constructor(){super(...arguments),this.form="NFKC"}},Eu=Tu,Pu=class extends tn{constructor(){super(...arguments),this.form="NFKD"}},Su=Pu,Cu=class extends it{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}},Fu=Cu,Au=class extends it{normalize(e){return ci(e)}},Lu=Au,Iu=class extends it{normalize(e){return 
e.toLowerCase()}},Ou=Iu,Nu=class extends it{normalize(e){return e=this.config.prepend+e,e}},zu=Nu;function Du(e){if(e===null)return null;switch(e.type){case"BertNormalizer":return new hu(e);case"Precompiled":return new fu(e);case"Sequence":return new gu(e);case"Replace":return new vu(e);case"NFC":return new yu(e);case"NFD":return new ku(e);case"NFKC":return new Eu(e);case"NFKD":return new Su(e);case"Strip":return new Fu(e);case"StripAccents":return new Lu(e);case"Lowercase":return new Ou(e);case"Prepend":return new zu(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}var di=Du,Bu=class extends ws{pre_tokenize(e,t){return(Array.isArray(e)?e.map(s=>this.pre_tokenize_text(s,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}},qe=Bu,Vu=class extends qe{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space??!1,this.trim_offsets=this.config.trim_offsets??!1,this.use_regex=this.config.use_regex??!0,this.pattern=new RegExp("'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+","gu"),this.byte_encoder=li,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(n=>Array.from(this.text_encoder.encode(n),r=>this.byte_encoder[r]).join(""))}},Gu=Vu,$u=class extends qe{pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}},Ru=$u,ju=class extends qe{constructor(e){super(),this.replacement=e.replacement??"▁",this.str_rep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,t){const{section_index:s=void 0}=t??{};let n=e.replaceAll(" ",this.str_rep);return!n.startsWith(this.replacement)&&(this.prepend_scheme==="always"||this.prepend_scheme==="first"&&s===0)&&(n=this.str_rep+n),[n]}},qu=ju,Uu=class extends 
qe{constructor(e){super(),this.config=e,this.pattern=en(this.config.pattern??{},this.config.invert??!0)}pre_tokenize_text(e){return this.pattern===null?[]:this.config.invert?e.match(this.pattern)||[]:this.config.behavior?.toLowerCase()==="removed"?e.split(this.pattern).filter(t=>t):lu(e,this.pattern)}},Wu=Uu,Hu=class extends qe{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${qs}]+|[${qs}]+`,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Qu=Hu,Xu=class extends qe{constructor(e){super(),this.config=e;const t=`[^\\d]+|\\d${this.config.individual_digits?"":"+"}`;this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Ku=Xu,Ju=class extends qe{constructor(){super(),this.pattern=new RegExp(`[^\\s${qs}]+|[${qs}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}},Yu=Ju,Zu=class extends qe{constructor(e){super(),this.config=e,this.pattern=en(this.config.pattern??{}),this.content=this.config.content??""}pre_tokenize_text(e){return this.pattern===null?[e]:[e.replaceAll(this.pattern,this.config.content??"")]}},e_=Zu,t_=class extends qe{constructor(e){super(),this.tokenizers=(e.pretokenizers??[]).map(t=>ui(t))}pre_tokenize_text(e,t){return this.tokenizers.reduce((s,n)=>n?n.pre_tokenize(s,t):s,[e])}},s_=t_,n_=class extends qe{pre_tokenize_text(e){return cu(e)}},r_=n_,a_=class extends qe{constructor(e){super(),this.config=e,this._length=e.length}pre_tokenize_text(e){const t=[];for(let s=0;s<e.length;s+=this._length)t.push(e.slice(s,s+this._length));return t}},o_=a_;function i_(e){if(e===null)return null;switch(e.type){case"BertPreTokenizer":return new Yu;case"Sequence":return new s_(e);case"Whitespace":return new Ru;case"WhitespaceSplit":return new r_;case"Metaspace":return new qu(e);case"ByteLevel":return new Gu(e);case"Split":return new Wu(e);case"Punctuation":return new Qu(e);case"Digits":return new Ku(e);case"Replace":return new e_(e);case"FixedLength":return new o_(e);default:throw new 
Error(`Unknown PreTokenizer type: ${e.type}`)}}var ui=i_,l_=class extends ws{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=nu(t,this.tokens_to_ids,this.unk_token_id)),t}},sn=l_,c_=class extends sn{constructor(e){super(e),this.max_input_chars_per_word=100,this.tokens_to_ids=Jn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[t,s]of this.tokens_to_ids)this.vocab[s]=t}encode(e){const t=[];for(const s of e){const n=[...s];if(n.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,a=0;const o=[];for(;a<n.length;){let i=n.length,l=null;for(;a<i;){let c=n.slice(a,i).join("");if(a>0&&(c=this.config.continuing_subword_prefix+c),this.tokens_to_ids.has(c)){l=c;break}--i}if(l===null){r=!0;break}o.push(l),a=i}r?t.push(this.unk_token):t.push(...o)}return t}},mo=c_,go=class _i{constructor(t,s){this.is_leaf=t,this.children=s}static default(){return new _i(!1,new Map)}},d_=class{constructor(){this.root=go.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const s of e){let n=t.children.get(s);n===void 0&&(n=go.default(),t.children.set(s,n)),t=n}t.is_leaf=!0}*common_prefix_search(e){let t=this.root;if(t===void 0)return;let s="";for(const n of e){if(s+=n,t=t.children.get(n),t===void 0)return;t.is_leaf&&(yield s)}}},u_=d_,kn=class hi{constructor(t,s,n,r,a){this.token_id=t,this.node_id=s,this.pos=n,this.length=r,this.score=a,this.prev=null,this.backtrace_score=0}clone(){const t=new hi(this.token_id,this.node_id,this.pos,this.length,this.score);return 
t.prev=this.prev,t.backtrace_score=this.backtrace_score,t}},__=class{constructor(e,t,s){this.chars=Array.from(e),this.len=this.chars.length,this.bos_token_id=t,this.eos_token_id=s,this.nodes=[],this.begin_nodes=Array.from({length:this.len+1},()=>[]),this.end_nodes=Array.from({length:this.len+1},()=>[]);const n=new kn(this.bos_token_id??0,0,0,0,0),r=new kn(this.eos_token_id??0,1,this.len,0,0);this.nodes.push(n.clone()),this.nodes.push(r.clone()),this.begin_nodes[this.len].push(r),this.end_nodes[0].push(n)}insert(e,t,s,n){const r=this.nodes.length,a=new kn(n,r,e,t,s);this.begin_nodes[e].push(a),this.end_nodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(this.begin_nodes[t].length==0)return[];for(let o of this.begin_nodes[t]){o.prev=null;let i=0,l=null;for(let c of this.end_nodes[t]){const d=c.backtrace_score+o.score;(l===null||d>i)&&(l=c.clone(),i=d)}if(l!==null)o.prev=l,o.backtrace_score=i;else return[]}++t}const s=[],r=this.begin_nodes[e][0].prev;if(r===null)return[];let a=r.clone();for(;a.prev!==null;)s.push(a.clone()),a=a.clone().prev.clone();return s.reverse(),s}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map(t=>this.piece(t))}token_ids(){return this.viterbi().map(t=>t.token_id)}},h_=__;function p_(e){if(e.length===0)throw new Error("Array must not be empty");let t=e[0],s=0;for(let n=1;n<e.length;++n)e[n]<t&&(t=e[n],s=n);return[t,s]}var f_=class extends sn{constructor(e,t){super(e);const s=e.vocab.length;this.vocab=new Array(s),this.scores=new Array(s);for(let n=0;n<s;++n)[this.vocab[n],this.scores[n]]=e.vocab[n];this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map((n,r)=>[n,r])),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.min_score=p_(this.scores)[0],this.unk_score=this.min_score-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new u_,this.trie.extend(this.vocab),this.fuse_unk=!0}populate_nodes(e){const t=e.chars,s=1;let n=0;for(;n<t.length;){let r=!1;const a=t.slice(n).join(""),o=this.trie.common_prefix_search(a);for(const i of o){const l=this.tokens_to_ids.get(i),c=this.scores[l],d=ou(i);e.insert(n,d,c,l),!r&&d===s&&(r=!0)}r||e.insert(n,s,this.unk_score,this.unk_token_id),n+=s}}tokenize(e){const t=new h_(e,this.bos_token_id,this.eos_token_id);return this.populate_nodes(t),t.tokens()}encode(e){const t=[];for(const s of e){const n=this.tokenize(s);t.push(...n)}return t}},wo=f_,m_=class{constructor(e=(s,n)=>s>n,t=1/0){this._heap=[],this._comparator=e,this._max_size=t}get size(){return this._heap.length}is_empty(){return this.size===0}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._max_size)this._heap.push(t),this._sift_up();else{const s=this._smallest();this._comparator(t,this._heap[s])&&(this._heap[s]=t,this._sift_up_from(s))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._sift_down(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._sift_down(),t}_parent(e){return(e+1>>>1)-1}_left(e){return(e<<1)+1}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const s=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=s}_sift_up(){this._sift_up_from(this.size-1)}_sift_up_from(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_sift_down(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const 
t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}},g_=m_,w_=class{constructor(e){this.capacity=e,this.cache=new Map}get(e){if(!this.cache.has(e))return;const t=this.cache.get(e);return this.cache.delete(e),this.cache.set(e,t),t}put(e,t){this.cache.has(e)&&this.cache.delete(e),this.cache.set(e,t),this.cache.size>this.capacity&&this.cache.delete(this.cache.keys().next().value)}clear(){this.cache.clear()}},v_=w_,M_=class extends sn{constructor(e){super(e),this.tokens_to_ids=Jn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[s,n]of this.tokens_to_ids)this.vocab[n]=s;const t=Array.isArray(e.merges[0]);this.merges=t?e.merges:e.merges.map(s=>s.split(" ",2)),this.bpe_ranks=new Map(this.merges.map((s,n)=>[JSON.stringify(s),n])),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.max_length_to_cache=256,this.cache_capacity=1e4,this.cache=new v_(this.cache_capacity)}clear_cache(){this.cache.clear()}bpe(e){if(e.length===0)return[];const t=this.cache.get(e);if(t!==void 0)return t;const s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){const r=new g_((i,l)=>i.score<l.score);let a={token:s[0],bias:0,prev:null,next:null},o=a;for(let i=1;i<s.length;++i){const l={bias:i/s.length,token:s[i],prev:o,next:null};o.next=l,this.add_node(r,o),o=l}for(;!r.is_empty();){const i=r.pop();if(i.deleted||!i.next||i.next.deleted)continue;if(i.deleted=!0,i.next.deleted=!0,i.prev){const c={...i.prev};i.prev.deleted=!0,i.prev=c,c.prev?c.prev.next=c:a=c}const 
l={token:i.token+i.next.token,bias:i.bias,prev:i.prev,next:i.next.next};l.prev?(l.prev.next=l,this.add_node(r,l.prev)):a=l,l.next&&(l.next.prev=l,this.add_node(r,l))}for(let i=a;i!==null;i=i.next)n.push(i.token)}else n=s;if(this.continuing_subword_suffix)for(let r=0;r<n.length-1;++r)n[r]+=this.continuing_subword_suffix;return e.length<this.max_length_to_cache&&this.cache.put(e,n),n}add_node(e,t){const s=this.bpe_ranks.get(JSON.stringify([t.token,t.next.token]));s!==void 0&&(t.score=s+t.bias,e.push(t))}encode(e){const t=[];for(const s of e){if(this.ignore_merges&&this.tokens_to_ids.has(s)){t.push(s);continue}const n=this.bpe(s);for(const r of n)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const a=Array.from(this.text_encoder.encode(r)).map(o=>`<0x${o.toString(16).toUpperCase().padStart(2,"0")}>`);a.every(o=>this.tokens_to_ids.has(o))?t.push(...a):t.push(this.unk_token)}else t.push(this.unk_token)}return t}},vo=M_,x_=class extends sn{constructor(e,t){super(e);const s=e.vocab;this.tokens_to_ids=Jn(t.target_lang?s[t.target_lang]:s),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[n,r]of this.tokens_to_ids)this.vocab[r]=n}encode(e){return e}},y_=x_;function b_(e,t){switch(e.type){case"WordPiece":return new mo(e);case"Unigram":return new wo(e,t.eos_token);case"BPE":return new vo(e);default:if(e.vocab)return Array.isArray(e.vocab)?new wo(e,t.eos_token):Object.hasOwn(e,"continuing_subword_prefix")&&Object.hasOwn(e,"unk_token")?Object.hasOwn(e,"merges")?new vo(e):new mo(e):new y_(e,{target_lang:t.target_lang,bos_token:t.bos_token,eos_token:t.eos_token,pad_token:t.pad_token,unk_token:t.unk_token});throw new 
Error(`Unknown TokenizerModel type: ${e?.type}`)}}var k_=b_,T_=class extends ws{constructor(e){super(),this.config=e}_call(e,...t){return this.post_process(e,...t)}},vs=T_,E_=class extends vs{post_process(e,t=null,s=!0){const n=t===null?this.config.single:this.config.pair;let r=[],a=[];for(const o of n)"SpecialToken"in o?s&&(r.push(o.SpecialToken.id),a.push(o.SpecialToken.type_id)):"Sequence"in o&&(o.Sequence.id==="A"?(r=Re(r,e),a=Re(a,new Array(e.length).fill(o.Sequence.type_id))):o.Sequence.id==="B"&&(r=Re(r,t),a=Re(a,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:r,token_type_ids:a}}},P_=E_,S_=class extends vs{post_process(e,t=null){return{tokens:e,tokens_pair:t}}},C_=S_,F_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t=null,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},A_=F_,L_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=s?[this.sep[0]]:[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},I_=L_,O_=class extends vs{constructor(e){super(e),this.processors=(e.processors??[]).map(t=>pi(t))}post_process(e,t=null,s=!0){let n={tokens:e,tokens_pair:t};for(const r of this.processors)n=r.post_process(n.tokens,n.tokens_pair,s);return n}},N_=O_;function z_(e){if(e===null)return null;switch(e.type){case"TemplateProcessing":return new P_(e);case"ByteLevel":return new C_(e);case"BertProcessing":return new A_(e);case"RobertaProcessing":return new I_(e);case"Sequence":return new N_(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}var pi=z_,D_=class extends 
ws{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets="trim_offsets"in e?e.trim_offsets:!1}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}},Ue=D_,B_=class extends Ue{constructor(e){super(e),this.byte_decoder=eu,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),s=new Uint8Array([...t].map(n=>this.byte_decoder[n]));return this.text_decoder.decode(s)}decode_chain(e){const t=[];let s=[];for(const n of e)this.added_tokens.find(r=>r.content===n)!==void 0?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}},V_=B_,G_=class extends Ue{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map((t,s)=>{if(s!==0){const n=this.config.prefix;n&&t.startsWith(n)?t=t.replace(n,""):t=" "+t}return this.cleanup&&(t=Kn(t)),t})}},$_=G_,R_=class extends Ue{constructor(e){super(e),this.replacement=e.replacement??"▁"}decode_chain(e){const t=[];for(let s=0;s<e.length;++s){let n=e[s].replaceAll(this.replacement," ");s==0&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}},j_=R_,q_=class extends Ue{constructor(e){super(e),this.suffix=e.suffix??""}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}},U_=q_,W_=class extends Ue{constructor(e){super(e),this.pad_token=e.pad_token??"",this.word_delimiter_token=e.word_delimiter_token??"",this.cleanup=e.cleanup}convert_tokens_to_string(e){if(e.length===0)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let n=t.filter(r=>r!==this.pad_token).join("");return this.cleanup&&(n=Kn(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}},H_=W_,Q_=class extends 
Ue{constructor(e){super(e),this.decoders=(e.decoders??[]).map(t=>fi(t))}decode_chain(e){return this.decoders.reduce((t,s)=>s.decode_chain(t),e)}},X_=Q_,K_=class extends Ue{decode_chain(e){const t=en(this.config.pattern),s=this.config.content??"";return t===null?e:e.map(n=>n.replaceAll(t,s))}},J_=K_,Y_=class extends Ue{decode_chain(e){return[e.join("")]}},Z_=Y_,eh=class extends Ue{constructor(e){super(e),this.content=e.content??"",this.start=e.start??0,this.stop=e.stop??0}decode_chain(e){return e.map(t=>{let s=0;for(let r=0;r<this.start&&t[r]===this.content;++r){s=r+1;continue}let n=t.length;for(let r=0;r<this.stop;++r){const a=t.length-r-1;if(t[a]===this.content){n=a;continue}else break}return t.slice(s,n)})}},th=eh,sh=class extends Ue{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let s=[];for(const n of e){let r=null;if(n.length===6&&n.startsWith("<0x")&&n.endsWith(">")){const a=parseInt(n.slice(3,5),16);isNaN(a)||(r=a)}if(r!==null)s.push(r);else{if(s.length>0){const a=this.text_decoder.decode(Uint8Array.from(s));t.push(a),s=[]}t.push(n)}}if(s.length>0){const n=this.text_decoder.decode(Uint8Array.from(s));t.push(n),s=[]}return t}},nh=sh;function rh(e){if(e===null)return null;switch(e.type){case"ByteLevel":return new V_(e);case"WordPiece":return new $_(e);case"Metaspace":return new j_(e);case"BPEDecoder":return new U_(e);case"CTC":return new H_(e);case"Sequence":return new X_(e);case"Replace":return new J_(e);case"Fuse":return new Z_(e);case"Strip":return new th(e);case"ByteFallback":return new nh(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}var fi=rh,ah=class{constructor(e,t){const s=fo(e,"Tokenizer",["model","decoder","post_processor","pre_tokenizer","normalizer"]);if(s)throw new Error(s);const n=fo(t,"Config");if(n)throw new 
Error(n);this.tokenizer=e,this.config=t,this.normalizer=di(this.tokenizer.normalizer),this.pre_tokenizer=ui(this.tokenizer.pre_tokenizer),this.model=k_(this.tokenizer.model,this.config),this.post_processor=pi(this.tokenizer.post_processor),this.decoder=fi(this.tokenizer.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];const r=[],a=[];this.added_tokens_map=new Map;for(const o of this.tokenizer.added_tokens){const i=new Yd(o);if(this.added_tokens.push(i),this.model.tokens_to_ids.set(i.content,i.id),this.model.vocab[i.id]=i.content,i.special&&(this.special_tokens.push(i.content),this.all_special_ids.push(i.id)),this.added_tokens_map.set(i.content,i),i.normalized&&this.normalizer!==null){const l=this.normalizer(i.content);a.push(l),this.added_tokens_map.set(l,i)}else r.push(i.content)}(this.config.additional_special_tokens??[]).forEach(o=>{this.special_tokens.includes(o)||this.special_tokens.push(o)}),this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.splitter_unnormalized=new ho(r),this.splitter_normalized=new ho(a),this.remove_space=this.config.remove_space,this.clean_up_tokenization_spaces=this.config.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=this.config.do_lowercase_and_remove_accent??!1}encode(e,{text_pair:t=null,add_special_tokens:s=!0,return_token_type_ids:n=null}={}){const{tokens:r,token_type_ids:a}=this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}),o=r.map(l=>this.added_tokens_map.get(l)?.id??this.model.tokens_to_ids.get(l)??this.model.unk_token_id),i={ids:o,tokens:r,attention_mask:new Array(o.length).fill(1)};return n&&a&&(i.token_type_ids=a),i}decode(e,t={}){if(!Array.isArray(e)||e.length===0||!au(e[0]))throw Error("token_ids must be a non-empty array of integers.");let s=e.map(r=>this.model.vocab[Number(r)]??this.model.unk_token);t.skip_special_tokens&&(s=s.filter(r=>!this.special_tokens.includes(r)));let 
n=this.decoder?this.decoder(s):s.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(n=n.replaceAll(this.decoder.end_of_word_suffix," "),t.skip_special_tokens&&(n=n.trim())),(t.clean_up_tokenization_spaces??this.clean_up_tokenization_spaces)&&(n=Kn(n)),n}tokenize(e,{text_pair:t=null,add_special_tokens:s=!1}={}){return this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}).tokens}encode_text(e){if(e===null)return null;const t=this.splitter_unnormalized.split(e);return t.forEach((s,n)=>{const r=this.added_tokens_map.get(s);r&&(r.lstrip&&n>0&&(t[n-1]=t[n-1].trimEnd()),r.rstrip&&n<t.length-1&&(t[n+1]=t[n+1].trimStart()))}),t.flatMap((s,n)=>{if(s.length===0)return[];if(this.added_tokens_map.has(s))return[s];if(this.remove_space===!0&&(s=s.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(s=iu(s)),this.normalizer!==null&&(s=this.normalizer(s)),s.length===0)return[];const r=this.splitter_normalized.split(s);return r.forEach((a,o)=>{const i=this.added_tokens_map.get(a);i&&(i.lstrip&&o>0&&(r[o-1]=r[o-1].trimEnd()),i.rstrip&&o<r.length-1&&(r[o+1]=r[o+1].trimStart()))}),r.flatMap(a=>{if(a.length===0)return[];if(this.added_tokens_map.has(a))return[a];const o=this.pre_tokenizer!==null?this.pre_tokenizer(a,{section_index:n}):[a];return this.model(o)})})}tokenize_helper(e,{text_pair:t=null,add_special_tokens:s=!0}){const n=this.encode_text(e),r=this.encode_text(t||null);return this.post_processor?this.post_processor(n,r,s):{tokens:Re(n??[],r??[])}}token_to_id(e){return this.model.tokens_to_ids.get(e)}id_to_token(e){return this.model.vocab[e]}get_added_tokens_decoder(){const e=new Map;for(const t of this.added_tokens)e.set(t.id,t);return e}get_vocab(e=!0){const t=new Map;for(let s=0;s<this.model.vocab.length;++s){const n=this.model.vocab[s];(e||!this.added_tokens_map.has(n))&&t.set(n,s)}return 
t}},oh=ah,b=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Comment:"Comment"}),Oe=class{constructor(e,t){this.value=e,this.type=t}};function Mo(e){return/\w/.test(e)}function us(e){return/[0-9]/.test(e)}function xo(e){return/\s/.test(e)}var ih=[["{%",b.OpenStatement],["%}",b.CloseStatement],["{{",b.OpenExpression],["}}",b.CloseExpression],["(",b.OpenParen],[")",b.CloseParen],["{",b.OpenCurlyBracket],["}",b.CloseCurlyBracket],["[",b.OpenSquareBracket],["]",b.CloseSquareBracket],[",",b.Comma],[".",b.Dot],[":",b.Colon],["|",b.Pipe],["<=",b.ComparisonBinaryOperator],[">=",b.ComparisonBinaryOperator],["==",b.ComparisonBinaryOperator],["!=",b.ComparisonBinaryOperator],["<",b.ComparisonBinaryOperator],[">",b.ComparisonBinaryOperator],["+",b.AdditiveBinaryOperator],["-",b.AdditiveBinaryOperator],["~",b.AdditiveBinaryOperator],["*",b.MultiplicativeBinaryOperator],["/",b.MultiplicativeBinaryOperator],["%",b.MultiplicativeBinaryOperator],["=",b.Equals]],lh=new Map([["n",`
3
  `],["t"," "],["r","\r"],["b","\b"],["f","\f"],["v","\v"],["'","'"],['"','"'],["\\","\\"]]);function ch(e,t={}){return e.endsWith(`
4
  `)&&(e=e.slice(0,-1)),t.lstrip_blocks&&(e=e.replace(/^[ \t]*({[#%-])/gm,"$1")),t.trim_blocks&&(e=e.replace(/([#%-]})\n/g,"$1")),e.replace(/{%\s*(end)?generation\s*%}/gs,"")}function dh(e,t={}){const s=[],n=ch(e,t);let r=0,a=0;const o=c=>{let d="";for(;c(n[r]);){if(n[r]==="\\"){if(++r,r>=n.length)throw new SyntaxError("Unexpected end of input");const u=n[r++],_=lh.get(u);if(_===void 0)throw new SyntaxError(`Unexpected escaped character: ${u}`);d+=_;continue}if(d+=n[r++],r>=n.length)throw new SyntaxError("Unexpected end of input")}return d},i=()=>{const c=s.at(-1);c&&c.type===b.Text&&(c.value=c.value.trimEnd(),c.value===""&&s.pop())},l=()=>{for(;r<n.length&&xo(n[r]);)++r};e:for(;r<n.length;){const c=s.at(-1)?.type;if(c===void 0||c===b.CloseStatement||c===b.CloseExpression||c===b.Comment){let u="";for(;r<n.length&&!(n[r]==="{"&&(n[r+1]==="%"||n[r+1]==="{"||n[r+1]==="#"));)u+=n[r++];if(u.length>0){s.push(new Oe(u,b.Text));continue}}if(n[r]==="{"&&n[r+1]==="#"){r+=2;const u=n[r]==="-";u&&++r;let _="";for(;n[r]!=="#"||n[r+1]!=="}";){if(r+2>=n.length)throw new SyntaxError("Missing end of comment tag");_+=n[r++]}const h=_.endsWith("-");h&&(_=_.slice(0,-1)),u&&i(),s.push(new Oe(_,b.Comment)),r+=2,h&&l();continue}if(n.slice(r,r+3)==="{%-"){i(),s.push(new Oe("{%",b.OpenStatement)),r+=3;continue}if(n.slice(r,r+3)==="{{-"){i(),s.push(new Oe("{{",b.OpenExpression)),a=0,r+=3;continue}if(o(xo),n.slice(r,r+3)==="-%}"){s.push(new Oe("%}",b.CloseStatement)),r+=3,l();continue}if(n.slice(r,r+3)==="-}}"){s.push(new Oe("}}",b.CloseExpression)),r+=3,l();continue}const d=n[r];if(d==="-"||d==="+"){const u=s.at(-1)?.type;if(u===b.Text||u===void 0)throw new SyntaxError(`Unexpected character: ${d}`);switch(u){case b.Identifier:case b.NumericLiteral:case b.StringLiteral:case b.CloseParen:case b.CloseSquareBracket:break;default:{++r;const _=o(us);s.push(new Oe(`${d}${_}`,_.length>0?b.NumericLiteral:b.UnaryOperator));continue}}}for(const[u,_]of 
ih){if(u==="}}"&&a>0)continue;if(n.slice(r,r+u.length)===u){s.push(new Oe(u,_)),_===b.OpenExpression?a=0:_===b.OpenCurlyBracket?++a:_===b.CloseCurlyBracket&&--a,r+=u.length;continue e}}if(d==="'"||d==='"'){++r;const u=o(_=>_!==d);s.push(new Oe(u,b.StringLiteral)),++r;continue}if(us(d)){let u=o(us);if(n[r]==="."&&us(n[r+1])){++r;const _=o(us);u=`${u}.${_}`}s.push(new Oe(u,b.NumericLiteral));continue}if(Mo(d)){const u=o(Mo);s.push(new Oe(u,b.Identifier));continue}throw new SyntaxError(`Unexpected character: ${d}`)}return s}var We=class{type="Statement"},uh=class extends We{constructor(e){super(),this.body=e}type="Program"},_h=class extends We{constructor(e,t,s){super(),this.test=e,this.body=t,this.alternate=s}type="If"},hh=class extends We{constructor(e,t,s,n){super(),this.loopvar=e,this.iterable=t,this.body=s,this.defaultBlock=n}type="For"},ph=class extends We{type="Break"},fh=class extends We{type="Continue"},mh=class extends We{constructor(e,t,s){super(),this.assignee=e,this.value=t,this.body=s}type="Set"},gh=class extends We{constructor(e,t,s){super(),this.name=e,this.args=t,this.body=s}type="Macro"},wh=class extends We{constructor(e){super(),this.value=e}type="Comment"},Ie=class extends We{type="Expression"},vh=class extends Ie{constructor(e,t,s){super(),this.object=e,this.property=t,this.computed=s}type="MemberExpression"},yo=class extends Ie{constructor(e,t){super(),this.callee=e,this.args=t}type="CallExpression"},Lt=class extends Ie{constructor(e){super(),this.value=e}type="Identifier"},Ht=class extends Ie{constructor(e){super(),this.value=e}type="Literal"},Mh=class extends Ht{type="IntegerLiteral"},xh=class extends Ht{type="FloatLiteral"},bo=class extends Ht{type="StringLiteral"},yh=class extends Ht{type="ArrayLiteral"},ko=class extends Ht{type="TupleLiteral"},bh=class extends Ht{type="ObjectLiteral"},_s=class extends Ie{constructor(e,t,s){super(),this.operator=e,this.left=t,this.right=s}type="BinaryExpression"},kh=class extends 
Ie{constructor(e,t){super(),this.operand=e,this.filter=t}type="FilterExpression"},Th=class extends We{constructor(e,t){super(),this.filter=e,this.body=t}type="FilterStatement"},Eh=class extends Ie{constructor(e,t){super(),this.lhs=e,this.test=t}type="SelectExpression"},Ph=class extends Ie{constructor(e,t,s){super(),this.operand=e,this.negate=t,this.test=s}type="TestExpression"},Sh=class extends Ie{constructor(e,t){super(),this.operator=e,this.argument=t}type="UnaryExpression"},Ch=class extends Ie{constructor(e=void 0,t=void 0,s=void 0){super(),this.start=e,this.stop=t,this.step=s}type="SliceExpression"},Fh=class extends Ie{constructor(e,t){super(),this.key=e,this.value=t}type="KeywordArgumentExpression"},Ah=class extends Ie{constructor(e){super(),this.argument=e}type="SpreadExpression"},Lh=class extends We{constructor(e,t,s){super(),this.call=e,this.callerArgs=t,this.body=s}type="CallStatement"},Ih=class extends Ie{constructor(e,t,s){super(),this.condition=e,this.trueExpr=t,this.falseExpr=s}type="Ternary"};function Oh(e){const t=new uh([]);let s=0;function n(x,y){const E=e[s++];if(!E||E.type!==x)throw new Error(`Parser Error: ${y}. 
${E.type} !== ${x}.`);return E}function r(x){if(!l(x))throw new SyntaxError(`Expected ${x}`);++s}function a(){switch(e[s].type){case b.Comment:return new wh(e[s++].value);case b.Text:return c();case b.OpenStatement:return d();case b.OpenExpression:return u();default:throw new SyntaxError(`Unexpected token type: ${e[s].type}`)}}function o(...x){return s+x.length<=e.length&&x.every((y,E)=>y===e[s+E].type)}function i(...x){return e[s]?.type===b.OpenStatement&&e[s+1]?.type===b.Identifier&&x.includes(e[s+1]?.value)}function l(...x){return s+x.length<=e.length&&x.every((y,E)=>e[s+E].type==="Identifier"&&y===e[s+E].value)}function c(){return new bo(n(b.Text,"Expected text token").value)}function d(){if(n(b.OpenStatement,"Expected opening statement token"),e[s].type!==b.Identifier)throw new SyntaxError(`Unknown statement, got ${e[s].type}`);const x=e[s].value;let y;switch(x){case"set":++s,y=_();break;case"if":++s,y=h(),n(b.OpenStatement,"Expected {% token"),r("endif"),n(b.CloseStatement,"Expected %} token");break;case"macro":++s,y=p(),n(b.OpenStatement,"Expected {% token"),r("endmacro"),n(b.CloseStatement,"Expected %} token");break;case"for":++s,y=m(),n(b.OpenStatement,"Expected {% token"),r("endfor"),n(b.CloseStatement,"Expected %} token");break;case"call":{++s;let E=null;o(b.OpenParen)&&(E=B());const U=O();if(U.type!=="Identifier")throw new SyntaxError("Expected identifier following call statement");const Q=B();n(b.CloseStatement,"Expected closing statement token");const fe=[];for(;!i("endcall");)fe.push(a());n(b.OpenStatement,"Expected '{%'"),r("endcall"),n(b.CloseStatement,"Expected closing statement token");const oe=new yo(U,Q);y=new Lh(oe,E,fe);break}case"break":++s,n(b.CloseStatement,"Expected closing statement token"),y=new ph;break;case"continue":++s,n(b.CloseStatement,"Expected closing statement token"),y=new fh;break;case"filter":{++s;let E=O();E instanceof Lt&&o(b.OpenParen)&&(E=I(E)),n(b.CloseStatement,"Expected closing statement token");const 
U=[];for(;!i("endfilter");)U.push(a());n(b.OpenStatement,"Expected '{%'"),r("endfilter"),n(b.CloseStatement,"Expected '%}'"),y=new Th(E,U);break}default:throw new SyntaxError(`Unknown statement type: ${x}`)}return y}function u(){n(b.OpenExpression,"Expected opening expression token");const x=g();return n(b.CloseExpression,"Expected closing expression token"),x}function _(){const x=f();let y=null;const E=[];if(o(b.Equals))++s,y=f();else{for(n(b.CloseStatement,"Expected %} token");!i("endset");)E.push(a());n(b.OpenStatement,"Expected {% token"),r("endset")}return n(b.CloseStatement,"Expected closing statement token"),new mh(x,y,E)}function h(){const x=g();n(b.CloseStatement,"Expected closing statement token");const y=[],E=[];for(;!i("elif","else","endif");)y.push(a());if(i("elif")){++s,++s;const U=h();E.push(U)}else if(i("else"))for(++s,++s,n(b.CloseStatement,"Expected closing statement token");!i("endif");)E.push(a());return new _h(x,y,E)}function p(){const x=O();if(x.type!=="Identifier")throw new SyntaxError("Expected identifier following macro statement");const y=B();n(b.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endmacro");)E.push(a());return new gh(x,y,E)}function f(x=!1){const y=x?O:g,E=[y()],U=o(b.Comma);for(;U&&(++s,E.push(y()),!!o(b.Comma)););return U?new ko(E):E[0]}function m(){const x=f(!0);if(!(x instanceof Lt||x instanceof ko))throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${x.type} instead`);if(!l("in"))throw new SyntaxError("Expected `in` keyword following loop variable");++s;const y=g();n(b.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endfor","else");)E.push(a());const U=[];if(i("else"))for(++s,++s,n(b.CloseStatement,"Expected closing statement token");!i("endfor");)U.push(a());return new hh(x,y,E,U)}function g(){return v()}function v(){const x=M();if(l("if")){++s;const y=M();if(l("else")){++s;const E=v();return new Ih(y,x,E)}else return new Eh(x,y)}return x}function 
M(){let x=k();for(;l("or");){const y=e[s];++s;const E=k();x=new _s(y,x,E)}return x}function k(){let x=T();for(;l("and");){const y=e[s];++s;const E=T();x=new _s(y,x,E)}return x}function T(){let x;for(;l("not");){const y=e[s];++s;const E=T();x=new Sh(y,E)}return x??S()}function S(){let x=A();for(;;){let y;if(l("not","in"))y=new Oe("not in",b.Identifier),s+=2;else if(l("in"))y=e[s++];else if(o(b.ComparisonBinaryOperator))y=e[s++];else break;const E=A();x=new _s(y,x,E)}return x}function A(){let x=G();for(;o(b.AdditiveBinaryOperator);){const y=e[s];++s;const E=G();x=new _s(y,x,E)}return x}function N(){const x=z(O());return o(b.OpenParen)?I(x):x}function I(x){let y=new yo(x,B());return y=z(y),o(b.OpenParen)&&(y=I(y)),y}function B(){n(b.OpenParen,"Expected opening parenthesis for arguments list");const x=V();return n(b.CloseParen,"Expected closing parenthesis for arguments list"),x}function V(){const x=[];for(;!o(b.CloseParen);){let y;if(e[s].type===b.MultiplicativeBinaryOperator&&e[s].value==="*"){++s;const E=g();y=new Ah(E)}else if(y=g(),o(b.Equals)){if(++s,!(y instanceof Lt))throw new SyntaxError("Expected identifier for keyword argument");const E=g();y=new Fh(y,E)}x.push(y),o(b.Comma)&&++s}return x}function D(){const x=[];let y=!1;for(;!o(b.CloseSquareBracket);)o(b.Colon)?(x.push(void 0),++s,y=!0):(x.push(g()),o(b.Colon)&&(++s,y=!0));if(x.length===0)throw new SyntaxError("Expected at least one argument for member/slice expression");if(y){if(x.length>3)throw new SyntaxError("Expected 0-3 arguments for slice expression");return new Ch(...x)}return x[0]}function z(x){for(;o(b.Dot)||o(b.OpenSquareBracket);){const y=e[s];++s;let E;const U=y.type===b.OpenSquareBracket;if(U)E=D(),n(b.CloseSquareBracket,"Expected closing square bracket");else if(E=O(),E.type!=="Identifier")throw new SyntaxError("Expected identifier following dot operator");x=new vh(x,E,U)}return x}function G(){let x=q();for(;o(b.MultiplicativeBinaryOperator);){const y=e[s++],E=q();x=new _s(y,x,E)}return 
x}function q(){let x=se();for(;l("is");){++s;const y=l("not");y&&++s;const E=O();if(!(E instanceof Lt))throw new SyntaxError("Expected identifier for the test");x=new Ph(x,y,E)}return x}function se(){let x=N();for(;o(b.Pipe);){++s;let y=O();if(!(y instanceof Lt))throw new SyntaxError("Expected identifier for the filter");o(b.OpenParen)&&(y=I(y)),x=new kh(x,y)}return x}function O(){const x=e[s++];switch(x.type){case b.NumericLiteral:{const y=x.value;return y.includes(".")?new xh(Number(y)):new Mh(Number(y))}case b.StringLiteral:{let y=x.value;for(;o(b.StringLiteral);)y+=e[s++].value;return new bo(y)}case b.Identifier:return new Lt(x.value);case b.OpenParen:{const y=f();return n(b.CloseParen,"Expected closing parenthesis, got ${tokens[current].type} instead."),y}case b.OpenSquareBracket:{const y=[];for(;!o(b.CloseSquareBracket);)y.push(g()),o(b.Comma)&&++s;return++s,new yh(y)}case b.OpenCurlyBracket:{const y=new Map;for(;!o(b.CloseCurlyBracket);){const E=g();n(b.Colon,"Expected colon between key and value in object literal");const U=g();y.set(E,U),o(b.Comma)&&++s}return++s,new bh(y)}default:throw new SyntaxError(`Unexpected token: ${x.type}`)}}for(;s<e.length;)t.body.push(a());return t}function Nh(e,t,s=1){if(t===void 0&&(t=e,e=0),s===0)throw new Error("range() step must not be zero");const n=[];if(s>0)for(let r=e;r<t;r+=s)n.push(r);else for(let r=e;r>t;r+=s)n.push(r);return n}function To(e,t,s,n=1){const r=Math.sign(n);r>=0?(t=(t??=0)<0?Math.max(e.length+t,0):Math.min(t,e.length),s=(s??=e.length)<0?Math.max(e.length+s,0):Math.min(s,e.length)):(t=(t??=e.length-1)<0?Math.max(e.length+t,-1):Math.min(t,e.length-1),s=(s??=-1)<-1?Math.max(e.length+s,-1):Math.min(s,e.length-1));const a=[];for(let o=t;r*o<r*s;o+=n)a.push(e[o]);return a}function zh(e){return e.replace(/\b\w/g,t=>t.toUpperCase())}function Dh(e){return Bh(new Date,e)}function Bh(e,t){const s=new Intl.DateTimeFormat(void 0,{month:"long"}),n=new Intl.DateTimeFormat(void 
0,{month:"short"}),r=a=>a<10?"0"+a:a.toString();return t.replace(/%[YmdbBHM%]/g,a=>{switch(a){case"%Y":return e.getFullYear().toString();case"%m":return r(e.getMonth()+1);case"%d":return r(e.getDate());case"%b":return n.format(e);case"%B":return s.format(e);case"%H":return r(e.getHours());case"%M":return r(e.getMinutes());case"%%":return"%";default:return a}})}function Vh(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function Gh(e,t,s,n){if(n===0)return e;let r=n==null||n<0?1/0:n;const a=t.length===0?new RegExp("(?=)","gu"):new RegExp(Vh(t),"gu");return e.replaceAll(a,o=>r>0?(--r,s):o)}var Eo=class extends Error{},Po=class extends Error{},tt=class{type="RuntimeValue";value;builtins=new Map;constructor(e=void 0){this.value=e}__bool__(){return new $(!!this.value)}toString(){return String(this.value)}},W=class extends tt{type="IntegerValue"},pe=class extends tt{type="FloatValue";toString(){return this.value%1===0?this.value.toFixed(1):this.value.toString()}},L=class extends tt{type="StringValue";builtins=new Map([["upper",new le(()=>new L(this.value.toUpperCase()))],["lower",new le(()=>new L(this.value.toLowerCase()))],["strip",new le(()=>new L(this.value.trim()))],["title",new le(()=>new L(zh(this.value)))],["capitalize",new le(()=>new L(this.value.charAt(0).toUpperCase()+this.value.slice(1)))],["length",new W(this.value.length)],["rstrip",new le(()=>new L(this.value.trimEnd()))],["lstrip",new le(()=>new L(this.value.trimStart()))],["startswith",new le(e=>{if(e.length===0)throw new Error("startswith() requires at least one argument");const t=e[0];if(t instanceof L)return new $(this.value.startsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("startswith() tuple elements must be strings");if(this.value.startsWith(s.value))return new $(!0)}return new $(!1)}throw new Error("startswith() argument must be a string or tuple of strings")})],["endswith",new le(e=>{if(e.length===0)throw new Error("endswith() requires at least 
one argument");const t=e[0];if(t instanceof L)return new $(this.value.endsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("endswith() tuple elements must be strings");if(this.value.endsWith(s.value))return new $(!0)}return new $(!1)}throw new Error("endswith() argument must be a string or tuple of strings")})],["split",new le(e=>{const t=e[0]??new de;if(!(t instanceof L||t instanceof de))throw new Error("sep argument must be a string or null");const s=e[1]??new W(-1);if(!(s instanceof W))throw new Error("maxsplit argument must be a number");let n=[];if(t instanceof de){const r=this.value.trimStart();for(const{0:a,index:o}of r.matchAll(/\S+/g)){if(s.value!==-1&&n.length>=s.value&&o!==void 0){n.push(a+r.slice(o+a.length));break}n.push(a)}}else{if(t.value==="")throw new Error("empty separator");n=this.value.split(t.value),s.value!==-1&&n.length>s.value&&n.push(n.splice(s.value).join(t.value))}return new Y(n.map(r=>new L(r)))})],["replace",new le(e=>{if(e.length<2)throw new Error("replace() requires at least two arguments");const t=e[0],s=e[1];if(!(t instanceof L&&s instanceof L))throw new Error("replace() arguments must be strings");let n;if(e.length>2?e[2].type==="KeywordArgumentsValue"?n=e[2].value.get("count")??new de:n=e[2]:n=new de,!(n instanceof W||n instanceof de))throw new Error("replace() count argument must be a number or null");return new L(Gh(this.value,t.value,s.value,n.value))})]])},$=class extends tt{type="BooleanValue"},$h=/[\x7f-\uffff]/g;function So(e){return e.replace($h,t=>"\\u"+t.charCodeAt(0).toString(16).padStart(4,"0"))}function Et(e,t={},s=0,n=!0){const{indent:r=null,ensureAscii:a=!1,separators:o=null,sortKeys:i=!1}=t;let l,c;switch(o?[l,c]=o:r?(l=",",c=": "):(l=", ",c=": "),e.type){case"NullValue":return"null";case"UndefinedValue":return n?"null":"undefined";case"IntegerValue":case"FloatValue":case"BooleanValue":return JSON.stringify(e.value);case"StringValue":{let 
d=JSON.stringify(e.value);return a&&(d=So(d)),d}case"ArrayValue":case"ObjectValue":{const d=r?" ".repeat(r):"",u=`
assets/qwen35-model-BwnUri7A.js DELETED
@@ -1 +0,0 @@
1
- import{S as H,a as W,_ as A}from"./gpu-ops-DKsrMEcC.js";function K(E){const a=E<<16,e=new ArrayBuffer(4);new Uint32Array(e)[0]=a,new Float32Array(e)[0];const t=a>>>31&1,s=a>>>23&255,i=a&8388607;if(s===0)return t<<15;if(s===255)return t<<15|31744|(i?512:0);const n=s-127+15;return n>=31?t<<15|31744:n<=0?t<<15:t<<15|n<<10|i>>>13}class N{constructor(a,e,t){if(this.gpu=a,this.config=e,this.textCfg=e.text_config,this.quantConfig=t,this.hiddenSize=this.textCfg.hidden_size,this.intermediateSize=this.textCfg.intermediate_size,this.numLayers=this.textCfg.num_hidden_layers,this.numHeads=this.textCfg.num_attention_heads,this.numKVHeads=this.textCfg.num_key_value_heads,this.headDim=this.textCfg.head_dim,this.vocabSize=this.textCfg.vocab_size,this.rmsEps=this.textCfg.rms_norm_eps,this.ropeTheta=this.textCfg.rope_parameters?.rope_theta||1e7,this.partialRotary=this.textCfg.partial_rotary_factor,this.partialDim=Math.floor(this.headDim*this.partialRotary),this.mropeSection=this.textCfg.rope_parameters?.mrope_section||[11,11,10],this.layerTypes=this.textCfg.layer_types,this.groupSize=t?.group_size||t?.config_groups?.group_0?.weights?.group_size||128,this.fp16Layers=new Set,t?.extra_config)for(const u of Object.keys(t.extra_config))this.fp16Layers.add(u);this.weights={},this.kvCache={},this.seqLen=0,this.pipelines={},this.singlePassMode=!0,this._replayFlat=null,this._forceMinSplits=0,this._ropeDelta=0;const s=this.numHeads*this.headDim,i=this.numKVHeads*this.headDim,n=(s+i)/2,r=Math.ceil(n/4);this._splitQKNormShaderKey=`fused_split_qknorm_kvstore_${r}`,H[this._splitQKNormShaderKey]||(H[this._splitQKNormShaderKey]=W(r))}compilePipelines(){const a=this.gpu;for(const[e,t]of Object.entries(H))this.pipelines[e]=a.getOrCreatePipeline(e,t)}uploadTensors(a){this.quantConfig?.quant_method==="compressed-tensors"&&(a=this._convertCompressedTensors(a));for(const[t,s]of Object.entries(a)){let 
i=s.data;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")){const n=new Float32Array(i.buffer,i.byteOffset,i.byteLength/4),r=new Uint16Array(n.length);for(let u=0;u<n.length;u++){const o=new Uint32Array(n.buffer,n.byteOffset+u*4,1)[0];r[u]=o>>>16}i=new Uint8Array(r.buffer)}if(s._partial){let{offset:n,totalSize:r}=s._partial;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")&&(n/=2,r/=2),n===0){const o=this.gpu.createBuffer(t,r,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC);this.gpu.device.queue.writeBuffer(o,0,i),this.weights[t]=o}else{const o=this.weights[t];o&&this.gpu.device.queue.writeBuffer(o,n,i)}}else this.weights[t]=this.gpu.createBufferFromData(t,i);(t.includes("post_attention_layernorm.weight")||t.includes("input_layernorm.weight")||t.includes("q_norm.weight")||t.includes("k_norm.weight")||t==="model.language_model.norm.weight")&&(this._normWeightRaw||(this._normWeightRaw={}),this._normWeightRaw[t]=new Uint32Array(i.buffer.slice(i.byteOffset,i.byteOffset+i.byteLength))),this._uploadedCount=(this._uploadedCount||0)+1}}_convertCompressedTensors(a){const e={};for(const[t,s]of Object.entries(a))if(!t.endsWith(".weight_shape")){if(t.endsWith(".weight_packed")){const i=t.slice(0,-14),n=s.shape,r=n[0],u=n[1],o=new Int32Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/4),h=new Int32Array(r*u);for(let m=0;m<r;m++)for(let c=0;c<u;c++)h[c*r+m]=o[m*u+c];e[`${i}.qweight`]={dtype:"I32",shape:[u,r],data:new Uint8Array(h.buffer)};continue}if(t.endsWith(".weight_scale")){const i=t.slice(0,-13),n=s.shape,r=n[0],u=n[1],o=new Uint16Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/2),h=Math.ceil(r/2),m=new Uint32Array(u*h);for(let c=0;c<u;c++)for(let f=0;f<r;f+=2){const p=o[f*u+c],w=f+1<r?o[(f+1)*u+c]:0,B=K(p),q=K(w);m[c*h+(f>>1)]=B|q<<16}e[`${i}.scales`]={dtype:"I32",shape:[u,h],data:new Uint8Array(m.buffer)};continue}e[t]=s}return e}async 
postProcessWeights(){const a=this.textCfg.linear_num_key_heads||0,e=this.hiddenSize,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=this.layerTypes.indexOf("linear_attention"),i=s>=0?`model.language_model.layers.${s}.linear_attn`:"";if(this.abQuantized=i&&!!this.weights[`${i}.in_proj_a.qweight`],this.abQuantized){const g=this.textCfg.linear_num_value_heads??a;this.linAlpha=this.gpu.createBuffer("lin_alpha",g*4,t),this.linBeta=this.gpu.createBuffer("lin_beta",g*4,t)}else{this.linABWeight={};const g=this.textCfg.linear_num_value_heads??a,P=e*2,S=g*P,b=2*S;for(let v=0;v<this.numLayers;v++)if(this.layerTypes[v]==="linear_attention"){const C=`model.language_model.layers.${v}.linear_attn`,D=this.weights[`${C}.in_proj_a.weight`],k=this.weights[`${C}.in_proj_b.weight`];if(D&&k){const z=this.gpu.createBuffer(`ab_merged_${v}`,b,t),M=this.gpu.device.createCommandEncoder();M.copyBufferToBuffer(D,0,z,0,S),M.copyBufferToBuffer(k,0,z,S,S),this.gpu.device.queue.submit([M.finish()]),this.linABWeight[v]=z}}}{const g=[];for(let b=0;b<this.numLayers;b++){if(this.layerTypes[b]==="linear_attention"){const v=`model.language_model.layers.${b}.linear_attn`,C=this.textCfg.linear_num_key_heads||0,D=this.textCfg.linear_key_head_dim||128,k=this.textCfg.linear_value_head_dim||128,z=this.textCfg.linear_num_value_heads??C,M=z*k,$=z/C*k,O=C*(D+D+$);g.push({prefix:`${v}.in_proj_qkv`,K:e,N:O}),g.push({prefix:`${v}.in_proj_z`,K:e,N:M}),g.push({prefix:`${v}.out_proj`,K:M,N:e})}else{const 
v=`model.language_model.layers.${b}.self_attn`,C=this.numHeads*this.headDim*2,D=this.numKVHeads*this.headDim;g.push({prefix:`${v}.q_proj`,K:e,N:C}),g.push({prefix:`${v}.k_proj`,K:e,N:D}),g.push({prefix:`${v}.v_proj`,K:e,N:D}),g.push({prefix:`${v}.o_proj`,K:this.numHeads*this.headDim,N:e})}g.push({prefix:`model.language_model.layers.${b}.mlp.gate_proj`,K:e,N:this.intermediateSize}),g.push({prefix:`model.language_model.layers.${b}.mlp.up_proj`,K:e,N:this.intermediateSize}),g.push({prefix:`model.language_model.layers.${b}.mlp.down_proj`,K:this.intermediateSize,N:e})}let P=0;const S=performance.now();for(const{prefix:b,K:v,N:C}of g)if(!this.weights[`${b}.qweight`]&&this.weights[`${b}.weight`]){const{qweight:D,scales:k}=await this._quantizeBF16ToINT4(this.weights[`${b}.weight`],v,C,this.groupSize,b.replace(/\./g,"_"));this.weights[`${b}.qweight`]=D,this.weights[`${b}.scales`]=k,P++}P>0&&console.log(`[QUANT] GPU-quantized ${P} BF16 projections to INT4 in ${(performance.now()-S).toFixed(0)}ms`)}this._mergedGateUp={};const n=this.intermediateSize,r=e/8,u=r*n*4,h=r/(this.groupSize/8)*n*2;for(let g=0;g<this.numLayers;g++){const P=`model.language_model.layers.${g}.mlp`,S=this.getQWeight(`${P}.gate_proj`),b=this.getQWeight(`${P}.up_proj`);if(S.qweight&&b.qweight){const v=this.gpu.createBuffer(`merged_qw_${g}`,u*2,t),C=this.gpu.createBuffer(`merged_sc_${g}`,h*2,t),D=this.gpu.device.createCommandEncoder();D.copyBufferToBuffer(S.qweight,0,v,0,u),D.copyBufferToBuffer(b.qweight,0,v,u,u),D.copyBufferToBuffer(S.scales,0,C,0,h),D.copyBufferToBuffer(b.scales,0,C,h,h),this.gpu.device.queue.submit([D.finish()]),this._mergedGateUp[g]={qweight:v,scales:C}}}this._fusedMLPParams={};const m=16+512*16;for(let g=0;g<this.numLayers;g++){const P=`model.language_model.layers.${g}.post_attention_layernorm.weight`,S=this._normWeightRaw?.[P];if(!S||!this._mergedGateUp[g])continue;const b=new ArrayBuffer(m),v=new Uint32Array(b),C=new 
Float32Array(b);v[0]=e,v[1]=n,v[2]=this.groupSize,C[3]=this.rmsEps;for(let D=0;D<S.length;D++)v[4+D]=S[D];this._fusedMLPParams[g]=this.gpu.createBufferFromData(`fused_mlp_params_${g}`,new Uint32Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}this._fusedSQKParams={};const c=this.headDim,f=this.numHeads,p=this.numKVHeads,w=f*c,B=p*c,q=(w+B)/2,_=Math.ceil(q/4),l=32+_*16,d=this.mropeSection[1]*3,y=this.mropeSection[2]*3,U=`fused_split_qknorm_kvstore_${_}`;H[U]||(H[U]=W(_,this.ropeTheta,d,y,this.partialDim)),this.pipelines[U]||(this.pipelines[U]=this.gpu.getOrCreatePipeline(U,H[U])),this._splitQKNormShaderKey=U;for(let g=0;g<this.numLayers;g++){if(this.layerTypes[g]!=="full_attention")continue;const P=`model.language_model.layers.${g}.self_attn`,S=`${P}.q_norm.weight`,b=`${P}.k_norm.weight`,v=this._normWeightRaw?.[S],C=this._normWeightRaw?.[b],D=new ArrayBuffer(l),k=new DataView(D);if(k.setUint32(0,f,!0),k.setUint32(4,p,!0),k.setUint32(8,c,!0),k.setFloat32(12,this.rmsEps,!0),k.setUint32(16,0,!0),k.setUint32(20,0,!0),k.setUint32(24,0,!0),k.setUint32(28,0,!0),v)for(let M=0;M<w/2;M++){const $=Math.floor(M/4),O=M%4;k.setUint32(32+$*16+O*4,v[M],!0)}if(C){const M=w/2;for(let $=0;$<B/2;$++){const O=M+$,G=Math.floor(O/4),R=O%4;k.setUint32(32+G*16+R*4,C[$],!0)}}const z=this.gpu.device.createBuffer({size:l,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:`fused_sqk_${g}`});this.gpu.device.queue.writeBuffer(z,0,new Uint8Array(D)),this._fusedSQKParams[g]=z}await this._quantizeLmHead()}async _quantizeLmHead(){const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight";if(!this.weights[e])return;const t=this.hiddenSize,s=this.vocabSize,i=this.groupSize,n=t/8,r=t/i,u=performance.now(),o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,h=this.weights[e];this._lmHeadQWeight=this.gpu.createBuffer("lmhead_qweight",n*s*4,o);const 
m=this.gpu.createBuffer("lmhead_scales_f32",r*s*4,o),c=Math.ceil(r*s/2)*4;this._lmHeadScales=this.gpu.createBuffer("lmhead_scales",c,o);const f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:D}=await import("./gpu-ops-DKsrMEcC.js").then(k=>k.b);return{SHADERS:D}},[],import.meta.url)).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData("quant_params",new Uint32Array([t,s,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[h,this._lmHeadQWeight,m,p]),B=65535,q=Math.min(s,B),_=Math.ceil(s/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:D}=await import("./gpu-ops-DKsrMEcC.js").then(k=>k.b);return{SHADERS:D}},[],import.meta.url)).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(r*s/2),g=this.gpu.createBufferFromData("pack_params",new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[m,this._lmHeadScales,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),m.destroy(),p.destroy(),g.destroy();const v=(n*s*4/1e6).toFixed(0),C=(c/1e6).toFixed(0);console.log(`[QUANT] lm_head INT4 (GPU): ${(performance.now()-u).toFixed(0)}ms, ${v}MB qw + ${C}MB sc`)}async _quantizeBF16ToINT4(a,e,t,s,i){const n=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,r=e/8,u=e/s,o=this.gpu.createBuffer(`${i}_qweight`,r*t*4,n),h=this.gpu.createBuffer(`${i}_scales_f32`,u*t*4,n),m=Math.ceil(u*t/2)*4,c=this.gpu.createBuffer(`${i}_scales`,m,n),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:v}=await 
import("./gpu-ops-DKsrMEcC.js").then(C=>C.b);return{SHADERS:v}},[],import.meta.url)).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${i}_qp`,new Uint32Array([e,t,s]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,o,h,p]),B=65535,q=Math.min(t,B),_=Math.ceil(t/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-DKsrMEcC.js").then(C=>C.b);return{SHADERS:v}},[],import.meta.url)).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(u*t/2),g=this.gpu.createBufferFromData(`${i}_pp`,new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[h,c,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();return b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),g.destroy(),{qweight:o,scales:c}}initBuffers(a){a||(a=this.textCfg.max_position_embeddings||4096),this.maxSeqLen=a;const 
e=this.gpu,t=this.hiddenSize,s=this.intermediateSize,i=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.hidden=e.createBuffer("hidden_a",t*4,i),this.hiddenB=e.createBuffer("hidden_b",t*4,i),this.normed=e.createBuffer("normed",t*4,i),this.normedB=e.createBuffer("normed_b",t*4,i),this.mlpIntermediate=e.createBuffer("mlp_inter",s*4,i),this.mlpOut=e.createBuffer("mlp_out",t*4,i),this.logits=e.createBuffer("logits",this.vocabSize*4,i),this.argmaxResult=e.createBuffer("argmax_result",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this._argmaxReadback=e.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"argmax_readback"}),this._topkResult=e.createBuffer("topk_result",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._repMaxTokens=64,this._recentTokens=new Uint32Array(this._repMaxTokens),this._recentTokenCount=0,this._gpuRecentTokens=e.createBuffer("recent_tokens",this._repMaxTokens*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST);for(let q=0;q<this.numLayers;q++)this.layerTypes[q]==="full_attention"&&(this.kvCache[q]={keys:e.createBuffer(`kv_k_${q}`,a*this.numKVHeads*this.headDim*4),values:e.createBuffer(`kv_v_${q}`,a*this.numKVHeads*this.headDim*4)});this.qProjFull=e.createBuffer("q_proj_full",this.numHeads*this.headDim*2*4,i),this.qProj=e.createBuffer("q_proj_out",this.numHeads*this.headDim*4,i),this.qGate=e.createBuffer("q_gate",this.numHeads*this.headDim*4,i),this.kProj=e.createBuffer("k_proj_out",this.numKVHeads*this.headDim*4,i),this.vProj=e.createBuffer("v_proj_out",this.numKVHeads*this.headDim*4,i);const n=this.textCfg.linear_num_key_heads,r=this.textCfg.linear_key_head_dim,u=this.textCfg.linear_value_head_dim,o=this.textCfg.linear_num_value_heads??n,m=o/n*u,c=n*(r+r+m),f=o*u;this.linValueDim=f,this.linValueHeads=o,this.linQKV=e.createBuffer("lin_qkv",c*4,i),this.linZ=e.createBuffer("lin_z",f*4,i),this.linOut=e.createBuffer("lin_out",f*4,i);const 
p=Math.max(t,this.numHeads*this.headDim,f)*4;this.attnOut=e.createBuffer("attn_out",p,i),this._maxGqaSplits=64;const w=this.numHeads*this._maxGqaSplits*(this.headDim+2)*4;this._gqaPartials=e.createBuffer("gqa_partials",w,i),this.linState={},this.linConvHist={};for(let q=0;q<this.numLayers;q++)this.layerTypes[q]==="linear_attention"&&(this.linState[q]=e.createBuffer(`lin_state_${q}`,n*r*m*4,i),this.linConvHist[q]=e.createBuffer(`lin_conv_hist_${q}`,3*c*4,i));this.zeroBuf=e.createBuffer("zero_buf",t*4,i),this.useSplitK=!1,this.splitKSplits=1;const B=Math.max(t,s);this.splitKPartials=e.createBuffer("splitk_partials",B*this.splitKSplits*4,i),this.paramBufs={},this._gqaParamBuf=e.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_params"}),this._kvStoreParamBuf=e.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"kv_store_params"}),this._gqaData=new Uint8Array(32),this._gqaDv=new DataView(this._gqaData.buffer),this._kvData=new Uint8Array(16),this._kvDv=new DataView(this._kvData.buffer)}initVision(){const a=this.config.vision_config;if(!a){this.vision=null;return}const 
e=this.gpu,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=a.hidden_size,i=a.intermediate_size,n=a.out_hidden_size,r=a.depth,u=a.num_heads,o=s/u,h=a.patch_size,m=a.temporal_patch_size,c=a.spatial_merge_size,f=3*m*h*h,p=4096,w=s*c*c;this.vision={V:s,Vi:i,Vo:n,depth:r,heads:u,headDim:o,patchSize:h,temporalPatchSize:m,mergeSize:c,patchInputDim:f,maxVitTokens:p,mergedHidden:w,numPosEmbeddings:a.num_position_embeddings,numGridPerSide:Math.round(Math.sqrt(a.num_position_embeddings)),hidden:e.createBuffer("vit_hidden",p*s*4,t),hiddenB:e.createBuffer("vit_hidden_b",p*s*4,t),normed:e.createBuffer("vit_normed",p*s*4,t),q:e.createBuffer("vit_q",p*s*4,t),k:e.createBuffer("vit_k",p*s*4,t),v:e.createBuffer("vit_v",p*s*4,t),attnOut:e.createBuffer("vit_attn_out",p*s*4,t),mlpInter:e.createBuffer("vit_mlp_inter",p*i*4,t),mlpOut:e.createBuffer("vit_mlp_out",p*s*4,t),cos:e.createBuffer("vit_cos",p*o*4,t),sin:e.createBuffer("vit_sin",p*o*4,t),posEmbed:e.createBuffer("vit_pos_embed",p*s*4,t),merged:e.createBuffer("vit_merged",p/4*n*4,t),mergerNormed:e.createBuffer("vit_merger_normed",p*s*4,t),mergerInter:e.createBuffer("vit_merger_inter",p/4*w*4,t),patchInput:e.createBuffer("vit_patch_input",p*f*4,t),qkv:e.createBuffer("vit_qkv",p*3*s*4,t)},this.imageTokenId=this.config.image_token_id,this.visionStartTokenId=this.config.vision_start_token_id,this.visionEndTokenId=this.config.vision_end_token_id,console.log(`[VISION] Initialized: depth=${r}, hidden=${s}, heads=${u}, headDim=${o}, out=${n}`),console.log(`[VISION] Buffer allocation: ~${((p*s*4*12+p*i*4)/1024/1024).toFixed(0)} MB transient`)}async _readVisionPosEmbed(){const a=this.vision,e=this.weights["model.visual.pos_embed.weight"];if(!e)throw new Error("Vision pos_embed weight not found");const t=a.numPosEmbeddings,s=a.V,i=t*s*2,n=this.gpu.createReadbackBuffer("vit_pos_embed_readback",i),r=this.gpu.device.createCommandEncoder();r.copyBufferToBuffer(e,0,n,0,i),this.gpu.device.queue.submit([r.finish()]),await 
n.mapAsync(GPUMapMode.READ);const u=new Uint16Array(n.getMappedRange().slice(0));n.unmap(),n.destroy();const o=new Float32Array(t*s);for(let h=0;h<u.length;h++){const m=u[h]<<16,c=new ArrayBuffer(4);new Uint32Array(c)[0]=m,o[h]=new Float32Array(c)[0]}this._vitPosEmbedF32=o,console.log(`[VISION] Read pos_embed: ${t}×${s} (${(i/1024).toFixed(0)} KB)`)}_interpolatePosEmbed(a,e){const t=this.vision,s=t.V,i=t.numGridPerSide,n=this._vitPosEmbedF32,r=t.mergeSize,u=a*e,o=new Float32Array(u*s);for(let p=0;p<a;p++){const w=a===1?0:p*(i-1)/(a-1),B=Math.min(Math.floor(w),i-1),q=Math.min(B+1,i-1),_=w-B;for(let l=0;l<e;l++){const d=e===1?0:l*(i-1)/(e-1),y=Math.min(Math.floor(d),i-1),U=Math.min(y+1,i-1),g=d-y,P=B*i+y,S=B*i+U,b=q*i+y,v=q*i+U,C=(1-_)*(1-g),D=(1-_)*g,k=_*(1-g),z=_*g,M=p*e+l;for(let $=0;$<s;$++)o[M*s+$]=C*n[P*s+$]+D*n[S*s+$]+k*n[b*s+$]+z*n[v*s+$]}}const h=a/r,m=e/r,c=new Float32Array(u*s);let f=0;for(let p=0;p<h;p++)for(let w=0;w<m;w++)for(let B=0;B<r;B++)for(let q=0;q<r;q++){const _=p*r+B,l=w*r+q,d=_*e+l;c.set(o.subarray(d*s,d*s+s),f*s),f++}return c}_computeVisionRoPE(a,e){const t=this.vision,s=t.headDim/2,i=t.mergeSize,n=a/i,r=e/i,u=a*e,o=Math.max(a,e),h=new Float32Array(o*s);for(let p=0;p<o;p++)for(let w=0;w<s;w++){const B=1/Math.pow(1e4,2*w/t.headDim);h[p*s+w]=p*B}const m=new Float32Array(u*t.headDim),c=new Float32Array(u*t.headDim);let f=0;for(let p=0;p<n;p++)for(let w=0;w<r;w++)for(let B=0;B<i;B++)for(let q=0;q<i;q++){const _=p*i+B,l=w*i+q;for(let U=0;U<s;U++){const g=h[_*s+U];h[l*s+U];const P=f*t.headDim;U<s/2&&(m[P+U]=Math.cos(g),c[P+U]=Math.sin(g),m[P+s+U]=Math.cos(g),c[P+s+U]=Math.sin(g))}const d=f*t.headDim,y=s/2;for(let U=0;U<y;U++){const g=h[_*s+U],P=h[l*s+U];m[d+U]=Math.cos(g),m[d+y+U]=Math.cos(P),m[d+s+U]=Math.cos(g),m[d+s+y+U]=Math.cos(P),c[d+U]=Math.sin(g),c[d+y+U]=Math.sin(P),c[d+s+U]=Math.sin(g),c[d+s+y+U]=Math.sin(P)}f++}return{cos:m,sin:c}}async preprocessImage(a){const e=this.vision,t=e.patchSize,s=e.mergeSize,i=t*s,n=256*256,r=1280*1280,u=new 
Image;await new Promise((v,C)=>{u.onload=v,u.onerror=C,u.src=a});let{width:o,height:h}=u,m=1;o*h>r?m=Math.sqrt(r/(o*h)):o*h<n&&(m=Math.sqrt(n/(o*h)));let c=Math.round(o*m/i)*i,f=Math.round(h*m/i)*i;c=Math.max(i,c),f=Math.max(i,f);const w=new OffscreenCanvas(c,f).getContext("2d");w.drawImage(u,0,0,c,f);const q=w.getImageData(0,0,c,f).data,_=f/t,l=c/t,d=_*l,y=_/s,U=l/s,g=e.temporalPatchSize,P=3*g*t*t,S=new Float32Array(d*P);let b=0;for(let v=0;v<y;v++)for(let C=0;C<U;C++)for(let D=0;D<s;D++)for(let k=0;k<s;k++){const z=v*s+D,M=C*s+k,$=z*t,O=M*t,G=b*P;for(let R=0;R<g;R++)for(let F=0;F<3;F++)for(let T=0;T<t;T++)for(let x=0;x<t;x++){const V=(($+T)*c+(O+x))*4+F,j=q[V]/127.5-1,L=((F*g+R)*t+T)*t+x;S[G+L]=j}b++}return console.log(`[VISION] Preprocessed: ${o}x${h} → ${c}x${f}, ${d} patches (${_}x${l}), merge→${d/4} tokens`),{pixels:S,gridH:_,gridW:l,numPatches:d,imgW:c,imgH:f}}async visionForward(a){const e=this.vision,t=this.gpu,{pixels:s,gridH:i,gridW:n,numPatches:r}=a;this._vitPosEmbedF32||await this._readVisionPosEmbed(),t.device.queue.writeBuffer(e.patchInput,0,s);const u=this._interpolatePosEmbed(i,n);t.device.queue.writeBuffer(e.posEmbed,0,u);const{cos:o,sin:h}=this._computeVisionRoPE(i,n);t.device.queue.writeBuffer(e.cos,0,o),t.device.queue.writeBuffer(e.sin,0,h),t.beginBatch();const m=this.weights["model.visual.patch_embed.proj.weight"],c=this.weights["model.visual.patch_embed.proj.bias"],f=this.makeUniform("vit_patch_params",[e.patchInputDim,e.V]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.patchInput,m,c,e.hidden,f])],Math.ceil(e.V/32),r);const p=this.makeUniform("vit_add_len",[r*e.V]);t.dispatch(this.pipelines.vit_add,[t.createBindGroup(this.pipelines.vit_add,0,[e.hidden,e.posEmbed,p])],Math.ceil(r*e.V/256));for(let b=0;b<e.depth;b++)t.endBatch(),t.beginBatch(),this._vitBlock(b,r);const 
w=this.weights["model.visual.merger.norm.weight"],B=this.weights["model.visual.merger.norm.bias"],q=this.makeUniform("vit_merger_ln_params",[e.V,1e-6]);t.dispatch(this.pipelines.vit_layernorm,[t.createBindGroup(this.pipelines.vit_layernorm,0,[e.hidden,w,B,e.mergerNormed,q])],r);const _=r/4,l=this.weights["model.visual.merger.linear_fc1.weight"],d=this.weights["model.visual.merger.linear_fc1.bias"],y=this.makeUniform("vit_merger_fc1_params",[e.mergedHidden,e.mergedHidden]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerNormed,l,d,e.mergerInter,y])],Math.ceil(e.mergedHidden/32),_);const U=this.makeUniform("vit_gelu_len",[_*e.mergedHidden]);t.dispatch(this.pipelines.vit_gelu,[t.createBindGroup(this.pipelines.vit_gelu,0,[e.mergerInter,U])],Math.ceil(_*e.mergedHidden/256));const g=this.weights["model.visual.merger.linear_fc2.weight"],P=this.weights["model.visual.merger.linear_fc2.bias"],S=this.makeUniform("vit_merger_fc2_params",[e.mergedHidden,e.Vo]);return t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerInter,g,P,e.merged,S])],Math.ceil(e.Vo/32),_),t.endBatch(),await t.device.queue.onSubmittedWorkDone(),console.log(`[VISION] Forward done: ${r} patches → ${_} merged tokens (dim=${e.Vo})`),{numMergedTokens:_,embedBuffer:e.merged}}_vitBlock(a,e){const t=this.vision,s=this.gpu,i=`model.visual.blocks.${a}`,n=this.weights[`${i}.norm1.weight`],r=this.weights[`${i}.norm1.bias`],u=this.makeUniform(`vit_ln1_${a}`,[t.V,1e-6]);s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,n,r,t.normed,u])],e);const o=this.weights[`${i}.attn.qkv.weight`],h=this.weights[`${i}.attn.qkv.bias`],m=this.makeUniform(`vit_qkv_${a}`,[t.V,3*t.V]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,o,h,t.qkv,m])],Math.ceil(3*t.V/32),e);const 
c=t.V,f=c*c*2,p=c*2,w=this.makeUniform(`vit_qkv_mv_${a}`,[c,c]);for(let $=0;$<3;$++){const O=[t.q,t.k,t.v][$],G=s.createBindGroupWithOffsets(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,{buffer:o,offset:$*f,size:f},{buffer:h,offset:$*p,size:p},O,w]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[G],Math.ceil(c/32),e)}const B=this.makeUniform(`vit_rope_${a}`,[e,t.heads,t.headDim]);s.dispatch(this.pipelines.vit_rope,[s.createBindGroup(this.pipelines.vit_rope,0,[t.q,t.k,t.attnOut,t.mlpOut,t.cos,t.sin,B])],Math.ceil(e*t.heads*t.headDim/256));const q=1/Math.sqrt(t.headDim),_=this.makeUniform(`vit_attn_${a}`,[e,t.heads,t.headDim,q]);s.dispatch(this.pipelines.vit_attention,[s.createBindGroup(this.pipelines.vit_attention,0,[t.attnOut,t.mlpOut,t.v,t.q,_])],e,t.heads);const l=this.weights[`${i}.attn.proj.weight`],d=this.weights[`${i}.attn.proj.bias`],y=this.makeUniform(`vit_proj_${a}`,[c,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.q,l,d,t.mlpOut,y])],Math.ceil(c/32),e);const U=this.makeUniform(`vit_res1_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,U])],Math.ceil(e*c/256));const g=this.weights[`${i}.norm2.weight`],P=this.weights[`${i}.norm2.bias`];s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,g,P,t.normed,u])],e);const S=this.weights[`${i}.mlp.linear_fc1.weight`],b=this.weights[`${i}.mlp.linear_fc1.bias`],v=this.makeUniform(`vit_mlp_fc1_${a}`,[c,t.Vi]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,S,b,t.mlpInter,v])],Math.ceil(t.Vi/32),e);const C=this.makeUniform(`vit_gelu_${a}`,[e*t.Vi]);s.dispatch(this.pipelines.vit_gelu_tanh,[s.createBindGroup(this.pipelines.vit_gelu_tanh,0,[t.mlpInter,C])],Math.ceil(e*t.Vi/256));const 
D=this.weights[`${i}.mlp.linear_fc2.weight`],k=this.weights[`${i}.mlp.linear_fc2.bias`],z=this.makeUniform(`vit_mlp_fc2_${a}`,[t.Vi,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.mlpInter,D,k,t.mlpOut,z])],Math.ceil(c/32),e);const M=this.makeUniform(`vit_res2_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,M])],Math.ceil(e*c/256))}computeMultimodalPositions(a,e,t,s){const i=this.vision.mergeSize,n=t/i,r=s/i,u=a.length,o=new Array(3);for(let f=0;f<3;f++)o[f]=new Int32Array(u);let h=0,m=0;for(let f=0;f<u;f++)if(a[f]===this.imageTokenId){const w=m,B=Math.floor(w/r),q=w%r;o[0][f]=h,o[1][f]=h+B,o[2][f]=h+q,m++,m===e&&(h+=Math.max(n,r))}else o[0][f]=h,o[1][f]=h,o[2][f]=h,h++;const c=h-u;return{positionIds3D:o,ropeDelta:c}}initB2Buffers(){const a=this.gpu,e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.b2={},this.b2.hidden=a.createBuffer("b2_hidden_a",2*e*4,s),this.b2.hiddenB=a.createBuffer("b2_hidden_b",2*e*4,s),this.b2.normed=a.createBuffer("b2_normed",2*e*4,s),this.b2.mlpIntermediate=a.createBuffer("b2_mlp_inter",2*t*4,s),this.b2.mlpOut=a.createBuffer("b2_mlp_out",2*e*4,s),this.b2.logits=a.createBuffer("b2_logits",2*this.vocabSize*4,s),this.b2.zeroBuf=a.createBuffer("b2_zero_buf",2*e*4,s),this.b2.qProjFull=a.createBuffer("b2_q_proj_full",2*this.numHeads*this.headDim*2*4,s),this.b2.qProj=a.createBuffer("b2_q_proj_out",2*this.numHeads*this.headDim*4,s),this.b2.qGate=a.createBuffer("b2_q_gate",2*this.numHeads*this.headDim*4,s),this.b2.kProj=a.createBuffer("b2_k_proj_out",2*this.numKVHeads*this.headDim*4,s),this.b2.vProj=a.createBuffer("b2_v_proj_out",2*this.numKVHeads*this.headDim*4,s);const 
i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,h=this.linValueHeads/i*r,m=i*(n+n+h),c=this.linValueDim;this.b2.linQKV=a.createBuffer("b2_lin_qkv",2*m*4,s),this.b2.linZ=a.createBuffer("b2_lin_z",2*c*4,s),this.b2.linOut=a.createBuffer("b2_lin_out",2*c*4,s);const f=Math.max(e,this.numHeads*this.headDim,c)*4;if(this.b2.attnOut=a.createBuffer("b2_attn_out",2*f,s),this.abQuantized){const p=this.textCfg.linear_num_value_heads??i;this.b2.linAlpha=a.createBuffer("b2_lin_alpha",2*p*4,s),this.b2.linBeta=a.createBuffer("b2_lin_beta",2*p*4,s)}this.b2.topkResult0=a.createBuffer("b2_topk0",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkResult1=a.createBuffer("b2_topk1",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkReadback0=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb0"}),this.b2.topkReadback1=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb1"}),this.b2.argmaxResult0=a.createBuffer("b2_argmax0",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxResult1=a.createBuffer("b2_argmax1",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxReadback0=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb0"}),this.b2.argmaxReadback1=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb1"}),this.b2._gqaParamBuf0=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params0"}),this.b2._gqaParamBuf1=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params1"}),this.b2._dims={H:e,I:t,linQKVDim:m,valueDim:c,linHeads:i,linKeyDim:n,linValDim:r,linEVD:h,qProjSize:this.numHeads*this.headDim,qProjFullSize:this.numHeads*this.headDim*2
,kProjSize:this.numKVHeads*this.headDim,vProjSize:this.numKVHeads*this.headDim,attnOutElems:f/4},console.log(`[B2] Allocated batch=2 buffers (${((2*(e+e+e+t+e+this.vocabSize)*4+2*f)/1e6).toFixed(1)} MB activations)`)}runWithOffsets(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroupWithOffsets(i,0,e);this.gpu.dispatch(i,[n],t,s)}makeUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let r=0;r<e.length;r++)Number.isInteger(e[r])?s.setUint32(r*4,e[r],!0):s.setFloat32(r*4,e[r],!0);const i=a+"_"+e.join("_");if(this.paramBufs[i])return this.paramBufs[i];const n=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(n,0,new Uint8Array(t)),this.paramBufs[i]=n,n}_makeMixedUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let n=0;n<e.length;n++){const r=e[n];r.u!==void 0?s.setUint32(n*4,r.u,!0):s.setFloat32(n*4,r.f,!0)}this._mixedUniformBufs||(this._mixedUniformBufs={});let i=this._mixedUniformBufs[a];return i||(i=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a}),this._mixedUniformBufs[a]=i),this.gpu.device.queue.writeBuffer(i,0,new Uint8Array(t)),i}makeUniformTyped(a,e,t){const s=e.length+t.length,i=new ArrayBuffer(Math.max(16,Math.ceil(s*4/16)*16)),n=new DataView(i);let r=0;for(const h of e)n.setUint32(r,h,!0),r+=4;for(const h of t)n.setFloat32(r,h,!0),r+=4;const u=a+"_"+[...e,...t].join("_");if(this.paramBufs[u])return this.paramBufs[u];const o=this.gpu.device.createBuffer({size:i.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(o,0,new Uint8Array(i)),this.paramBufs[u]=o,o}makeFusedNormMLPUniform(a){const e=`fused_anorm_mlp_${a}`;if(this.paramBufs[e])return this.paramBufs[e];const t=this.hiddenSize,s=this.intermediateSize,i=16+640*16,n=new 
ArrayBuffer(i),r=new DataView(n);r.setUint32(0,t,!0),r.setUint32(4,s,!0),r.setUint32(8,this.groupSize,!0),r.setFloat32(12,this.rmsEps,!0);const u=`model.language_model.layers.${a}.post_attention_layernorm.weight`,o=this._normWeightRaw?.[u];if(!o)throw new Error(`Norm weight not cached for layer ${a}`);for(let m=0;m<t/2;m++){const c=Math.floor(m/4),f=m%4;r.setUint32(16+c*16+f*4,o[m],!0)}const h=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:e});return this.gpu.device.queue.writeBuffer(h,0,new Uint8Array(n)),this.paramBufs[e]=h,h}fusedNormGptqOp(a,e,t,s,i,n,r,u){const o=this.getQWeight(s);if(!o.qweight)return null;const h=this.makeFusedNormGPTQUniform(i,r),m=u?"fused_norm_gptq":"fused_norm_gptq_noadd",c=u?"fnorm_gptq_":"fnorm_gptqna_",f=u?[a,e,o.qweight,o.scales,t,h]:[a,o.qweight,o.scales,t,h];return this.prepOpCached(`${c}${s}`,m,f,this.wg4(r))}makeFusedNormGPTQUniform(a,e){const t=`fused_norm_gptq_${a}_${e}`;if(this.paramBufs[t])return this.paramBufs[t];const s=this.hiddenSize,i=16+512*16,n=new ArrayBuffer(i),r=new DataView(n);r.setUint32(0,s,!0),r.setUint32(4,e,!0),r.setUint32(8,this.groupSize,!0),r.setFloat32(12,this.rmsEps,!0);const u=this._normWeightRaw?.[a];if(!u)throw new Error(`Norm weight not cached: ${a}`);for(let h=0;h<s/2;h++){const m=Math.floor(h/4),c=h%4;r.setUint32(16+m*16+c*4,u[h],!0)}const o=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:t});return this.gpu.device.queue.writeBuffer(o,0,new Uint8Array(n)),this.paramBufs[t]=o,o}run(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);this.gpu.dispatch(i,[n],t,s)}runCached(a,e,t,s,i=1){const n=this.pipelines[e],r=this.gpu.getCachedBindGroup(a,n,0,t);this.gpu.dispatch(n,[r],s,i)}prepOp(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);return{pipeline:i,bindGroups:[n],workgroupsX:t,workgroupsY:s}}prepOpCached(a,e,t,s,i=1){const 
n=this.pipelines[e],r=this.gpu.getCachedBindGroup(a,n,0,t);return{pipeline:n,bindGroups:[r],workgroupsX:s,workgroupsY:i}}
/** Workgroup count when each workgroup covers 256 elements: ceil(a / 256). */
wg(a){return Math.ceil(a/256)}
/** Workgroup count when each workgroup covers 32 elements: ceil(a / 32). */
wg4(a){return Math.ceil(a/32)}
/** Workgroup count when each workgroup covers 8 elements: ceil(a / 8). */
wg8(a){return Math.ceil(a/8)}
/** Looks up the `.qweight`, `.scales` and `.qzeros` tensors for weight prefix `a`; missing entries are `undefined`. */
getQWeight(a){return{qweight:this.weights[a+".qweight"],scales:this.weights[a+".scales"],qzeros:this.weights[a+".qzeros"]}}
/**
 * Runs the "embedding" pipeline for token id `a`, writing into `this.hidden`.
 * NOTE(review): the uniform is cached by value, so every distinct token id seen creates one more
 * small buffer in `paramBufs` that is never released.
 */
embedding(a){const e=this.weights["model.language_model.embed_tokens.weight"],t=this.makeUniform("emb_params",[a,this.hiddenSize]);this.run("embedding",[e,this.hidden,t],this.wg(this.hiddenSize))}
/**
 * Copies one hidden vector (hiddenSize * 4 bytes) from buffer `a` into `this.hidden` via `gpu.copyBuffer`.
 * The 4th argument `e * hiddenSize * 4` is presumably the source byte offset of row `e` and the 5th the
 * destination offset — confirm against `copyBuffer`'s signature.
 */
embeddingFromVisionBuffer(a,e){const t=this.hiddenSize;this.gpu.copyBuffer(a,this.hidden,t*4,e*t*4,0)}
/** Runs "embed_from_argmax", which takes `this.argmaxResult` as an input binding and `this.hidden` as the other buffer; the params uniform is created once and kept on the instance. */
embeddingFromArgmax(){const a=this.weights["model.language_model.embed_tokens.weight"];this._embedArgmaxParams||(this._embedArgmaxParams=this.makeUniform("emb_argmax_params",[this.hiddenSize])),this.runCached("embed_from_argmax","embed_from_argmax",[a,this.hidden,this.argmaxResult,this._embedArgmaxParams],this.wg(this.hiddenSize))}
/**
 * Dispatches the LM head from `this.normed` into `this.logits`.
 * With `_lmHeadQWeight` set: a GPTQ matvec, using the "4t" variant (wg8) when hiddenSize/groupSize is a
 * multiple of 4, else the plain variant (wg4); the "_f16" pipelines are chosen when `gpu.hasF16`.
 * Otherwise: "bf16_matvec" over the embedding matrix, unless `tie_word_embeddings` is explicitly false,
 * in which case "lm_head.weight" is used.
 */
_dispatchLmHead(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,s=a/this.groupSize%4===0,i=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(s){const n=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg8(e))}else{const n=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg4(e))}}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.makeUniform("lmhead_params",[this.hiddenSize,this.vocabSize]);this.runCached("lm_head","bf16_matvec",[this.normed,t,this.logits,s],this.wg(this.vocabSize))}}
/** Runs "rmsnorm" with bindings [a, t, e, params] in a single workgroup; uses a cached bind group when key `s` is given. */
rmsNorm(a,e,t,s){const i=this.makeUniform("rmsnorm_params",[this.hiddenSize,this.rmsEps]);s?this.runCached(s,"rmsnorm",[a,t,e,i],1):this.run("rmsnorm",[a,t,e,i],1)}
/**
 * GPTQ matvec for weight prefix `t`: input buffer `a`, output buffer `e`, `s` input features, `i` output features.
 * Silently does nothing when `<t>.qweight` is not loaded.
 */
gptqMatvec(a,e,t,s,i){const
n=this.getQWeight(t);if(!n.qweight)return;const r=s/this.groupSize,u=r%4===0;if(this.useSplitK&&u){let o=this.splitKSplits;for(;o>1&&r%(o*4)!==0;)o>>=1;if(o>1){const h=this.makeUniform(`mv_sk_${s}_${i}_${o}`,[s,i,this.groupSize,o]);this.runCached(`sk4tf16_${t}`,"gptq_matvec_4t_f16_sk",[a,n.qweight,n.scales,this.splitKPartials,h],this.wg8(i),o);const m=this.makeUniform(`rsk_${i}_${o}`,[i,o]);this.runCached(`rsk_${t}`,"reduce_splitk",[this.splitKPartials,e,m],this.wg(i));return}}if(u){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",m=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,m],this.wg8(i))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",m=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,m],this.wg4(i))}}
/**
 * Non-dispatching counterpart of `gptqMatvec`: returns a prepared op (see `prepOpCached`) for weight
 * prefix `t`, reading `a` and writing `e`, with `s` input and `i` output features.
 * Picks the "4t" pipeline (wg8) when s/groupSize is a multiple of 4, else the plain one (wg4);
 * "_f16" variants when `gpu.hasF16`. There is no split-K path here.
 * @returns the op descriptor, or null when `<t>.qweight` is not loaded
 */
gptqMatvecOp(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;if(s/this.groupSize%4===0){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",m=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,m],this.wg8(i))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",m=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,m],this.wg4(i))}}
/** Runs "bf16_matvec" with bindings [a, t, e, params(s, i)] over wg(i) workgroups; `n`, when given, is the bind-group cache key. */
bf16Matvec(a,e,t,s,i,n){const r=this.makeUniform(`bf16mv_${s}_${i}`,[s,i]);n?this.runCached(n,"bf16_matvec",[a,t,e,r],this.wg(i)):this.run("bf16_matvec",[a,t,e,r],this.wg(i))}
/** Runs "silu_mul" with bindings [a, e, t, params(s)] over wg(s) workgroups; `i`, when given, is the bind-group cache key. */
siluMul(a,e,t,s,i){const n=this.makeUniform(`silu_${s}`,[s]);i?this.runCached(i,"silu_mul",[a,e,t,n],this.wg(s)):this.run("silu_mul",[a,e,t,n],this.wg(s))}
/** Runs the "add" pipeline with bindings [a, e, params(t)] over wg(t) workgroups; `s`, when given, is the bind-group cache key. */
addVectors(a,e,t,s){const
i=this.makeUniform(`add_${t}`,[t]);s?this.runCached(s,"add",[a,e,i],this.wg(t)):this.run("add",[a,e,i],this.wg(t))}
/** Runs "add_rmsnorm" in one workgroup with bindings [a, e, s, t, params]; note that `s` (4th parameter) is bound before `t`. `i` is an optional bind-group cache key. */
addAndRmsNorm(a,e,t,s,i){const n=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);i?this.runCached(i,"add_rmsnorm",[a,e,s,t,n],1):this.run("add_rmsnorm",[a,e,s,t,n],1)}
/**
 * MLP for layer `e` with input buffer `a`: one fused gate/up/SiLU dispatch into `this.mlpIntermediate`
 * ("4t" pipeline with wg8 when hiddenSize/groupSize is a multiple of 4, else the plain one with wg4;
 * "_f16" variants when `gpu.hasF16`), followed by the down projection into `this.mlpOut` via `gptqMatvec`.
 */
mlp(a,e){const t=`model.language_model.layers.${e}.mlp`,s=this.hiddenSize,i=this.intermediateSize,n=this.getQWeight(`${t}.gate_proj`),r=this.getQWeight(`${t}.up_proj`),u=this.makeUniform(`fused_mv_${s}_${i}`,[s,i,this.groupSize]);if(s/this.groupSize%4===0){const m=this.gpu.hasF16?"fused_gate_up_silu_4t_f16":"fused_gate_up_silu_4t",c=this.gpu.hasF16?"fused_gus4t16_":"fused_gus4t_";this.runCached(`${c}${e}`,m,[a,n.qweight,n.scales,r.qweight,r.scales,this.mlpIntermediate,u],this.wg8(i))}else{const m=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",c=this.gpu.hasF16?"fused_gus16_":"fused_gus_";this.runCached(`${c}${e}`,m,[a,n.qweight,n.scales,r.qweight,r.scales,this.mlpIntermediate,u],this.wg4(i))}this.gptqMatvec(this.mlpIntermediate,this.mlpOut,`${t}.down_proj`,i,s)}
/**
 * Full-attention block for layer `t`.
 * `a` is the hidden buffer and `e` a second buffer that is only bound for layers other than 0
 * ("add_rmsnorm_ro"); layer 0 uses plain "rmsnorm". `i` is the normed scratch buffer (defaults to `this.normed`).
 * `s`, `n`, `r`, `u` are position-like integers written to the per-layer param buffer further down
 * (`n`, `r` default to `s`; `u` defaults to `s`) — presumably rope positions and the KV-cache slot; confirm with the shader.
 * The q/k/v projections are prepared as ops and submitted together via `gpu.dispatchMulti`.
 */
fullAttentionFused(a,e,t,s,i,n,r,u){i=i||this.normed;const o=`model.language_model.layers.${t}.self_attn`,h=this.hiddenSize,m=this.headDim,c=this.numHeads,f=this.numKVHeads,p=c/f,w=t===0,B=`model.language_model.layers.${t}.input_layernorm.weight`,q=this.weights[B];if(w){const $=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"rmsnorm",[a,q,i,$],1)}else{const $=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"add_rmsnorm_ro",[a,e,q,i,$],1)}const _=this.gptqMatvecOp(i,this.qProjFull,`${o}.q_proj`,h,c*m*2),l=this.gptqMatvecOp(i,this.kProj,`${o}.k_proj`,h,f*m),d=this.gptqMatvecOp(i,this.vProj,`${o}.v_proj`,h,f*m);this.gpu.dispatchMulti([_,l,d].filter(Boolean));const
y=this.kvCache[t],U=this._fusedSQKParams[t],g=u??s;this._gqaDv.setUint32(0,g,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,n??s,!0),this._gqaDv.setUint32(12,r??s,!0),this.gpu.device.queue.writeBuffer(U,16,this._gqaData,0,16),this.runCached(`fa_sqk_${t}`,this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,y.keys,y.values,U],c+f);const P=(u??s)+1,S=this._forceMinSplits||1,b=Math.max(S,Math.min(Math.max(1,Math.ceil(P/32)),this._maxGqaSplits)),v=b>1?this._gqaPartials:this.attnOut;if(this._gqaDv.setUint32(0,P,!0),this._gqaDv.setUint32(4,m,!0),this._gqaDv.setUint32(8,f,!0),this._gqaDv.setUint32(12,c,!0),this._gqaDv.setUint32(16,p,!0),this._gqaDv.setUint32(20,b,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.run("gqa_attention_head",[this.qProj,y.keys,y.values,v,this._gqaParamBuf],c,b),b>1){this._gqaReduceParams||(this._gqaReduceParams=this.gpu.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_reduce_params"}));const $=new Uint8Array(16),O=new DataView($.buffer);O.setUint32(0,m,!0),O.setUint32(4,b,!0),O.setUint32(8,c,!0),this.gpu.device.queue.writeBuffer(this._gqaReduceParams,0,$),this.run("gqa_reduce",[this._gqaPartials,this.attnOut,this._gqaReduceParams],c)}const C=this.getQWeight(`${o}.o_proj`),D=c*m,z=D/this.groupSize%4===0,M=this.makeUniform(`fused_sig_mv_${D}_${h}`,[D,h,this.groupSize]);if(z){const $=this.gpu.hasF16?"fused_sigmoid_gptq_4t_f16":"fused_sigmoid_gptq_4t",O=this.gpu.hasF16?"fused_sig4t16_oproj_":"fused_sig4t_oproj_";this.runCached(`${O}${t}`,$,[this.attnOut,this.qGate,C.qweight,C.scales,this.qProj,M],this.wg8(h))}else{const $=this.gpu.hasF16?"fused_sigmoid_gptq_f16":"fused_sigmoid_gptq",O=this.gpu.hasF16?"fused_sig16_oproj_":"fused_sig_oproj_";this.runCached(`${O}${t}`,$,[this.attnOut,this.qGate,C.qweight,C.scales,this.qProj,M],this.wg4(h))}}linearAttentionFused(a,e,t,s){s=s||this.normed;const 
i=`model.language_model.layers.${t}.linear_attn`,n=this.hiddenSize,r=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,h=this.linValueHeads,c=h/r*o,f=r*(u+u+c),p=this.linValueDim,w=t===0,B=`model.language_model.layers.${t}.input_layernorm.weight`,q=this.weights[B];if(w){const b=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"rmsnorm",[a,q,s,b],1)}else{const b=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"add_rmsnorm_ro",[a,e,q,s,b],1)}{const b=[this.gptqMatvecOp(s,this.linQKV,`${i}.in_proj_qkv`,n,f),this.gptqMatvecOp(s,this.linZ,`${i}.in_proj_z`,n,p)];this.abQuantized&&(b.push(this.gptqMatvecOp(s,this.linAlpha,`${i}.in_proj_a`,n,h)),b.push(this.gptqMatvecOp(s,this.linBeta,`${i}.in_proj_b`,n,h))),this.gpu.dispatchMulti(b.filter(Boolean))}const _=this.weights[`${i}.conv1d.weight`],l=this.weights[`${i}.A_log`],d=this.weights[`${i}.dt_bias`],y=this.weights[`${i}.norm.weight`];if(this.abQuantized){const b=`fused_cdn_q_${r}_${u}_${o}_${f}_${h}`;let v=this.paramBufs[b];if(!v){const C=new ArrayBuffer(32),D=new DataView(C);D.setUint32(0,r,!0),D.setUint32(4,u,!0),D.setUint32(8,o,!0),D.setUint32(12,f,!0),D.setFloat32(16,this.rmsEps,!0),D.setUint32(20,0,!0),D.setUint32(24,h,!0),v=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_q_params"}),this.gpu.device.queue.writeBuffer(v,0,new Uint8Array(C)),this.paramBufs[b]=v}this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,this.linAlpha,this.linBeta,l,d,y,v],r)}else{const b=`fused_cdn_ext_${r}_${u}_${o}_${f}_${n}_${h}`;let v=this.paramBufs[b];if(!v){const D=new ArrayBuffer(32),k=new 
DataView(D);k.setUint32(0,r,!0),k.setUint32(4,u,!0),k.setUint32(8,o,!0),k.setUint32(12,f,!0),k.setFloat32(16,this.rmsEps,!0),k.setUint32(20,n,!0),k.setUint32(24,h,!0),v=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_ext_params"}),this.gpu.device.queue.writeBuffer(v,0,new Uint8Array(D)),this.paramBufs[b]=v}const C=this.linABWeight[t];this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,s,C,l,d,y,v],r)}const U=this.getQWeight(`${i}.out_proj`),P=p/this.groupSize%4===0,S=this.makeUniform(`fused_silu_mv_${p}_${n}`,[p,n,this.groupSize]);if(P){const b=this.gpu.hasF16?"fused_silu_gptq_4t_f16":"fused_silu_gptq_4t",v=this.gpu.hasF16?"fused_silu4t16_oproj_":"fused_silu4t_oproj_";this.runCached(`${v}${t}`,b,[this.linZ,this.linOut,U.qweight,U.scales,this.attnOut,S],this.wg8(n))}else{const b=this.gpu.hasF16?"fused_silu_gptq_f16":"fused_silu_gptq",v=this.gpu.hasF16?"fused_silu16_oproj_":"fused_silu_oproj_";this.runCached(`${v}${t}`,b,[this.linZ,this.linOut,U.qweight,U.scales,this.attnOut,S],this.wg4(n))}}fusedNormMLP(a,e,t,s,i,n){n=n||this.normed;const r=this.hiddenSize,u=`model.language_model.layers.${i}.post_attention_layernorm.weight`,o=this.weights[u],h=this.makeUniform("add_rmsnorm_params",[r,this.rmsEps]),m=`mlp_norm_${i}_${a===this.hidden?"a":"b"}`;this.runCached(m,"three_way_add_rmsnorm",[a,t,s,o,e,n,h],1),this.mlp(n,i)}decoderLayer(a,e,t,s,i,n,r,u){let o;this.layerTypes[a]==="full_attention"?(this.fullAttentionFused(t,i,a,e,void 0,n,r,u),o=this.qProj):(this.linearAttentionFused(t,i,a),o=this.attnOut),this.fusedNormMLP(t,s,i,o,a)}async forward(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let o=0;o<this.numLayers;o++){this.decoderLayer(o,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const 
n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const r=this.temperature??.7;if(r>0){const o=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(o>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:o},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const m=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,m],1);const c=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:r},{u:this.topK??20},{f:this.topP??.8},{u:c}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const o=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,o],1)}return this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this._readAndSample()}forwardSubmit(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let o=0;o<this.numLayers;o++){this.decoderLayer(o,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const r=this.temperature??.7;if(r>0){const 
o=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(o>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:o},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const m=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,m],1);const c=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:r},{u:this.topK??20},{f:this.topP??.8},{u:c}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const o=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,o],1)}this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch()}
/**
 * Stops GPU op recording and flattens the recorded entries into `this._replayFlat`:
 * one `{p, bg, x, y, gqa}` item per dispatch (entries with `multi` set contribute each of their `ops`).
 * `gqa` is 1 for dispatches of the "gqa_attention_head" pipeline so the replay can override their Y workgroup count.
 */
_buildReplayFromRecording(){const a=this.gpu.stopRecording(),e=this.pipelines.gqa_attention_head,t=[];for(const s of a)if(s.multi)for(const i of s.ops)t.push({p:i.pipeline,bg:i.bindGroup,x:i.wgX,y:i.wgY,gqa:i.pipeline===e?1:0});else t.push({p:s.pipeline,bg:s.bindGroup,x:s.wgX,y:s.wgY,gqa:s.pipeline===e?1:0});this._replayFlat=t,console.log(`[PERF] Recorded ${t.length} flat replay ops for core forward`)}
/**
 * Replays the recorded core forward pass for position `a` without rebuilding bind groups.
 * It derives `t = a - _ropeDelta` and the split count from `t + 1` (one split per 32 entries,
 * clamped to [1, _maxGqaSplits]), then rewrites the position-dependent uniform contents before re-issuing the dispatches.
 */
_replayCoreForward(a){const e=this.gpu,t=a-this._ropeDelta,s=t+1,i=Math.min(Math.max(1,Math.ceil(s/32)),this._maxGqaSplits);this._gqaDv.setUint32(0,t,!0),this._gqaDv.setUint32(4,a,!0),this._gqaDv.setUint32(8,a,!0),this._gqaDv.setUint32(12,a,!0);for(let
o=0;o<this.numLayers;o++)this.layerTypes[o]==="full_attention"&&e.device.queue.writeBuffer(this._fusedSQKParams[o],16,this._gqaData,0,16);this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,i,!0),e.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData,0,24),this._gqaReduceParams&&(this._gqaDv.setUint32(0,this.headDim,!0),this._gqaDv.setUint32(4,i,!0),this._gqaDv.setUint32(8,this.numHeads,!0),this._gqaDv.setUint32(12,0,!0),e.device.queue.writeBuffer(this._gqaReduceParams,0,this._gqaData,0,16));const n=e._singlePass,r=this._replayFlat,u=r.length;for(let o=0;o<u;o++){const h=r[o];n.setPipeline(h.p),n.setBindGroup(0,h.bg),n.dispatchWorkgroups(h.x,h.gqa?i:h.y)}}
/**
 * Batch-of-2 embedding: runs the "embedding" pipeline twice, writing token `a` into the first
 * hiddenSize*4 bytes of `this.b2.hidden` and token `e` into the next hiddenSize*4 bytes.
 */
embeddingB2(a,e){const t=this.hiddenSize,s=this.weights["model.language_model.embed_tokens.weight"],i=this.makeUniform("emb_params",[a,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:0,size:t*4},i],this.wg(t));const n=this.makeUniform(`emb_params_${e}`,[e,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:t*4,size:t*4},n],this.wg(t))}
/**
 * Batch-of-2 GPTQ product for weight prefix `t` (input `a`, output `e`, `s` input / `i` output features),
 * using "gptq_matmul_b2_f16" when `gpu.hasF16` and "gptq_matmul_b2" otherwise, over wg4(i) workgroups.
 * Silently does nothing when `<t>.qweight` is not loaded.
 */
gptqMatvecB2(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return;const r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",o=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";this.runCached(`${o}${t}`,u,[a,n.qweight,n.scales,e,r],this.wg4(i))}
/** Non-dispatching counterpart of `gptqMatvecB2`: returns the prepared op, or null when `<t>.qweight` is not loaded. */
gptqMatvecB2Op(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;const r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",o=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";return this.prepOpCached(`${o}${t}`,u,[a,n.qweight,n.scales,e,r],this.wg4(i))}
/**
 * Batch-of-2 full-attention block for layer `t`; `a`/`e` are the hidden and second input buffers, `s` the position of the first token
 * (the second token uses `s + 1`).
 */
fullAttentionB2(a,e,t,s){const
i=`model.language_model.layers.${t}.self_attn`,n=this.hiddenSize,r=this.b2._dims,u=this.headDim,o=this.numHeads,h=this.numKVHeads,m=o/h,c=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);c?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:n*4},p,{buffer:this.b2.normed,offset:0,size:n*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:n*4,size:n*4},p,{buffer:this.b2.normed,offset:n*4,size:n*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);const B=this.gptqMatvecB2Op(this.b2.normed,this.b2.qProjFull,`${i}.q_proj`,n,o*u*2),q=this.gptqMatvecB2Op(this.b2.normed,this.b2.kProj,`${i}.k_proj`,n,h*u),_=this.gptqMatvecB2Op(this.b2.normed,this.b2.vProj,`${i}.v_proj`,n,h*u);this.gpu.dispatchMulti([B,q,_].filter(Boolean));const l=this.kvCache[t],d=this._fusedSQKParams[t],y=r.qProjFullSize*4,U=r.kProjSize*4,g=r.vProjSize*4,P=r.qProjSize*4;this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(d,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:0,size:y},{buffer:this.b2.kProj,offset:0,size:U},{buffer:this.b2.vProj,offset:0,size:g},{buffer:this.b2.qProj,offset:0,size:P},{buffer:this.b2.qGate,offset:0,size:P},l.keys,l.values,d],o+h),this._gqaDv.setUint32(0,s+1,!0),this._gqaDv.setUint32(4,s+1,!0),this._gqaDv.setUint32(8,s+1,!0),this._gqaDv.setUint32(12,s+1,!0),this.gpu.device.queue.writeBuffer(d,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:y,size:y},{buffer:this.b2.kProj,offset:U,size:U},{buffer:this.b2.vProj,offset:g,size:g},{buffer:this.b2.qProj,offset:P,size:P},{buffer:this.b2.qGate,offset:P,size:P},l.keys,l.values,d],o+h);const 
S=s+1,b=s+2;this._gqaDv.setUint32(0,S,!0),this._gqaDv.setUint32(4,u,!0),this._gqaDv.setUint32(8,h,!0),this._gqaDv.setUint32(12,o,!0),this._gqaDv.setUint32(16,m,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf0,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:0,size:P},l.keys,l.values,{buffer:this.b2.attnOut,offset:0,size:r.qProjSize*4},this.b2._gqaParamBuf0],o),this._gqaDv.setUint32(0,b,!0),this._gqaDv.setUint32(24,s+1,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf1,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:P,size:P},l.keys,l.values,{buffer:this.b2.attnOut,offset:r.qProjSize*4,size:r.qProjSize*4},this.b2._gqaParamBuf1],o);const v=this.getQWeight(`${i}.o_proj`),C=o*u,D=this.makeUniform(`fused_sig_mv_${C}_${n}`,[C,n,this.groupSize]);this.runCached(`b2_fused_sig_oproj_${t}`,"fused_sigmoid_gptq_b2_f16",[this.b2.attnOut,this.b2.qGate,v.qweight,v.scales,this.b2.qProj,D],this.wg4(n))}linearAttentionB2(a,e,t){const s=`model.language_model.layers.${t}.linear_attn`,i=this.hiddenSize,n=this.b2._dims,r=n.linHeads,u=n.linKeyDim,o=n.linValDim;n.linEVD;const h=n.linQKVDim,m=n.valueDim,c=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[i,this.rmsEps]);c?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:i*4},p,{buffer:this.b2.normed,offset:0,size:i*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:i*4,size:i*4},p,{buffer:this.b2.normed,offset:i*4,size:i*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);{const 
P=[this.gptqMatvecB2Op(this.b2.normed,this.b2.linQKV,`${s}.in_proj_qkv`,i,h),this.gptqMatvecB2Op(this.b2.normed,this.b2.linZ,`${s}.in_proj_z`,i,m)];this.abQuantized&&(P.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linAlpha,`${s}.in_proj_a`,i,this.linValueHeads)),P.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linBeta,`${s}.in_proj_b`,i,this.linValueHeads))),this.gpu.dispatchMulti(P.filter(Boolean))}const B=this.weights[`${s}.conv1d.weight`],q=this.weights[`${s}.A_log`],_=this.weights[`${s}.dt_bias`],l=this.weights[`${s}.norm.weight`],d=h*4,y=m*4;if(this.abQuantized){const P=this.linValueHeads,S=P*4,b=`fused_cdn_q_${r}_${u}_${o}_${h}_${P}`,v=this.paramBufs[b];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:0,size:y},{buffer:this.b2.linAlpha,offset:0,size:S},{buffer:this.b2.linBeta,offset:0,size:S},q,_,l,v],r),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:d,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:y,size:y},{buffer:this.b2.linAlpha,offset:S,size:S},{buffer:this.b2.linBeta,offset:S,size:S},q,_,l,v],r)}else{const P=`fused_cdn_ext_${r}_${u}_${o}_${h}_${i}_${this.linValueHeads}`,S=this.paramBufs[P],b=this.linABWeight[t];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:0,size:y},{buffer:this.b2.normed,offset:0,size:i*4},b,q,_,l,S],r),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:d,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:y,size:y},{buffer:this.b2.normed,offset:i*4,size:i*4},b,q,_,l,S],r)}const 
U=this.getQWeight(`${s}.out_proj`),g=this.makeUniform(`fused_silu_mv_${m}_${i}`,[m,i,this.groupSize]);this.runCached(`b2_fused_silu_oproj_${t}`,"fused_silu_gptq_b2_f16",[this.b2.linZ,this.b2.linOut,U.qweight,U.scales,this.b2.attnOut,g],this.wg4(i))}
/**
 * Batch-of-2 post-attention step for layer `i`: "three_way_add_rmsnorm_b2" (2 workgroups) into `this.b2.normed`,
 * fused gate/up/SiLU into `this.b2.mlpIntermediate`, then the down projection into `this.b2.mlpOut`.
 * NOTE(review): the fused gate/up pipeline name is fixed to "fused_gate_up_silu_b2_f16" with no `gpu.hasF16`
 * check, unlike `gptqMatvecB2` — confirm a non-f16 device never reaches this path.
 */
fusedNormMLPB2(a,e,t,s,i){const n=this.hiddenSize,r=this.intermediateSize,u=`model.language_model.layers.${i}.mlp`,o=`model.language_model.layers.${i}.post_attention_layernorm.weight`,h=this.weights[o],m=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.run("three_way_add_rmsnorm_b2",[a,t,s,h,e,this.b2.normed,m],2);const c=this.getQWeight(`${u}.gate_proj`),f=this.getQWeight(`${u}.up_proj`),p=this.makeUniform(`fused_mv_${n}_${r}`,[n,r,this.groupSize]);this.runCached(`b2_fused_gus_${i}`,"fused_gate_up_silu_b2_f16",[this.b2.normed,c.qweight,c.scales,f.qweight,f.scales,this.b2.mlpIntermediate,p],this.wg4(r)),this.gptqMatvecB2(this.b2.mlpIntermediate,this.b2.mlpOut,`${u}.down_proj`,r,n)}
/** Batch-of-2 decoder layer `a`: full or linear attention per `this.layerTypes[a]` (position `e` is used by full attention only), then `fusedNormMLPB2`. */
decoderLayerB2(a,e,t,s,i){let n;this.layerTypes[a]==="full_attention"?(this.fullAttentionB2(t,i,a,e),n=this.b2.qProj):(this.linearAttentionB2(t,i,a),n=this.b2.attnOut),this.fusedNormMLPB2(t,s,i,n,a)}
/**
 * Batch-of-2 LM head from `this.b2.normed` into `this.b2.logits`.
 * Quantized head: a single "gptq_matmul_b2[_f16]" dispatch. Otherwise two "bf16_matvec" dispatches,
 * one per token, addressing each token's hiddenSize*4-byte input slice and vocabSize*4-byte output slice.
 */
_dispatchLmHeadB2(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,t=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]),s=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2";this.runCached("b2_lm_head",s,[this.b2.normed,this._lmHeadQWeight,this._lmHeadScales,this.b2.logits,t],this.wg4(e))}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.hiddenSize,i=this.vocabSize,n=this.makeUniform("lmhead_params",[s,i]);this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:0,size:s*4},t,{buffer:this.b2.logits,offset:0,size:i*4},n],this.wg(i)),this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:s*4,size:s*4},t,{buffer:this.b2.logits,offset:i*4,size:i*4},n],this.wg(i))}}async
forwardB2(a,e,t){
/*
 * Batch-of-2 forward pass: tokens `a` and `e`, with `t` the position handed to every decoder layer.
 * Lazily allocates the b2 buffers, runs embedding -> all layers -> "add_rmsnorm_b2" -> LM head, then
 * per token either "topk_extract" (temperature > 0) or "argmax", copies the results to the readback
 * buffers (2048 bytes each for top-k, 8 bytes each for argmax) and returns the promise from `_readAndSampleB2`.
 * When `this.mtp` is set, the second token's slice (byte offset hiddenSize*4) of the final hidden and
 * MLP-output buffers is first copied into `mtp.savedHidden` / `mtp.savedMlpOut`.
 */
this.b2||this.initB2Buffers(),this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embeddingB2(a,e);let s=this.b2.hidden,i=this.b2.hiddenB,n=this.b2.zeroBuf;for(let f=0;f<this.numLayers;f++){this.decoderLayerB2(f,t,s,i,n),n=this.b2.mlpOut;const p=s;s=i,i=p}if(this.mtp){const f=this.hiddenSize;this.gpu.copyBuffer(s,this.mtp.savedHidden,f*4,f*4,0),this.gpu.copyBuffer(this.b2.mlpOut,this.mtp.savedMlpOut,f*4,f*4,0)}const r=this.weights["model.language_model.norm.weight"],u=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);this.run("add_rmsnorm_b2",[s,this.b2.mlpOut,r,this.b2.normed,u],2),this._dispatchLmHeadB2();const h=(this.temperature??.7)>0,m=this.vocabSize,c=this.makeUniform("argmax_params",[m]);return h?(this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:0,size:m*4},this.b2.topkResult0,c],1),this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:m*4,size:m*4},this.b2.topkResult1,c],1)):(this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:0,size:m*4},this.b2.argmaxResult0,c],1),this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:m*4,size:m*4},this.b2.argmaxResult1,c],1)),this.gpu._passCount!==void 0&&!this._b2PassCountLogged&&(console.log(`[PERF] B2: ${this.gpu._passCount} compute passes per 2-token step`),this._b2PassCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),h?(this.gpu.copyBuffer(this.b2.topkResult0,this.b2.topkReadback0,2048),this.gpu.copyBuffer(this.b2.topkResult1,this.b2.topkReadback1,2048)):(this.gpu.copyBuffer(this.b2.argmaxResult0,this.b2.argmaxReadback0,8),this.gpu.copyBuffer(this.b2.argmaxResult1,this.b2.argmaxReadback1,8)),this.gpu.endBatch(),this._readAndSampleB2(h)}
/**
 * Maps the two readback buffers and returns `[token0, token1]`: sampled on the CPU from the top-k
 * readbacks when `a` is truthy, otherwise read directly from the argmax readbacks.
 * Both tokens are appended to the recent-token window.
 */
async _readAndSampleB2(a){const e=[0,0];a?(await
Promise.all([this.b2.topkReadback0.mapAsync(GPUMapMode.READ),this.b2.topkReadback1.mapAsync(GPUMapMode.READ)]),e[0]=this._sampleFromTopkReadback(this.b2.topkReadback0),e[1]=this._sampleFromTopkReadback(this.b2.topkReadback1)):(await Promise.all([this.b2.argmaxReadback0.mapAsync(GPUMapMode.READ),this.b2.argmaxReadback1.mapAsync(GPUMapMode.READ)]),e[0]=new Uint32Array(this.b2.argmaxReadback0.getMappedRange().slice(0))[0],e[1]=new Uint32Array(this.b2.argmaxReadback1.getMappedRange().slice(0))[0],this.b2.argmaxReadback0.unmap(),this.b2.argmaxReadback1.unmap());for(const t of e)this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=t:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=t);return e}_sampleFromTopkReadback(a){const e=a.getMappedRange(),t=new Uint32Array(e),s=new Float32Array(e),i=new Uint32Array(256),n=new Float32Array(256);for(let o=0;o<256;o++)i[o]=t[o*2],n[o]=s[o*2+1];a.unmap();const r=this.presencePenalty??0,u=this.repetitionPenalty??1;if((r>0||u>1)&&this._recentTokenCount>0){const o=new Set;for(let h=0;h<this._recentTokenCount;h++)o.add(this._recentTokens[h]);for(let h=0;h<256;h++)o.has(i[h])&&(r>0&&(n[h]-=r),u>1&&(n[h]=n[h]>0?n[h]/u:n[h]*u))}return this._sampleFromArrays(i,n,256)}async _readAndSample(){await this._argmaxReadback.mapAsync(GPUMapMode.READ);const a=new Uint32Array(this._argmaxReadback.getMappedRange().slice(0));this._argmaxReadback.unmap();const e=a[0];return this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=e:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=e),e}_sampleFromArrays(a,e,t){const s=this.temperature??.7,i=this.topP??.8,n=this.topK??20,r=Math.min(n,t),u=this._selBuf||(this._selBuf=new Uint32Array(64)),o=this._selValBuf||(this._selValBuf=new Float32Array(64)),h=this._usedBuf||(this._usedBuf=new Uint8Array(256));h.fill(0);for(let l=0;l<r;l++){let d=-1,y=-1/0;for(let 
U=0;U<t;U++)!h[U]&&e[U]>y&&(y=e[U],d=U);if(d<0)break;u[l]=a[d],o[l]=y,h[d]=1}const m=o[0],c=this._probBuf||(this._probBuf=new Float32Array(64));let f=0;for(let l=0;l<r;l++)c[l]=Math.exp((o[l]-m)/s),f+=c[l];for(let l=0;l<r;l++)c[l]/=f;let p=0,w=r;for(let l=0;l<r;l++)if(p+=c[l],p>=i){w=l+1;break}let B=0;for(let l=0;l<w;l++)B+=c[l];const q=Math.random()*B;let _=0;for(let l=0;l<w;l++)if(_+=c[l],_>=q)return u[l];return u[0]}_sample(a){const e=this.temperature??.7,t=this.topP??.8,s=this.topK??20,i=a.length;if(e<=0){let _=0,l=a[0];for(let d=1;d<i;d++)a[d]>l&&(l=a[d],_=d);return _}const n=Math.max(s,64),r=new Int32Array(n).fill(-1),u=new Float32Array(n).fill(-1/0);let o=-1/0;for(let _=0;_<i;_++){const l=a[_];if(l>o&&(o=l),l>u[n-1]){let d=n-1;for(;d>0&&l>u[d-1];)u[d]=u[d-1],r[d]=r[d-1],d--;u[d]=l,r[d]=_}}const h=Math.min(s,n),m=new Float32Array(h);let c=0;for(let _=0;_<h&&!(r[_]<0);_++)m[_]=Math.exp((u[_]-o)/e),c+=m[_];for(let _=0;_<h;_++)m[_]/=c;let f=0,p=h;for(let _=0;_<h;_++){if(r[_]<0){p=_;break}if(f+=m[_],f>=t){p=_+1;break}}let w=0;for(let _=0;_<p;_++)w+=m[_];const B=Math.random()*w;let q=0;for(let _=0;_<p;_++)if(q+=m[_],q>=B)return r[_];return r[0]}async generate(a,e=512,t,s){this.seqLen=0,this._recentTokenCount=0,this._replayFlat=null,this._ropeDelta=s?.ropeDelta??0;const i=this.maxSeqLen||4096;if(e=Math.min(e,i-a.length),e<=0)throw new Error(`Prompt (${a.length} tokens) exceeds context size (${i})`);const n=[...a],r=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,c=this.linValueHeads/r*o,f=r*(u+u+c);for(let P=0;P<this.numLayers;P++)if(this.layerTypes[P]==="linear_attention"){const S=r*u*c*4,b=3*f*4;this.gpu.device.queue.writeBuffer(this.linState[P],0,new Uint8Array(S)),this.gpu.device.queue.writeBuffer(this.linConvHist[P],0,new Uint8Array(b))}let p=null;if(s){let P=0;const S=s.imageTokenId,b=s.positionIds3D;for(let v=0;v<a.length;v++){const 
C=b[0][v],D=b[1][v],k=b[2][v];this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),a[v]===S?this.embeddingFromVisionBuffer(s.embedBuffer,P++):this.embedding(a[v]);let z=this.hidden,M=this.hiddenB,$=this.zeroBuf;for(let O=0;O<this.numLayers;O++){this.decoderLayer(O,C,z,M,$,D,k,v),$=this.mlpOut;const G=z;z=M,M=G}if(v===a.length-1){const O=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(z,this.mlpOut,this.normed,O,"add_final_norm"),this._dispatchLmHead();const G=this.temperature??.7;if(G>0){const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,R],1);const F=Math.random()*4294967295>>>0,T=this._makeMixedUniform("sample_params",[{f:G},{u:this.topK??20},{f:this.topP??.8},{u:F}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,T],1)}else{const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,R],1)}this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8)}this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch(),this.seqLen=v+1}p=await this._readAndSample()}else for(let P=0;P<a.length;P++)p=await this.forward(a[P],P),this.seqLen=P+1;n.push(p);const w=this.config.eos_token_id??this.textCfg.eos_token_id,B=Array.isArray(w)?w:w!=null?[w]:[248044,248046];if(t?.(p,0)||B.includes(p))return n;this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=p:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=p),this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const 
_=8;this._tokenHistoryBuf||(this._tokenHistoryBuf=this.gpu.createBuffer("token_history",_*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._tokenHistoryReadback=this.gpu.device.createBuffer({size:_*4,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"token_history_readback"}));let l=0,d=0,y=1,U=p,g=!1;for(;y<e;){const P=performance.now(),S=Math.min(_,e-y);for(let k=0;k<S;k++){const z=this.seqLen+k+this._ropeDelta;if(this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),k===0?this.embedding(U):this.embeddingFromArgmax(),g)this._replayCoreForward(z);else{let G=this.hidden,R=this.hiddenB,F=this.zeroBuf;k===0&&!this._replayFlat&&(this._forceMinSplits=2,this.gpu.startRecording());for(let x=0;x<this.numLayers;x++){this.decoderLayer(x,z,G,R,F),F=this.mlpOut;const V=G;G=R,R=V}const T=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(G,this.mlpOut,this.normed,T,"add_final_norm"),this._dispatchLmHead(),k===0&&this.gpu._recording&&(this._forceMinSplits=0,this._buildReplayFromRecording())}const M=this.temperature??.7;if(M>0){const G=this.repetitionPenalty??1,R=this.presencePenalty??0,F=this._recentTokenCount+k;if(F>0&&(G>1||R>0)){const j=this._makeMixedUniform("penalty_params",[{u:Math.min(F,this._repMaxTokens)},{f:G},{f:R},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,j],Math.ceil(Math.min(F,this._repMaxTokens)/256))}const T=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,T],1);const x=Math.random()*4294967295>>>0,V=this._makeMixedUniform("sample_params",[{f:M},{u:this.topK??20},{f:this.topP??.8},{u:x}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,V],1)}else{const G=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,G],1)}const 
$=(this._recentTokenCount+k)%this._repMaxTokens,O=this.makeUniform(`append_${k}`,[$,k]);/*
 * Dispatch the append_token shader with the sampled-token buffer, the GPU
 * recent-token buffer and the token-history buffer, then close the pass and
 * the batch for this step.
 */this.run("append_token",[this.argmaxResult,this._gpuRecentTokens,this._tokenHistoryBuf,O],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch()}/*
 * Once _replayFlat is set, later steps take the _replayCoreForward branch.
 */!g&&this._replayFlat&&(g=!0);/*
 * Copy the S token ids written during this batch into the mappable readback
 * buffer, wait for the mapping, and take a CPU copy before unmapping.
 */const b=this.gpu.device.createCommandEncoder();b.copyBufferToBuffer(this._tokenHistoryBuf,0,this._tokenHistoryReadback,0,S*4),this.gpu.device.queue.submit([b.finish()]),await this._tokenHistoryReadback.mapAsync(GPUMapMode.READ);const v=new Uint32Array(this._tokenHistoryReadback.getMappedRange().slice(0,S*4));this._tokenHistoryReadback.unmap();const C=performance.now();l+=C-P,d+=S;/*
 * Consume the batch on the CPU: append each token to the result, advance
 * seqLen, update the recent-token window (shifting it by one once it is full),
 * and stop when the callback returns a truthy value or the token is in the
 * EOS list B.
 */let D=!1;for(let k=0;k<S;k++){const z=v[k];n.push(z),this.seqLen++,this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=z:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=z);const M=t?.(z,y);if(y++,M||B.includes(z)){D=!0;break}}if(d%50<_&&console.log(`[T @${d}] ${(l/d).toFixed(1)}ms/tok (batch=${_})`),D)break;/*
 * Seed the next batch with the last token of this one and re-upload the
 * CPU-side recent-token window to the GPU.
 */U=v[S-1],this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4)}return d>0&&console.log(`[T final @${d}] ${(l/d).toFixed(1)}ms/tok (batch=${_})`),n}/**
 * Quantizes a weight buffer on the GPU and returns the packed result.
 *
 * Runs the quantize_bf16_to_int4 shader with bindings [a, qweight, f32 scales,
 * params], then the pack_f32_to_f16_pairs shader with bindings [f32 scales,
 * packed scales, params]. Waits for the queue to drain and destroys the f32
 * scales buffer and both uniform buffers before returning.
 *
 * Buffer sizes allocated here: qweight is (e/8) x t x 4 bytes, the f32 scales
 * are (e/groupSize) x t x 4 bytes, and the packed scales are
 * ceil((e/groupSize) x t / 2) x 4 bytes.
 *
 * @param a GPU buffer bound as the shader input (presumably BF16 weights, per
 *   the shader name; confirm against the WGSL source).
 * @param e First value of the params uniform; the visible callers pass the
 *   matvec K here. No check is made that it is divisible by 8 or groupSize.
 * @param t Second value of the params uniform; the visible callers pass N.
 * @param s Label prefix for the buffers created here.
 * @returns Promise resolving to an object with the qweight and scales buffers.
 */async _quantizeBF16Weight(a,e,t,s){const i=this.groupSize,n=e/8,r=e/i,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,o=this.gpu.createBuffer(`${s}_qweight`,n*t*4,u),h=this.gpu.createBuffer(`${s}_scales_f32`,r*t*4,u),m=Math.ceil(r*t/2)*4,c=this.gpu.createBuffer(`${s}_scales`,m,u),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-DKsrMEcC.js").then(C=>C.b);return{SHADERS:v}},[],import.meta.url)).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${s}_qparams`,new 
Uint32Array([e,t,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,o,h,p]),B=65535,q=Math.min(t,B),_=Math.ceil(t/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();/*
 * Pass 1 (quantize): dispatched as min(t, 65535) by ceil(t / 65535)
 * workgroups, so the x dimension never exceeds 65535.
 */d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);/*
 * Pass 2 (pack scales): U = ceil((e/groupSize) x t / 2) is passed as the only
 * uniform value and ceil(U / 256) workgroups are dispatched.
 */const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-DKsrMEcC.js").then(C=>C.b);return{SHADERS:v}},[],import.meta.url)).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(r*t/2),g=this.gpu.createBufferFromData(`${s}_pparams`,new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[h,c,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();return b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),g.destroy(),{qweight:o,scales:c}}/**
 * One-time setup of the MTP state stored on this.mtp. Returns immediately
 * when this.mtp already exists.
 *
 * Downloads tensors through loadMTPWeights from the repo id hard-coded below
 * and uploads each one to a GPU buffer. The rest of the method quantizes the
 * projection matrices, keeps raw copies of the norm weights, and allocates the
 * KV cache and scratch buffers.
 *
 * @param a Forwarded unchanged as the second argument of loadMTPWeights; its
 *   meaning is not visible here.
 *
 * NOTE(review): the repo id is fixed to Qwen/Qwen3.5-2B regardless of the
 * loaded model config; confirm this is intended for other model sizes.
 */async initMTP(a){if(this.mtp)return;const e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=performance.now(),n="Qwen/Qwen3.5-2B";console.log(`[MTP] Downloading MTP weights from ${n}...`);const{loadMTPWeights:r}=await A(async()=>{const{loadMTPWeights:B}=await import("./safetensors-loader-CwGm5mJX.js");return{loadMTPWeights:B}},[],import.meta.url),u=await r(n,a);console.log(`[MTP] Downloaded ${Object.keys(u).length} MTP tensors`),this.mtp={weights:{},qweights:{}};/*
 * Upload every downloaded tensor. The local map o keeps the handles so the
 * sources of quantized weights can be destroyed afterwards.
 */const o={};for(const[B,q]of Object.entries(u)){const _=this.gpu.createBufferFromData(`mtp_${B}`,q.data);o[B]=_,this.mtp.weights[B]=_}const 
h=[{name:"mtp.fc.weight",K:e*2,N:e},{name:"mtp.layers.0.self_attn.q_proj.weight",K:e,N:this.numHeads*this.headDim*2},{name:"mtp.layers.0.self_attn.k_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.v_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.o_proj.weight",K:e,N:e},{name:"mtp.layers.0.mlp.gate_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.up_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.down_proj.weight",K:t,N:e}];for(const{name:B,K:q,N:_}of h){const{qweight:l,scales:d}=await this._quantizeBF16Weight(o[B],q,_,`mtp_${B}`);this.mtp.qweights[B]={qweight:l,scales:d},o[B].destroy(),delete this.mtp.weights[B]}this.mtp.normRaw={};const m=["mtp.pre_fc_norm_hidden.weight","mtp.pre_fc_norm_embedding.weight","mtp.layers.0.input_layernorm.weight","mtp.layers.0.post_attention_layernorm.weight","mtp.norm.weight","mtp.layers.0.self_attn.q_norm.weight","mtp.layers.0.self_attn.k_norm.weight"];for(const B of m){const q=u[B];q&&(this.mtp.normRaw[B]=new Uint32Array(q.data.buffer.slice(q.data.byteOffset,q.data.byteOffset+q.data.byteLength)))}const f=4096*this.numKVHeads*this.headDim*4;this.mtp.kvCache={keys:this.gpu.createBuffer("mtp_kv_keys",f,s),values:this.gpu.createBuffer("mtp_kv_values",f,s)},this.mtp.concatBuf=this.gpu.createBuffer("mtp_concat",e*2*4,s),this.mtp.fcOut=this.gpu.createBuffer("mtp_fc_out",e*4,s),this.mtp.savedHidden=this.gpu.createBuffer("mtp_saved_hidden",e*4,s),this.mtp.savedMlpOut=this.gpu.createBuffer("mtp_saved_mlp_out",e*4,s),this.mtp.seqLen=0,this._buildMTPSplitQKNormUniform(),this._buildMTPFusedMLPUniform(),await this._buildTrimmedLmHead(8e3);const p=((performance.now()-i)/1e3).toFixed(1),w=h.length;console.log(`[MTP] Initialized in ${p}s: ${w} INT4 weights, 1 KV cache layer`)}_buildMTPSplitQKNormUniform(){const a=this.numHeads,e=this.numKVHeads,t=this.headDim,s=a*t,i=e*t,n=(s+i)/2,u=32+Math.ceil(n/4)*16,o=new ArrayBuffer(u),h=new 
DataView(o);/*
 * Header, little-endian: numHeads, numKVHeads and headDim as u32, rmsEps as
 * f32, then four u32 zeros at bytes 16..31 (mtpForwardSubmit overwrites those
 * 16 bytes before each dispatch).
 */h.setUint32(0,a,!0),h.setUint32(4,e,!0),h.setUint32(8,t,!0),h.setFloat32(12,this.rmsEps,!0),h.setUint32(16,0,!0),h.setUint32(20,0,!0),h.setUint32(24,0,!0),h.setUint32(28,0,!0);/*
 * Norm weights are written as consecutive u32 words from byte 32: s/2 slots
 * taken from q_norm first, then i/2 slots taken from k_norm.
 * NOTE(review): if a norm array holds fewer words than the loop bound, the
 * out-of-range reads are undefined and are stored as 0; confirm the arrays
 * have the length the shader expects.
 */const m=this.mtp.normRaw["mtp.layers.0.self_attn.q_norm.weight"],c=this.mtp.normRaw["mtp.layers.0.self_attn.k_norm.weight"];if(m)for(let p=0;p<s/2;p++){const w=Math.floor(p/4),B=p%4;h.setUint32(32+w*16+B*4,m[p],!0)}if(c){const p=s/2;for(let w=0;w<i/2;w++){const B=p+w,q=Math.floor(B/4),_=B%4;h.setUint32(32+q*16+_*4,c[w],!0)}}const f=this.gpu.device.createBuffer({size:u,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"mtp_fused_sqk"});this.gpu.device.queue.writeBuffer(f,0,new Uint8Array(o)),this.mtp.fusedSQKParams=f}/**
 * Empty placeholder. initMTP calls it, but it currently does nothing.
 */_buildMTPFusedMLPUniform(){}/**
 * Builds a quantized lm_head restricted to token ids 0..a-1.
 *
 * The index list is the identity mapping (stored as mtp.trimmedToFull). The
 * source is the embed_tokens weight unless tie_word_embeddings is explicitly
 * false, in which case lm_head.weight is used. Rows are gathered with the
 * gather_rows_bf16 shader (uniform [hiddenSize/2, a], dispatched as
 * ceil((hiddenSize/2)/256) by a workgroups) and then passed to
 * _quantizeBF16Weight with K = hiddenSize and N = a.
 *
 * @param a Number of leading vocabulary rows to keep (default 8000).
 */async _buildTrimmedLmHead(a=8e3){const e=this.hiddenSize,t=this.gpu,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=new Uint32Array(a);for(let y=0;y<a;y++)i[y]=y;this.mtp.trimmedToFull=i,this.mtp.trimmedVocabSize=a;const n=t.createBufferFromData("mtp_trim_indices",i),u=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",o=this.weights[u],h=e/2,m=a*h*4,c=t.createBuffer("mtp_trim_gathered",m,s),f=(await A(async()=>{const{SHADERS:y}=await import("./gpu-ops-DKsrMEcC.js").then(U=>U.b);return{SHADERS:y}},[],import.meta.url)).SHADERS.gather_rows_bf16,p=t.getOrCreatePipeline("gather_rows_bf16",f),w=t.createBufferFromData("mtp_trim_gparams",new Uint32Array([h,a]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),B=t.createBindGroup(p,0,[o,n,c,w]),q=t.device.createCommandEncoder(),_=q.beginComputePass();_.setPipeline(p),_.setBindGroup(0,B),_.dispatchWorkgroups(Math.ceil(h/256),a),_.end(),t.device.queue.submit([q.finish()]);const{qweight:l,scales:d}=await 
this._quantizeBF16Weight(c,e,a,"mtp_trim_lmhead");/*
 * Store the quantized head, allocate the a x 4 byte logits buffer, and release
 * the gathered rows, the index buffer and the gather uniform.
 */this.mtp.trimmedLmHead={qweight:l,scales:d},this.mtp.trimmedLogits=t.createBuffer("mtp_trimmed_logits",a*4,s),c.destroy(),n.destroy(),w.destroy(),console.log(`[MTP] Trimmed lm_head: ${a} tokens, ${(l.size/1024/1024).toFixed(1)}MB qw + ${(d.size/1024/1024).toFixed(1)}MB sc`)}/**
 * Dispatches the matvec of this.normed against the trimmed lm_head into
 * mtp.trimmedLogits. The 4t shader variant (dispatched with wg8) is chosen
 * when hiddenSize/groupSize is a multiple of 4, otherwise the base variant
 * (dispatched with wg4); the _f16 shader is used when gpu.hasF16 is set.
 */_dispatchTrimmedLmHead(){const a=this.hiddenSize,e=this.mtp.trimmedVocabSize,t=this.mtp.trimmedLmHead,i=a/this.groupSize%4===0,n=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(i){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("mtp_trim_lmhead",r,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg8(e))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("mtp_trim_lmhead",r,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg4(e))}}/**
 * Returns the quantized MTP weight entry stored under name a, or undefined
 * when there is none.
 */_mtpGetQWeight(a){return this.mtp.qweights[a]}/**
 * Dispatches a quantized matvec for an MTP weight.
 *
 * @param a Input buffer.
 * @param e Output buffer.
 * @param t Weight name used to look up mtp.qweights.
 * @param s First uniform value (K); selects the 4t variant when
 *   s/groupSize is a multiple of 4.
 * @param i Second uniform value (N); also sizes the dispatch via wg8 or wg4.
 * @throws Error when no quantized weight exists under name t.
 */_mtpGptqMatvec(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)throw new Error(`MTP weight not found: ${t}`);const u=s/this.groupSize%4===0,o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg4(i))}}/**
 * Copies hiddenSize x 4 bytes from buffer a into mtp.savedHidden and from
 * buffer e into mtp.savedMlpOut. Does nothing when MTP is not initialized.
 */_mtpSaveHidden(a,e){this.mtp&&(this.gpu.copyBuffer(a,this.mtp.savedHidden,this.hiddenSize*4),this.gpu.copyBuffer(e,this.mtp.savedMlpOut,this.hiddenSize*4))}/**
 * Runs one MTP step for token a: submits the work with mtpForwardSubmit and
 * resolves with the value returned by _readAndSample.
 * @throws Error when initMTP has not been run.
 */async mtpForward(a){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");return this.mtpForwardSubmit(a),await this._readAndSample()}/**
 * Records and submits the GPU work for one MTP step on token a, using
 * mtp.seqLen as the position, and increments mtp.seqLen at the end.
 * It does not read anything back; mtpForward does that afterwards.
 */mtpForwardSubmit(a){const e=this.hiddenSize,t=this.intermediateSize,s=this.mtp.seqLen;this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);const i=this.makeUniform("add_rmsnorm_params",[e,this.rmsEps]);/*
 * Build the fc input in mtp.concatBuf from two normalized vectors: the token
 * embedding (pre_fc_norm_embedding) and the saved hidden state plus saved MLP
 * output (pre_fc_norm_hidden). Then run mtp.fc (K = 2 x hiddenSize) and the
 * input layernorm.
 * NOTE(review): the second copyBuffer passes two extra arguments (0, e*4),
 * presumably source and destination byte offsets; confirm against copyBuffer.
 */this.runCached("mtp_emb_norm","rmsnorm",[this.hidden,this.mtp.weights["mtp.pre_fc_norm_embedding.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4),this.runCached("mtp_hid_norm","add_rmsnorm_ro",[this.mtp.savedHidden,this.mtp.savedMlpOut,this.mtp.weights["mtp.pre_fc_norm_hidden.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4,0,e*4),this._mtpGptqMatvec(this.mtp.concatBuf,this.mtp.fcOut,"mtp.fc.weight",e*2,e),this.runCached("mtp_input_norm","rmsnorm",[this.mtp.fcOut,this.mtp.weights["mtp.layers.0.input_layernorm.weight"],this.normed,i],1);/*
 * Prepare the q, k and v projections as ops; any op whose weight is missing
 * comes back null and is filtered out before dispatchMulti.
 */const n="mtp.layers.0.self_attn",r=this._mtpGptqMatvecOp(this.normed,this.qProjFull,`${n}.q_proj.weight`,e,this.numHeads*this.headDim*2),u=this._mtpGptqMatvecOp(this.normed,this.kProj,`${n}.k_proj.weight`,e,this.numKVHeads*this.headDim),o=this._mtpGptqMatvecOp(this.normed,this.vProj,`${n}.v_proj.weight`,e,this.numKVHeads*this.headDim);/*
 * Write the current position s four times into bytes 16..31 of the fused
 * split/QK-norm uniform, then dispatch that shader with one workgroup per
 * query head plus one per KV head.
 */this.gpu.dispatchMulti([r,u,o].filter(Boolean)),this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(this.mtp.fusedSQKParams,16,this._gqaData,0,16),this.runCached("mtp_sqk",this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.mtp.fusedSQKParams],this.numHeads+this.numKVHeads);const 
h=s+1;/*
 * Attention uniform, little-endian by byte offset: 0 = s + 1, 4 = headDim,
 * 8 = numKVHeads, 12 = numHeads, 16 = numHeads / numKVHeads, 20 = partialDim,
 * 24 = s, 28 = ropeTheta as f32. One workgroup is dispatched per query head.
 */this._gqaDv.setUint32(0,h,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.runCached("mtp_gqa","gqa_attention_head",[this.qProj,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.attnOut,this._gqaParamBuf],this.numHeads);/*
 * Fused sigmoid-gate and o_proj matvec with K = numHeads x headDim and
 * N = hiddenSize, writing into this.qProj. The 4t f16 shader is used only
 * when K/groupSize is a multiple of 4 and f16 is available; otherwise the
 * plain f16 or f32 shader. The post-attention norm of fcOut and qProj follows.
 */const m=this._mtpGetQWeight(`${n}.o_proj.weight`),c=this.numHeads*this.headDim,f=this.makeUniform(`fused_sig_mv_${c}_${e}`,[c,e,this.groupSize]);c/this.groupSize%4===0&&this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_4t_f16",[this.attnOut,this.qGate,m.qweight,m.scales,this.qProj,f],this.wg8(e)):this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_f16",[this.attnOut,this.qGate,m.qweight,m.scales,this.qProj,f],this.wg4(e)):this.runCached("mtp_sig_oproj","fused_sigmoid_gptq",[this.attnOut,this.qGate,m.qweight,m.scales,this.qProj,f],this.wg4(e)),this.runCached("mtp_post_norm","add_rmsnorm_ro",[this.mtp.fcOut,this.qProj,this.mtp.weights["mtp.layers.0.post_attention_layernorm.weight"],this.normed,i],1);/*
 * MLP: fused gate/up/SiLU shader into mlpIntermediate, down_proj into mlpOut,
 * then the three-input add + final norm over fcOut, qProj and mlpOut, and the
 * lm_head dispatch.
 */const B=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",q=this.makeUniform(`fused_mv_${e}_${t}`,[e,t,this.groupSize]),_=this.mtp.qweights["mtp.layers.0.mlp.gate_proj.weight"],l=this.mtp.qweights["mtp.layers.0.mlp.up_proj.weight"];this.runCached("mtp_gate_up_silu",B,[this.normed,_.qweight,_.scales,l.qweight,l.scales,this.mlpIntermediate,q],this.wg4(t)),this._mtpGptqMatvec(this.mlpIntermediate,this.mlpOut,"mtp.layers.0.mlp.down_proj.weight",t,e),this.runCached("mtp_final_norm","three_way_add_rmsnorm",[this.mtp.fcOut,this.qProj,this.mlpOut,this.mtp.weights["mtp.norm.weight"],this.hidden,this.normed,i],1),this._dispatchLmHead();const 
d=this.makeUniform("mtp_argmax_params",[this.vocabSize]);/*
 * The MTP step always selects its token with the argmax shader over the full
 * vocabulary logits. The result is copied (8 bytes) to the readback buffer,
 * the batch is closed, and mtp.seqLen advances by one.
 */this.runCached("mtp_argmax","argmax",[this.logits,this.argmaxResult,d],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this.mtp.seqLen++}/**
 * Saves the state of every linear_attention layer so a rejected draft can be
 * rolled back. Snapshot buffers are allocated on first use, sized from
 * linState and linConvHist. Each call records mtp.seqLen and copies both
 * buffers of every such layer into the snapshot.
 */_mtpSnapshotDeltaNet(){if(!this.mtp._deltaNetSnapshot){const e=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.mtp._deltaNetSnapshot={states:{},convs:{}};for(let t=0;t<this.numLayers;t++)this.layerTypes[t]==="linear_attention"&&(this.mtp._deltaNetSnapshot.states[t]=this.gpu.createBuffer(`mtp_snap_state_${t}`,this.linState[t].size,e),this.mtp._deltaNetSnapshot.convs[t]=this.gpu.createBuffer(`mtp_snap_conv_${t}`,this.linConvHist[t].size,e))}const a=this.mtp._deltaNetSnapshot;a.mtpSeqLen=this.mtp.seqLen;for(const e of Object.keys(a.states))this.gpu.copyBuffer(this.linState[e],a.states[e],this.linState[e].size),this.gpu.copyBuffer(this.linConvHist[e],a.convs[e],this.linConvHist[e].size)}/**
 * Restores what _mtpSnapshotDeltaNet saved: mtp.seqLen (when recorded) and
 * the linState and linConvHist buffers of each snapshotted layer. Does nothing
 * if no snapshot exists.
 */_mtpRestoreDeltaNet(){const a=this.mtp._deltaNetSnapshot;if(a){a.mtpSeqLen!==void 0&&(this.mtp.seqLen=a.mtpSeqLen);for(const e of Object.keys(a.states))this.gpu.copyBuffer(a.states[e],this.linState[e],this.linState[e].size),this.gpu.copyBuffer(a.convs[e],this.linConvHist[e],this.linConvHist[e].size)}}/**
 * Generates tokens using the MTP head as a one-token draft model.
 *
 * @param a Prompt token ids; the returned array starts with a copy of them.
 * @param e Token budget checked at the top of each step (default 512).
 * @param t Optional callback called as t(token, index); a truthy return value
 *   stops generation.
 * @returns Promise resolving to the prompt followed by the generated tokens.
 * @throws Error when initMTP has not been run.
 */async generateWithMTP(a,e=512,t){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");this.seqLen=0,this.mtp.seqLen=0,this._recentTokenCount=0;const s=[...a],i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,o=this.linValueHeads/i*r,h=i*(n+n+o);/*
 * Zero the recurrent state and the convolution history of every
 * linear_attention layer before processing the prompt.
 */for(let g=0;g<this.numLayers;g++)if(this.layerTypes[g]==="linear_attention"){const P=i*n*o*4,S=3*h*4;this.gpu.device.queue.writeBuffer(this.linState[g],0,new Uint8Array(P)),this.gpu.device.queue.writeBuffer(this.linConvHist[g],0,new Uint8Array(S))}/*
 * Prefill one token at a time; the result of the last forward call is the
 * first generated token.
 */let m=null;for(let g=0;g<a.length;g++)m=await this.forward(a[g],g),this.seqLen=g+1;s.push(m);/*
 * EOS ids come from the config when present, otherwise the two fallback ids
 * below are used.
 */const c=this.config.eos_token_id??this.textCfg.eos_token_id,f=Array.isArray(c)?c:c!=null?[c]:[248044,248046];if(t?.(m,0)||f.includes(m))return s;/*
 * Speculative loop. Each step drafts token P with mtpForward, snapshots the
 * linear-attention state, and runs forwardB2 on the pair (last token, P).
 * If the first result equals P the draft is accepted and both P and the
 * second result are emitted. Otherwise the snapshot and seqLen are restored
 * and a single forward step produces the next token.
 * NOTE(review): an accepted step emits two tokens while the budget is only
 * checked at the top of the loop, so the output can exceed e by one token.
 */let w=1,B=0,q=0,_=m,l=0,d=0;for(;w<e;){const g=performance.now(),P=await this.mtpForward(_);this._mtpSnapshotDeltaNet();const S=this.seqLen,b=await this.forwardB2(_,P,this.seqLen);this.seqLen+=2;const v=b[0],C=b[1];if(v===P){B++,s.push(P),w++;let k=t?.(P,w-1);if(k||f.includes(P)||(s.push(C),w++,k=t?.(C,w-1),k||f.includes(C)))break;_=C}else{q++,this._mtpRestoreDeltaNet(),this.seqLen=S;const k=await this.forward(_,this.seqLen);if(this.seqLen++,s.push(k),w++,t?.(k,w-1)||f.includes(k))break;_=k}const D=performance.now();if(l+=D-g,d++,d%25===0){const k=B/(B+q)*100,z=w/d;console.log(`[MTP @${d}] ${(l/d).toFixed(1)}ms/step, ${(w/(l/1e3)).toFixed(0)} tok/s, accept=${k.toFixed(0)}%, ${z.toFixed(1)} tok/step`)}}const y=B/Math.max(1,B+q)*100;return console.log(`[MTP final] ${(w/((l||1)/1e3)).toFixed(0)} tok/s, accept=${y.toFixed(0)}% (${B}/${B+q}), ${w} tokens`),s}/**
 * Same shader selection as _mtpGptqMatvec, but returns the prepared op from
 * prepOpCached instead of dispatching it, and returns null (rather than
 * throwing) when no quantized weight exists under name t.
 *
 * @param a Input buffer.
 * @param e Output buffer.
 * @param t Weight name used to look up mtp.qweights.
 * @param s First uniform value (K).
 * @param i Second uniform value (N).
 */_mtpGptqMatvecOp(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)return null;const u=s/this.groupSize%4===0,o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";return this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";return 
this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg4(i))}}}/*
 * Module export: the class N defined above is exposed as Qwen35Model.
 */export{N as Qwen35Model};
 
 
assets/qwen35-model-DrnSsmhP.js ADDED
@@ -0,0 +1 @@
 
 
1
+ import{S as H,a as W,_ as A}from"./gpu-ops-BbLjsC0p.js";function K(E){const a=E<<16,e=new ArrayBuffer(4);new Uint32Array(e)[0]=a,new Float32Array(e)[0];const t=a>>>31&1,s=a>>>23&255,i=a&8388607;if(s===0)return t<<15;if(s===255)return t<<15|31744|(i?512:0);const n=s-127+15;return n>=31?t<<15|31744:n<=0?t<<15:t<<15|n<<10|i>>>13}class N{constructor(a,e,t){if(this.gpu=a,this.config=e,this.textCfg=e.text_config,this.quantConfig=t,this.hiddenSize=this.textCfg.hidden_size,this.intermediateSize=this.textCfg.intermediate_size,this.numLayers=this.textCfg.num_hidden_layers,this.numHeads=this.textCfg.num_attention_heads,this.numKVHeads=this.textCfg.num_key_value_heads,this.headDim=this.textCfg.head_dim,this.vocabSize=this.textCfg.vocab_size,this.rmsEps=this.textCfg.rms_norm_eps,this.ropeTheta=this.textCfg.rope_parameters?.rope_theta||1e7,this.partialRotary=this.textCfg.partial_rotary_factor,this.partialDim=Math.floor(this.headDim*this.partialRotary),this.mropeSection=this.textCfg.rope_parameters?.mrope_section||[11,11,10],this.layerTypes=this.textCfg.layer_types,this.groupSize=t?.group_size||t?.config_groups?.group_0?.weights?.group_size||128,this.fp16Layers=new Set,t?.extra_config)for(const u of Object.keys(t.extra_config))this.fp16Layers.add(u);this.weights={},this.kvCache={},this.seqLen=0,this.pipelines={},this.singlePassMode=!0,this._replayFlat=null,this._forceMinSplits=0,this._ropeDelta=0;const s=this.numHeads*this.headDim,i=this.numKVHeads*this.headDim,n=(s+i)/2,r=Math.ceil(n/4);this._splitQKNormShaderKey=`fused_split_qknorm_kvstore_${r}`,H[this._splitQKNormShaderKey]||(H[this._splitQKNormShaderKey]=W(r))}compilePipelines(){const a=this.gpu;for(const[e,t]of Object.entries(H))this.pipelines[e]=a.getOrCreatePipeline(e,t)}uploadTensors(a){this.quantConfig?.quant_method==="compressed-tensors"&&(a=this._convertCompressedTensors(a));for(const[t,s]of Object.entries(a)){let 
i=s.data;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")){const n=new Float32Array(i.buffer,i.byteOffset,i.byteLength/4),r=new Uint16Array(n.length);for(let u=0;u<n.length;u++){const o=new Uint32Array(n.buffer,n.byteOffset+u*4,1)[0];r[u]=o>>>16}i=new Uint8Array(r.buffer)}if(s._partial){let{offset:n,totalSize:r}=s._partial;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")&&(n/=2,r/=2),n===0){const o=this.gpu.createBuffer(t,r,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC);this.gpu.device.queue.writeBuffer(o,0,i),this.weights[t]=o}else{const o=this.weights[t];o&&this.gpu.device.queue.writeBuffer(o,n,i)}}else this.weights[t]=this.gpu.createBufferFromData(t,i);(t.includes("post_attention_layernorm.weight")||t.includes("input_layernorm.weight")||t.includes("q_norm.weight")||t.includes("k_norm.weight")||t==="model.language_model.norm.weight")&&(this._normWeightRaw||(this._normWeightRaw={}),this._normWeightRaw[t]=new Uint32Array(i.buffer.slice(i.byteOffset,i.byteOffset+i.byteLength))),this._uploadedCount=(this._uploadedCount||0)+1}}_convertCompressedTensors(a){const e={};for(const[t,s]of Object.entries(a))if(!t.endsWith(".weight_shape")){if(t.endsWith(".weight_packed")){const i=t.slice(0,-14),n=s.shape,r=n[0],u=n[1],o=new Int32Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/4),h=new Int32Array(r*u);for(let c=0;c<r;c++)for(let m=0;m<u;m++)h[m*r+c]=o[c*u+m];e[`${i}.qweight`]={dtype:"I32",shape:[u,r],data:new Uint8Array(h.buffer)};continue}if(t.endsWith(".weight_scale")){const i=t.slice(0,-13),n=s.shape,r=n[0],u=n[1],o=new Uint16Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/2),h=Math.ceil(r/2),c=new Uint32Array(u*h);for(let m=0;m<u;m++)for(let f=0;f<r;f+=2){const p=o[f*u+m],w=f+1<r?o[(f+1)*u+m]:0,B=K(p),q=K(w);c[m*h+(f>>1)]=B|q<<16}e[`${i}.scales`]={dtype:"I32",shape:[u,h],data:new Uint8Array(c.buffer)};continue}e[t]=s}return e}async 
postProcessWeights(){const a=this.textCfg.linear_num_key_heads||0,e=this.hiddenSize,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=this.layerTypes.indexOf("linear_attention"),i=s>=0?`model.language_model.layers.${s}.linear_attn`:"";if(this.abQuantized=i&&!!this.weights[`${i}.in_proj_a.qweight`],this.abQuantized){const g=this.textCfg.linear_num_value_heads??a;this.linAlpha=this.gpu.createBuffer("lin_alpha",g*4,t),this.linBeta=this.gpu.createBuffer("lin_beta",g*4,t)}else{this.linABWeight={};const g=this.textCfg.linear_num_value_heads??a,P=e*2,S=g*P,b=2*S;for(let v=0;v<this.numLayers;v++)if(this.layerTypes[v]==="linear_attention"){const C=`model.language_model.layers.${v}.linear_attn`,D=this.weights[`${C}.in_proj_a.weight`],k=this.weights[`${C}.in_proj_b.weight`];if(D&&k){const O=this.gpu.createBuffer(`ab_merged_${v}`,b,t),$=this.gpu.device.createCommandEncoder();$.copyBufferToBuffer(D,0,O,0,S),$.copyBufferToBuffer(k,0,O,S,S),this.gpu.device.queue.submit([$.finish()]),this.linABWeight[v]=O}}}{const g=[];for(let b=0;b<this.numLayers;b++){if(this.layerTypes[b]==="linear_attention"){const v=`model.language_model.layers.${b}.linear_attn`,C=this.textCfg.linear_num_key_heads||0,D=this.textCfg.linear_key_head_dim||128,k=this.textCfg.linear_value_head_dim||128,O=this.textCfg.linear_num_value_heads??C,$=O*k,z=O/C*k,M=C*(D+D+z);g.push({prefix:`${v}.in_proj_qkv`,K:e,N:M}),g.push({prefix:`${v}.in_proj_z`,K:e,N:$}),g.push({prefix:`${v}.out_proj`,K:$,N:e})}else{const 
v=`model.language_model.layers.${b}.self_attn`,C=this.numHeads*this.headDim*2,D=this.numKVHeads*this.headDim;g.push({prefix:`${v}.q_proj`,K:e,N:C}),g.push({prefix:`${v}.k_proj`,K:e,N:D}),g.push({prefix:`${v}.v_proj`,K:e,N:D}),g.push({prefix:`${v}.o_proj`,K:this.numHeads*this.headDim,N:e})}g.push({prefix:`model.language_model.layers.${b}.mlp.gate_proj`,K:e,N:this.intermediateSize}),g.push({prefix:`model.language_model.layers.${b}.mlp.up_proj`,K:e,N:this.intermediateSize}),g.push({prefix:`model.language_model.layers.${b}.mlp.down_proj`,K:this.intermediateSize,N:e})}let P=0;const S=performance.now();for(const{prefix:b,K:v,N:C}of g)if(!this.weights[`${b}.qweight`]&&this.weights[`${b}.weight`]){const{qweight:D,scales:k}=await this._quantizeBF16ToINT4(this.weights[`${b}.weight`],v,C,this.groupSize,b.replace(/\./g,"_"));this.weights[`${b}.qweight`]=D,this.weights[`${b}.scales`]=k,P++}P>0&&console.log(`[QUANT] GPU-quantized ${P} BF16 projections to INT4 in ${(performance.now()-S).toFixed(0)}ms`)}this._mergedGateUp={};const n=this.intermediateSize,r=e/8,u=r*n*4,h=r/(this.groupSize/8)*n*2;for(let g=0;g<this.numLayers;g++){const P=`model.language_model.layers.${g}.mlp`,S=this.getQWeight(`${P}.gate_proj`),b=this.getQWeight(`${P}.up_proj`);if(S.qweight&&b.qweight){const v=this.gpu.createBuffer(`merged_qw_${g}`,u*2,t),C=this.gpu.createBuffer(`merged_sc_${g}`,h*2,t),D=this.gpu.device.createCommandEncoder();D.copyBufferToBuffer(S.qweight,0,v,0,u),D.copyBufferToBuffer(b.qweight,0,v,u,u),D.copyBufferToBuffer(S.scales,0,C,0,h),D.copyBufferToBuffer(b.scales,0,C,h,h),this.gpu.device.queue.submit([D.finish()]),this._mergedGateUp[g]={qweight:v,scales:C}}}this._fusedMLPParams={};const c=16+512*16;for(let g=0;g<this.numLayers;g++){const P=`model.language_model.layers.${g}.post_attention_layernorm.weight`,S=this._normWeightRaw?.[P];if(!S||!this._mergedGateUp[g])continue;const b=new ArrayBuffer(c),v=new Uint32Array(b),C=new 
Float32Array(b);v[0]=e,v[1]=n,v[2]=this.groupSize,C[3]=this.rmsEps;for(let D=0;D<S.length;D++)v[4+D]=S[D];this._fusedMLPParams[g]=this.gpu.createBufferFromData(`fused_mlp_params_${g}`,new Uint32Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}this._fusedSQKParams={};const m=this.headDim,f=this.numHeads,p=this.numKVHeads,w=f*m,B=p*m,q=(w+B)/2,_=Math.ceil(q/4),l=32+_*16,d=this.mropeSection[1]*3,y=this.mropeSection[2]*3,U=`fused_split_qknorm_kvstore_${_}`;H[U]||(H[U]=W(_,this.ropeTheta,d,y,this.partialDim)),this.pipelines[U]||(this.pipelines[U]=this.gpu.getOrCreatePipeline(U,H[U])),this._splitQKNormShaderKey=U;for(let g=0;g<this.numLayers;g++){if(this.layerTypes[g]!=="full_attention")continue;const P=`model.language_model.layers.${g}.self_attn`,S=`${P}.q_norm.weight`,b=`${P}.k_norm.weight`,v=this._normWeightRaw?.[S],C=this._normWeightRaw?.[b],D=new ArrayBuffer(l),k=new DataView(D);if(k.setUint32(0,f,!0),k.setUint32(4,p,!0),k.setUint32(8,m,!0),k.setFloat32(12,this.rmsEps,!0),k.setUint32(16,0,!0),k.setUint32(20,0,!0),k.setUint32(24,0,!0),k.setUint32(28,0,!0),v)for(let $=0;$<w/2;$++){const z=Math.floor($/4),M=$%4;k.setUint32(32+z*16+M*4,v[$],!0)}if(C){const $=w/2;for(let z=0;z<B/2;z++){const M=$+z,G=Math.floor(M/4),R=M%4;k.setUint32(32+G*16+R*4,C[z],!0)}}const O=this.gpu.device.createBuffer({size:l,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:`fused_sqk_${g}`});this.gpu.device.queue.writeBuffer(O,0,new Uint8Array(D)),this._fusedSQKParams[g]=O}await this._quantizeLmHead()}async _quantizeLmHead(){const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight";if(!this.weights[e])return;const t=this.hiddenSize,s=this.vocabSize,i=this.groupSize,n=t/8,r=t/i,u=performance.now(),o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,h=this.weights[e];this._lmHeadQWeight=this.gpu.createBuffer("lmhead_qweight",n*s*4,o);const 
c=this.gpu.createBuffer("lmhead_scales_f32",r*s*4,o),m=Math.ceil(r*s/2)*4;this._lmHeadScales=this.gpu.createBuffer("lmhead_scales",m,o);const f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:D}=await import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:D}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData("quant_params",new Uint32Array([t,s,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[h,this._lmHeadQWeight,c,p]),B=65535,q=Math.min(s,B),_=Math.ceil(s/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:D}=await import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:D}},[])).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(r*s/2),g=this.gpu.createBufferFromData("pack_params",new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[c,this._lmHeadScales,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),c.destroy(),p.destroy(),g.destroy();const v=(n*s*4/1e6).toFixed(0),C=(m/1e6).toFixed(0);console.log(`[QUANT] lm_head INT4 (GPU): ${(performance.now()-u).toFixed(0)}ms, ${v}MB qw + ${C}MB sc`)}async _quantizeBF16ToINT4(a,e,t,s,i){const n=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,r=e/8,u=e/s,o=this.gpu.createBuffer(`${i}_qweight`,r*t*4,n),h=this.gpu.createBuffer(`${i}_scales_f32`,u*t*4,n),c=Math.ceil(u*t/2)*4,m=this.gpu.createBuffer(`${i}_scales`,c,n),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:v}=await 
import("./gpu-ops-BbLjsC0p.js").then(C=>C.b);return{SHADERS:v}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${i}_qp`,new Uint32Array([e,t,s]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,o,h,p]),B=65535,q=Math.min(t,B),_=Math.ceil(t/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-BbLjsC0p.js").then(C=>C.b);return{SHADERS:v}},[])).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(u*t/2),g=this.gpu.createBufferFromData(`${i}_pp`,new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[h,m,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();return b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),g.destroy(),{qweight:o,scales:m}}initBuffers(a){a||(a=this.textCfg.max_position_embeddings||4096),this.maxSeqLen=a;const 
e=this.gpu,t=this.hiddenSize,s=this.intermediateSize,i=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.hidden=e.createBuffer("hidden_a",t*4,i),this.hiddenB=e.createBuffer("hidden_b",t*4,i),this.normed=e.createBuffer("normed",t*4,i),this.normedB=e.createBuffer("normed_b",t*4,i),this.mlpIntermediate=e.createBuffer("mlp_inter",s*4,i),this.mlpOut=e.createBuffer("mlp_out",t*4,i),this.logits=e.createBuffer("logits",this.vocabSize*4,i),this.argmaxResult=e.createBuffer("argmax_result",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this._argmaxReadback=e.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"argmax_readback"}),this._topkResult=e.createBuffer("topk_result",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._repMaxTokens=64,this._recentTokens=new Uint32Array(this._repMaxTokens),this._recentTokenCount=0,this._gpuRecentTokens=e.createBuffer("recent_tokens",this._repMaxTokens*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST);for(let q=0;q<this.numLayers;q++)this.layerTypes[q]==="full_attention"&&(this.kvCache[q]={keys:e.createBuffer(`kv_k_${q}`,a*this.numKVHeads*this.headDim*4),values:e.createBuffer(`kv_v_${q}`,a*this.numKVHeads*this.headDim*4)});this.qProjFull=e.createBuffer("q_proj_full",this.numHeads*this.headDim*2*4,i),this.qProj=e.createBuffer("q_proj_out",this.numHeads*this.headDim*4,i),this.qGate=e.createBuffer("q_gate",this.numHeads*this.headDim*4,i),this.kProj=e.createBuffer("k_proj_out",this.numKVHeads*this.headDim*4,i),this.vProj=e.createBuffer("v_proj_out",this.numKVHeads*this.headDim*4,i);const n=this.textCfg.linear_num_key_heads,r=this.textCfg.linear_key_head_dim,u=this.textCfg.linear_value_head_dim,o=this.textCfg.linear_num_value_heads??n,c=o/n*u,m=n*(r+r+c),f=o*u;this.linValueDim=f,this.linValueHeads=o,this.linQKV=e.createBuffer("lin_qkv",m*4,i),this.linZ=e.createBuffer("lin_z",f*4,i),this.linOut=e.createBuffer("lin_out",f*4,i);const 
p=Math.max(t,this.numHeads*this.headDim,f)*4;this.attnOut=e.createBuffer("attn_out",p,i),this._maxGqaSplits=64;const w=this.numHeads*this._maxGqaSplits*(this.headDim+2)*4;this._gqaPartials=e.createBuffer("gqa_partials",w,i),this.linState={},this.linConvHist={};for(let q=0;q<this.numLayers;q++)this.layerTypes[q]==="linear_attention"&&(this.linState[q]=e.createBuffer(`lin_state_${q}`,n*r*c*4,i),this.linConvHist[q]=e.createBuffer(`lin_conv_hist_${q}`,3*m*4,i));this.zeroBuf=e.createBuffer("zero_buf",t*4,i),this.useSplitK=!1,this.splitKSplits=1;const B=Math.max(t,s);this.splitKPartials=e.createBuffer("splitk_partials",B*this.splitKSplits*4,i),this.paramBufs={},this._gqaParamBuf=e.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_params"}),this._kvStoreParamBuf=e.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"kv_store_params"}),this._gqaData=new Uint8Array(32),this._gqaDv=new DataView(this._gqaData.buffer),this._kvData=new Uint8Array(16),this._kvDv=new DataView(this._kvData.buffer)}initVision(){const a=this.config.vision_config;if(!a){this.vision=null;return}const 
e=this.gpu,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=a.hidden_size,i=a.intermediate_size,n=a.out_hidden_size,r=a.depth,u=a.num_heads,o=s/u,h=a.patch_size,c=a.temporal_patch_size,m=a.spatial_merge_size,f=3*c*h*h,p=4096,w=s*m*m;this.vision={V:s,Vi:i,Vo:n,depth:r,heads:u,headDim:o,patchSize:h,temporalPatchSize:c,mergeSize:m,patchInputDim:f,maxVitTokens:p,mergedHidden:w,numPosEmbeddings:a.num_position_embeddings,numGridPerSide:Math.round(Math.sqrt(a.num_position_embeddings)),hidden:e.createBuffer("vit_hidden",p*s*4,t),hiddenB:e.createBuffer("vit_hidden_b",p*s*4,t),normed:e.createBuffer("vit_normed",p*s*4,t),q:e.createBuffer("vit_q",p*s*4,t),k:e.createBuffer("vit_k",p*s*4,t),v:e.createBuffer("vit_v",p*s*4,t),attnOut:e.createBuffer("vit_attn_out",p*s*4,t),mlpInter:e.createBuffer("vit_mlp_inter",p*i*4,t),mlpOut:e.createBuffer("vit_mlp_out",p*s*4,t),cos:e.createBuffer("vit_cos",p*o*4,t),sin:e.createBuffer("vit_sin",p*o*4,t),posEmbed:e.createBuffer("vit_pos_embed",p*s*4,t),merged:e.createBuffer("vit_merged",p/4*n*4,t),mergerNormed:e.createBuffer("vit_merger_normed",p*s*4,t),mergerInter:e.createBuffer("vit_merger_inter",p/4*w*4,t),patchInput:e.createBuffer("vit_patch_input",p*f*4,t),qkv:e.createBuffer("vit_qkv",p*3*s*4,t)},this.imageTokenId=this.config.image_token_id,this.visionStartTokenId=this.config.vision_start_token_id,this.visionEndTokenId=this.config.vision_end_token_id,console.log(`[VISION] Initialized: depth=${r}, hidden=${s}, heads=${u}, headDim=${o}, out=${n}`),console.log(`[VISION] Buffer allocation: ~${((p*s*4*12+p*i*4)/1024/1024).toFixed(0)} MB transient`)}async _readVisionPosEmbed(){const a=this.vision,e=this.weights["model.visual.pos_embed.weight"];if(!e)throw new Error("Vision pos_embed weight not found");const t=a.numPosEmbeddings,s=a.V,i=t*s*2,n=this.gpu.createReadbackBuffer("vit_pos_embed_readback",i),r=this.gpu.device.createCommandEncoder();r.copyBufferToBuffer(e,0,n,0,i),this.gpu.device.queue.submit([r.finish()]),await 
n.mapAsync(GPUMapMode.READ);const u=new Uint16Array(n.getMappedRange().slice(0));n.unmap(),n.destroy();const o=new Float32Array(t*s);for(let h=0;h<u.length;h++){const c=u[h]<<16,m=new ArrayBuffer(4);new Uint32Array(m)[0]=c,o[h]=new Float32Array(m)[0]}this._vitPosEmbedF32=o,console.log(`[VISION] Read pos_embed: ${t}×${s} (${(i/1024).toFixed(0)} KB)`)}_interpolatePosEmbed(a,e){const t=this.vision,s=t.V,i=t.numGridPerSide,n=this._vitPosEmbedF32,r=t.mergeSize,u=a*e,o=new Float32Array(u*s);for(let p=0;p<a;p++){const w=a===1?0:p*(i-1)/(a-1),B=Math.min(Math.floor(w),i-1),q=Math.min(B+1,i-1),_=w-B;for(let l=0;l<e;l++){const d=e===1?0:l*(i-1)/(e-1),y=Math.min(Math.floor(d),i-1),U=Math.min(y+1,i-1),g=d-y,P=B*i+y,S=B*i+U,b=q*i+y,v=q*i+U,C=(1-_)*(1-g),D=(1-_)*g,k=_*(1-g),O=_*g,$=p*e+l;for(let z=0;z<s;z++)o[$*s+z]=C*n[P*s+z]+D*n[S*s+z]+k*n[b*s+z]+O*n[v*s+z]}}const h=a/r,c=e/r,m=new Float32Array(u*s);let f=0;for(let p=0;p<h;p++)for(let w=0;w<c;w++)for(let B=0;B<r;B++)for(let q=0;q<r;q++){const _=p*r+B,l=w*r+q,d=_*e+l;m.set(o.subarray(d*s,d*s+s),f*s),f++}return m}_computeVisionRoPE(a,e){const t=this.vision,s=t.headDim/2,i=t.mergeSize,n=a/i,r=e/i,u=a*e,o=Math.max(a,e),h=new Float32Array(o*s);for(let p=0;p<o;p++)for(let w=0;w<s;w++){const B=1/Math.pow(1e4,2*w/t.headDim);h[p*s+w]=p*B}const c=new Float32Array(u*t.headDim),m=new Float32Array(u*t.headDim);let f=0;for(let p=0;p<n;p++)for(let w=0;w<r;w++)for(let B=0;B<i;B++)for(let q=0;q<i;q++){const _=p*i+B,l=w*i+q;for(let U=0;U<s;U++){const g=h[_*s+U];h[l*s+U];const P=f*t.headDim;U<s/2&&(c[P+U]=Math.cos(g),m[P+U]=Math.sin(g),c[P+s+U]=Math.cos(g),m[P+s+U]=Math.sin(g))}const d=f*t.headDim,y=s/2;for(let U=0;U<y;U++){const g=h[_*s+U],P=h[l*s+U];c[d+U]=Math.cos(g),c[d+y+U]=Math.cos(P),c[d+s+U]=Math.cos(g),c[d+s+y+U]=Math.cos(P),m[d+U]=Math.sin(g),m[d+y+U]=Math.sin(P),m[d+s+U]=Math.sin(g),m[d+s+y+U]=Math.sin(P)}f++}return{cos:c,sin:m}}async preprocessImage(a){const e=this.vision,t=e.patchSize,s=e.mergeSize,i=t*s,n=256*256,r=1280*1280,u=new 
Image;await new Promise((v,C)=>{u.onload=v,u.onerror=C,u.src=a});let{width:o,height:h}=u,c=1;o*h>r?c=Math.sqrt(r/(o*h)):o*h<n&&(c=Math.sqrt(n/(o*h)));let m=Math.round(o*c/i)*i,f=Math.round(h*c/i)*i;m=Math.max(i,m),f=Math.max(i,f);const w=new OffscreenCanvas(m,f).getContext("2d");w.drawImage(u,0,0,m,f);const q=w.getImageData(0,0,m,f).data,_=f/t,l=m/t,d=_*l,y=_/s,U=l/s,g=e.temporalPatchSize,P=3*g*t*t,S=new Float32Array(d*P);let b=0;for(let v=0;v<y;v++)for(let C=0;C<U;C++)for(let D=0;D<s;D++)for(let k=0;k<s;k++){const O=v*s+D,$=C*s+k,z=O*t,M=$*t,G=b*P;for(let R=0;R<g;R++)for(let F=0;F<3;F++)for(let T=0;T<t;T++)for(let x=0;x<t;x++){const V=((z+T)*m+(M+x))*4+F,j=q[V]/127.5-1,L=((F*g+R)*t+T)*t+x;S[G+L]=j}b++}return console.log(`[VISION] Preprocessed: ${o}x${h} → ${m}x${f}, ${d} patches (${_}x${l}), merge→${d/4} tokens`),{pixels:S,gridH:_,gridW:l,numPatches:d,imgW:m,imgH:f}}async visionForward(a){const e=this.vision,t=this.gpu,{pixels:s,gridH:i,gridW:n,numPatches:r}=a;this._vitPosEmbedF32||await this._readVisionPosEmbed(),t.device.queue.writeBuffer(e.patchInput,0,s);const u=this._interpolatePosEmbed(i,n);t.device.queue.writeBuffer(e.posEmbed,0,u);const{cos:o,sin:h}=this._computeVisionRoPE(i,n);t.device.queue.writeBuffer(e.cos,0,o),t.device.queue.writeBuffer(e.sin,0,h),t.beginBatch();const c=this.weights["model.visual.patch_embed.proj.weight"],m=this.weights["model.visual.patch_embed.proj.bias"],f=this.makeUniform("vit_patch_params",[e.patchInputDim,e.V]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.patchInput,c,m,e.hidden,f])],Math.ceil(e.V/32),r);const p=this.makeUniform("vit_add_len",[r*e.V]);t.dispatch(this.pipelines.vit_add,[t.createBindGroup(this.pipelines.vit_add,0,[e.hidden,e.posEmbed,p])],Math.ceil(r*e.V/256));for(let b=0;b<e.depth;b++)t.endBatch(),t.beginBatch(),this._vitBlock(b,r);const 
w=this.weights["model.visual.merger.norm.weight"],B=this.weights["model.visual.merger.norm.bias"],q=this.makeUniform("vit_merger_ln_params",[e.V,1e-6]);t.dispatch(this.pipelines.vit_layernorm,[t.createBindGroup(this.pipelines.vit_layernorm,0,[e.hidden,w,B,e.mergerNormed,q])],r);const _=r/4,l=this.weights["model.visual.merger.linear_fc1.weight"],d=this.weights["model.visual.merger.linear_fc1.bias"],y=this.makeUniform("vit_merger_fc1_params",[e.mergedHidden,e.mergedHidden]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerNormed,l,d,e.mergerInter,y])],Math.ceil(e.mergedHidden/32),_);const U=this.makeUniform("vit_gelu_len",[_*e.mergedHidden]);t.dispatch(this.pipelines.vit_gelu,[t.createBindGroup(this.pipelines.vit_gelu,0,[e.mergerInter,U])],Math.ceil(_*e.mergedHidden/256));const g=this.weights["model.visual.merger.linear_fc2.weight"],P=this.weights["model.visual.merger.linear_fc2.bias"],S=this.makeUniform("vit_merger_fc2_params",[e.mergedHidden,e.Vo]);return t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerInter,g,P,e.merged,S])],Math.ceil(e.Vo/32),_),t.endBatch(),await t.device.queue.onSubmittedWorkDone(),console.log(`[VISION] Forward done: ${r} patches → ${_} merged tokens (dim=${e.Vo})`),{numMergedTokens:_,embedBuffer:e.merged}}_vitBlock(a,e){const t=this.vision,s=this.gpu,i=`model.visual.blocks.${a}`,n=this.weights[`${i}.norm1.weight`],r=this.weights[`${i}.norm1.bias`],u=this.makeUniform(`vit_ln1_${a}`,[t.V,1e-6]);s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,n,r,t.normed,u])],e);const o=this.weights[`${i}.attn.qkv.weight`],h=this.weights[`${i}.attn.qkv.bias`],c=t.V,m=c*c*2,f=c*2,p=this.makeUniform(`vit_qkv_mv_${a}`,[c,c]);for(let $=0;$<3;$++){const 
z=[t.q,t.k,t.v][$],M=s.createBindGroupWithOffsets(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,{buffer:o,offset:$*m,size:m},{buffer:h,offset:$*f,size:f},z,p]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[M],Math.ceil(c/32),e)}const w=this.makeUniform(`vit_rope_${a}`,[e,t.heads,t.headDim]);s.dispatch(this.pipelines.vit_rope,[s.createBindGroup(this.pipelines.vit_rope,0,[t.q,t.k,t.attnOut,t.mlpOut,t.cos,t.sin,w])],Math.ceil(e*t.heads*t.headDim/256));const B=1/Math.sqrt(t.headDim),q=this.makeUniform(`vit_attn_${a}`,[e,t.heads,t.headDim,B]);s.dispatch(this.pipelines.vit_attention,[s.createBindGroup(this.pipelines.vit_attention,0,[t.attnOut,t.mlpOut,t.v,t.q,q])],e,t.heads);const _=this.weights[`${i}.attn.proj.weight`],l=this.weights[`${i}.attn.proj.bias`],d=this.makeUniform(`vit_proj_${a}`,[c,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.q,_,l,t.mlpOut,d])],Math.ceil(c/32),e);const y=this.makeUniform(`vit_res1_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,y])],Math.ceil(e*c/256));const U=this.weights[`${i}.norm2.weight`],g=this.weights[`${i}.norm2.bias`];s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,U,g,t.normed,u])],e);const P=this.weights[`${i}.mlp.linear_fc1.weight`],S=this.weights[`${i}.mlp.linear_fc1.bias`],b=this.makeUniform(`vit_mlp_fc1_${a}`,[c,t.Vi]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,P,S,t.mlpInter,b])],Math.ceil(t.Vi/32),e);const v=this.makeUniform(`vit_gelu_${a}`,[e*t.Vi]);s.dispatch(this.pipelines.vit_gelu_tanh,[s.createBindGroup(this.pipelines.vit_gelu_tanh,0,[t.mlpInter,v])],Math.ceil(e*t.Vi/256));const 
C=this.weights[`${i}.mlp.linear_fc2.weight`],D=this.weights[`${i}.mlp.linear_fc2.bias`],k=this.makeUniform(`vit_mlp_fc2_${a}`,[t.Vi,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.mlpInter,C,D,t.mlpOut,k])],Math.ceil(c/32),e);const O=this.makeUniform(`vit_res2_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,O])],Math.ceil(e*c/256))}computeMultimodalPositions(a,e,t,s){const i=this.vision.mergeSize,n=t/i,r=s/i,u=a.length,o=new Array(3);for(let f=0;f<3;f++)o[f]=new Int32Array(u);let h=0,c=0;for(let f=0;f<u;f++)if(a[f]===this.imageTokenId){const w=c,B=Math.floor(w/r),q=w%r;o[0][f]=h,o[1][f]=h+B,o[2][f]=h+q,c++,c===e&&(h+=Math.max(n,r))}else o[0][f]=h,o[1][f]=h,o[2][f]=h,h++;const m=h-u;return{positionIds3D:o,ropeDelta:m}}initB2Buffers(){const a=this.gpu,e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.b2={},this.b2.hidden=a.createBuffer("b2_hidden_a",2*e*4,s),this.b2.hiddenB=a.createBuffer("b2_hidden_b",2*e*4,s),this.b2.normed=a.createBuffer("b2_normed",2*e*4,s),this.b2.mlpIntermediate=a.createBuffer("b2_mlp_inter",2*t*4,s),this.b2.mlpOut=a.createBuffer("b2_mlp_out",2*e*4,s),this.b2.logits=a.createBuffer("b2_logits",2*this.vocabSize*4,s),this.b2.zeroBuf=a.createBuffer("b2_zero_buf",2*e*4,s),this.b2.qProjFull=a.createBuffer("b2_q_proj_full",2*this.numHeads*this.headDim*2*4,s),this.b2.qProj=a.createBuffer("b2_q_proj_out",2*this.numHeads*this.headDim*4,s),this.b2.qGate=a.createBuffer("b2_q_gate",2*this.numHeads*this.headDim*4,s),this.b2.kProj=a.createBuffer("b2_k_proj_out",2*this.numKVHeads*this.headDim*4,s),this.b2.vProj=a.createBuffer("b2_v_proj_out",2*this.numKVHeads*this.headDim*4,s);const 
i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,h=this.linValueHeads/i*r,c=i*(n+n+h),m=this.linValueDim;this.b2.linQKV=a.createBuffer("b2_lin_qkv",2*c*4,s),this.b2.linZ=a.createBuffer("b2_lin_z",2*m*4,s),this.b2.linOut=a.createBuffer("b2_lin_out",2*m*4,s);const f=Math.max(e,this.numHeads*this.headDim,m)*4;if(this.b2.attnOut=a.createBuffer("b2_attn_out",2*f,s),this.abQuantized){const p=this.textCfg.linear_num_value_heads??i;this.b2.linAlpha=a.createBuffer("b2_lin_alpha",2*p*4,s),this.b2.linBeta=a.createBuffer("b2_lin_beta",2*p*4,s)}this.b2.topkResult0=a.createBuffer("b2_topk0",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkResult1=a.createBuffer("b2_topk1",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkReadback0=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb0"}),this.b2.topkReadback1=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb1"}),this.b2.argmaxResult0=a.createBuffer("b2_argmax0",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxResult1=a.createBuffer("b2_argmax1",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxReadback0=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb0"}),this.b2.argmaxReadback1=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb1"}),this.b2._gqaParamBuf0=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params0"}),this.b2._gqaParamBuf1=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params1"}),this.b2._dims={H:e,I:t,linQKVDim:c,valueDim:m,linHeads:i,linKeyDim:n,linValDim:r,linEVD:h,qProjSize:this.numHeads*this.headDim,qProjFullSize:this.numHeads*this.headDim*2
,kProjSize:this.numKVHeads*this.headDim,vProjSize:this.numKVHeads*this.headDim,attnOutElems:f/4},console.log(`[B2] Allocated batch=2 buffers (${((2*(e+e+e+t+e+this.vocabSize)*4+2*f)/1e6).toFixed(1)} MB activations)`)}runWithOffsets(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroupWithOffsets(i,0,e);this.gpu.dispatch(i,[n],t,s)}makeUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let r=0;r<e.length;r++)Number.isInteger(e[r])?s.setUint32(r*4,e[r],!0):s.setFloat32(r*4,e[r],!0);const i=a+"_"+e.join("_");if(this.paramBufs[i])return this.paramBufs[i];const n=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(n,0,new Uint8Array(t)),this.paramBufs[i]=n,n}_makeMixedUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let n=0;n<e.length;n++){const r=e[n];r.u!==void 0?s.setUint32(n*4,r.u,!0):s.setFloat32(n*4,r.f,!0)}this._mixedUniformBufs||(this._mixedUniformBufs={});let i=this._mixedUniformBufs[a];return i||(i=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a}),this._mixedUniformBufs[a]=i),this.gpu.device.queue.writeBuffer(i,0,new Uint8Array(t)),i}makeUniformTyped(a,e,t){const s=e.length+t.length,i=new ArrayBuffer(Math.max(16,Math.ceil(s*4/16)*16)),n=new DataView(i);let r=0;for(const h of e)n.setUint32(r,h,!0),r+=4;for(const h of t)n.setFloat32(r,h,!0),r+=4;const u=a+"_"+[...e,...t].join("_");if(this.paramBufs[u])return this.paramBufs[u];const o=this.gpu.device.createBuffer({size:i.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(o,0,new Uint8Array(i)),this.paramBufs[u]=o,o}makeFusedNormMLPUniform(a){const e=`fused_anorm_mlp_${a}`;if(this.paramBufs[e])return this.paramBufs[e];const t=this.hiddenSize,s=this.intermediateSize,i=16+640*16,n=new 
ArrayBuffer(i),r=new DataView(n);r.setUint32(0,t,!0),r.setUint32(4,s,!0),r.setUint32(8,this.groupSize,!0),r.setFloat32(12,this.rmsEps,!0);const u=`model.language_model.layers.${a}.post_attention_layernorm.weight`,o=this._normWeightRaw?.[u];if(!o)throw new Error(`Norm weight not cached for layer ${a}`);for(let c=0;c<t/2;c++){const m=Math.floor(c/4),f=c%4;r.setUint32(16+m*16+f*4,o[c],!0)}const h=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:e});return this.gpu.device.queue.writeBuffer(h,0,new Uint8Array(n)),this.paramBufs[e]=h,h}fusedNormGptqOp(a,e,t,s,i,n,r,u){const o=this.getQWeight(s);if(!o.qweight)return null;const h=this.makeFusedNormGPTQUniform(i,r),c=u?"fused_norm_gptq":"fused_norm_gptq_noadd",m=u?"fnorm_gptq_":"fnorm_gptqna_",f=u?[a,e,o.qweight,o.scales,t,h]:[a,o.qweight,o.scales,t,h];return this.prepOpCached(`${m}${s}`,c,f,this.wg4(r))}makeFusedNormGPTQUniform(a,e){const t=`fused_norm_gptq_${a}_${e}`;if(this.paramBufs[t])return this.paramBufs[t];const s=this.hiddenSize,i=16+512*16,n=new ArrayBuffer(i),r=new DataView(n);r.setUint32(0,s,!0),r.setUint32(4,e,!0),r.setUint32(8,this.groupSize,!0),r.setFloat32(12,this.rmsEps,!0);const u=this._normWeightRaw?.[a];if(!u)throw new Error(`Norm weight not cached: ${a}`);for(let h=0;h<s/2;h++){const c=Math.floor(h/4),m=h%4;r.setUint32(16+c*16+m*4,u[h],!0)}const o=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:t});return this.gpu.device.queue.writeBuffer(o,0,new Uint8Array(n)),this.paramBufs[t]=o,o}run(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);this.gpu.dispatch(i,[n],t,s)}runCached(a,e,t,s,i=1){const n=this.pipelines[e],r=this.gpu.getCachedBindGroup(a,n,0,t);this.gpu.dispatch(n,[r],s,i)}prepOp(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);return{pipeline:i,bindGroups:[n],workgroupsX:t,workgroupsY:s}}prepOpCached(a,e,t,s,i=1){const 
n=this.pipelines[e],r=this.gpu.getCachedBindGroup(a,n,0,t);return{pipeline:n,bindGroups:[r],workgroupsX:s,workgroupsY:i}}wg(a){return Math.ceil(a/256)}wg4(a){return Math.ceil(a/32)}wg8(a){return Math.ceil(a/8)}getQWeight(a){return{qweight:this.weights[a+".qweight"],scales:this.weights[a+".scales"],qzeros:this.weights[a+".qzeros"]}}embedding(a){const e=this.weights["model.language_model.embed_tokens.weight"],t=this.makeUniform("emb_params",[a,this.hiddenSize]);this.run("embedding",[e,this.hidden,t],this.wg(this.hiddenSize))}embeddingFromVisionBuffer(a,e){const t=this.hiddenSize;this.gpu.copyBuffer(a,this.hidden,t*4,e*t*4,0)}embeddingFromArgmax(){const a=this.weights["model.language_model.embed_tokens.weight"];this._embedArgmaxParams||(this._embedArgmaxParams=this.makeUniform("emb_argmax_params",[this.hiddenSize])),this.runCached("embed_from_argmax","embed_from_argmax",[a,this.hidden,this.argmaxResult,this._embedArgmaxParams],this.wg(this.hiddenSize))}_dispatchLmHead(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,s=a/this.groupSize%4===0,i=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(s){const n=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg8(e))}else{const n=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg4(e))}}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.makeUniform("lmhead_params",[this.hiddenSize,this.vocabSize]);this.runCached("lm_head","bf16_matvec",[this.normed,t,this.logits,s],this.wg(this.vocabSize))}}rmsNorm(a,e,t,s){const i=this.makeUniform("rmsnorm_params",[this.hiddenSize,this.rmsEps]);s?this.runCached(s,"rmsnorm",[a,t,e,i],1):this.run("rmsnorm",[a,t,e,i],1)}gptqMatvec(a,e,t,s,i){const 
n=this.getQWeight(t);if(!n.qweight)return;const r=s/this.groupSize,u=r%4===0;if(this.useSplitK&&u){let o=this.splitKSplits;for(;o>1&&r%(o*4)!==0;)o>>=1;if(o>1){const h=this.makeUniform(`mv_sk_${s}_${i}_${o}`,[s,i,this.groupSize,o]);this.runCached(`sk4tf16_${t}`,"gptq_matvec_4t_f16_sk",[a,n.qweight,n.scales,this.splitKPartials,h],this.wg8(i),o);const c=this.makeUniform(`rsk_${i}_${o}`,[i,o]);this.runCached(`rsk_${t}`,"reduce_splitk",[this.splitKPartials,e,c],this.wg(i));return}}if(u){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,c],this.wg4(i))}}gptqMatvecOp(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;if(s/this.groupSize%4===0){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,o,[a,n.qweight,n.scales,e,c],this.wg4(i))}}bf16Matvec(a,e,t,s,i,n){const r=this.makeUniform(`bf16mv_${s}_${i}`,[s,i]);n?this.runCached(n,"bf16_matvec",[a,t,e,r],this.wg(i)):this.run("bf16_matvec",[a,t,e,r],this.wg(i))}siluMul(a,e,t,s,i){const n=this.makeUniform(`silu_${s}`,[s]);i?this.runCached(i,"silu_mul",[a,e,t,n],this.wg(s)):this.run("silu_mul",[a,e,t,n],this.wg(s))}addVectors(a,e,t,s){const 
i=this.makeUniform(`add_${t}`,[t]);s?this.runCached(s,"add",[a,e,i],this.wg(t)):this.run("add",[a,e,i],this.wg(t))}addAndRmsNorm(a,e,t,s,i){const n=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);i?this.runCached(i,"add_rmsnorm",[a,e,s,t,n],1):this.run("add_rmsnorm",[a,e,s,t,n],1)}mlp(a,e){const t=`model.language_model.layers.${e}.mlp`,s=this.hiddenSize,i=this.intermediateSize,n=this.getQWeight(`${t}.gate_proj`),r=this.getQWeight(`${t}.up_proj`),u=this.makeUniform(`fused_mv_${s}_${i}`,[s,i,this.groupSize]);if(s/this.groupSize%4===0){const c=this.gpu.hasF16?"fused_gate_up_silu_4t_f16":"fused_gate_up_silu_4t",m=this.gpu.hasF16?"fused_gus4t16_":"fused_gus4t_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,r.qweight,r.scales,this.mlpIntermediate,u],this.wg8(i))}else{const c=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",m=this.gpu.hasF16?"fused_gus16_":"fused_gus_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,r.qweight,r.scales,this.mlpIntermediate,u],this.wg4(i))}this.gptqMatvec(this.mlpIntermediate,this.mlpOut,`${t}.down_proj`,i,s)}fullAttentionFused(a,e,t,s,i,n,r,u){i=i||this.normed;const o=`model.language_model.layers.${t}.self_attn`,h=this.hiddenSize,c=this.headDim,m=this.numHeads,f=this.numKVHeads,p=m/f,w=t===0,B=`model.language_model.layers.${t}.input_layernorm.weight`,q=this.weights[B];if(w){const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"rmsnorm",[a,q,i,z],1)}else{const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"add_rmsnorm_ro",[a,e,q,i,z],1)}const _=this.gptqMatvecOp(i,this.qProjFull,`${o}.q_proj`,h,m*c*2),l=this.gptqMatvecOp(i,this.kProj,`${o}.k_proj`,h,f*c),d=this.gptqMatvecOp(i,this.vProj,`${o}.v_proj`,h,f*c);this.gpu.dispatchMulti([_,l,d].filter(Boolean));const 
y=this.kvCache[t],U=this._fusedSQKParams[t],g=u??s;this._gqaDv.setUint32(0,g,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,n??s,!0),this._gqaDv.setUint32(12,r??s,!0),this.gpu.device.queue.writeBuffer(U,16,this._gqaData,0,16),this.runCached(`fa_sqk_${t}`,this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,y.keys,y.values,U],m+f);const P=(u??s)+1,S=this._forceMinSplits||1,b=Math.max(S,Math.min(Math.max(1,Math.ceil(P/32)),this._maxGqaSplits)),v=b>1?this._gqaPartials:this.attnOut;if(this._gqaDv.setUint32(0,P,!0),this._gqaDv.setUint32(4,c,!0),this._gqaDv.setUint32(8,f,!0),this._gqaDv.setUint32(12,m,!0),this._gqaDv.setUint32(16,p,!0),this._gqaDv.setUint32(20,b,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.run("gqa_attention_head",[this.qProj,y.keys,y.values,v,this._gqaParamBuf],m,b),b>1){this._gqaReduceParams||(this._gqaReduceParams=this.gpu.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_reduce_params"}));const z=new Uint8Array(16),M=new DataView(z.buffer);M.setUint32(0,c,!0),M.setUint32(4,b,!0),M.setUint32(8,m,!0),this.gpu.device.queue.writeBuffer(this._gqaReduceParams,0,z),this.run("gqa_reduce",[this._gqaPartials,this.attnOut,this._gqaReduceParams],m)}const C=this.getQWeight(`${o}.o_proj`),D=m*c,O=D/this.groupSize%4===0,$=this.makeUniform(`fused_sig_mv_${D}_${h}`,[D,h,this.groupSize]);if(O){const z=this.gpu.hasF16?"fused_sigmoid_gptq_4t_f16":"fused_sigmoid_gptq_4t",M=this.gpu.hasF16?"fused_sig4t16_oproj_":"fused_sig4t_oproj_";this.runCached(`${M}${t}`,z,[this.attnOut,this.qGate,C.qweight,C.scales,this.qProj,$],this.wg8(h))}else{const z=this.gpu.hasF16?"fused_sigmoid_gptq_f16":"fused_sigmoid_gptq",M=this.gpu.hasF16?"fused_sig16_oproj_":"fused_sig_oproj_";this.runCached(`${M}${t}`,z,[this.attnOut,this.qGate,C.qweight,C.scales,this.qProj,$],this.wg4(h))}}linearAttentionFused(a,e,t,s){s=s||this.normed;const 
i=`model.language_model.layers.${t}.linear_attn`,n=this.hiddenSize,r=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,h=this.linValueHeads,m=h/r*o,f=r*(u+u+m),p=this.linValueDim,w=t===0,B=`model.language_model.layers.${t}.input_layernorm.weight`,q=this.weights[B];if(w){const b=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"rmsnorm",[a,q,s,b],1)}else{const b=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"add_rmsnorm_ro",[a,e,q,s,b],1)}{const b=[this.gptqMatvecOp(s,this.linQKV,`${i}.in_proj_qkv`,n,f),this.gptqMatvecOp(s,this.linZ,`${i}.in_proj_z`,n,p)];this.abQuantized&&(b.push(this.gptqMatvecOp(s,this.linAlpha,`${i}.in_proj_a`,n,h)),b.push(this.gptqMatvecOp(s,this.linBeta,`${i}.in_proj_b`,n,h))),this.gpu.dispatchMulti(b.filter(Boolean))}const _=this.weights[`${i}.conv1d.weight`],l=this.weights[`${i}.A_log`],d=this.weights[`${i}.dt_bias`],y=this.weights[`${i}.norm.weight`];if(this.abQuantized){const b=`fused_cdn_q_${r}_${u}_${o}_${f}_${h}`;let v=this.paramBufs[b];if(!v){const C=new ArrayBuffer(32),D=new DataView(C);D.setUint32(0,r,!0),D.setUint32(4,u,!0),D.setUint32(8,o,!0),D.setUint32(12,f,!0),D.setFloat32(16,this.rmsEps,!0),D.setUint32(20,0,!0),D.setUint32(24,h,!0),v=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_q_params"}),this.gpu.device.queue.writeBuffer(v,0,new Uint8Array(C)),this.paramBufs[b]=v}this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,this.linAlpha,this.linBeta,l,d,y,v],r)}else{const b=`fused_cdn_ext_${r}_${u}_${o}_${f}_${n}_${h}`;let v=this.paramBufs[b];if(!v){const D=new ArrayBuffer(32),k=new 
DataView(D);k.setUint32(0,r,!0),k.setUint32(4,u,!0),k.setUint32(8,o,!0),k.setUint32(12,f,!0),k.setFloat32(16,this.rmsEps,!0),k.setUint32(20,n,!0),k.setUint32(24,h,!0),v=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_ext_params"}),this.gpu.device.queue.writeBuffer(v,0,new Uint8Array(D)),this.paramBufs[b]=v}const C=this.linABWeight[t];this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,s,C,l,d,y,v],r)}const U=this.getQWeight(`${i}.out_proj`),P=p/this.groupSize%4===0,S=this.makeUniform(`fused_silu_mv_${p}_${n}`,[p,n,this.groupSize]);if(P){const b=this.gpu.hasF16?"fused_silu_gptq_4t_f16":"fused_silu_gptq_4t",v=this.gpu.hasF16?"fused_silu4t16_oproj_":"fused_silu4t_oproj_";this.runCached(`${v}${t}`,b,[this.linZ,this.linOut,U.qweight,U.scales,this.attnOut,S],this.wg8(n))}else{const b=this.gpu.hasF16?"fused_silu_gptq_f16":"fused_silu_gptq",v=this.gpu.hasF16?"fused_silu16_oproj_":"fused_silu_oproj_";this.runCached(`${v}${t}`,b,[this.linZ,this.linOut,U.qweight,U.scales,this.attnOut,S],this.wg4(n))}}fusedNormMLP(a,e,t,s,i,n){n=n||this.normed;const r=this.hiddenSize,u=`model.language_model.layers.${i}.post_attention_layernorm.weight`,o=this.weights[u],h=this.makeUniform("add_rmsnorm_params",[r,this.rmsEps]),c=`mlp_norm_${i}_${a===this.hidden?"a":"b"}`;this.runCached(c,"three_way_add_rmsnorm",[a,t,s,o,e,n,h],1),this.mlp(n,i)}decoderLayer(a,e,t,s,i,n,r,u){let o;this.layerTypes[a]==="full_attention"?(this.fullAttentionFused(t,i,a,e,void 0,n,r,u),o=this.qProj):(this.linearAttentionFused(t,i,a),o=this.attnOut),this.fusedNormMLP(t,s,i,o,a)}async forward(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let o=0;o<this.numLayers;o++){this.decoderLayer(o,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const 
n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const r=this.temperature??.7;if(r>0){const o=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(o>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:o},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:r},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const o=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,o],1)}return this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this._readAndSample()}forwardSubmit(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let o=0;o<this.numLayers;o++){this.decoderLayer(o,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const r=this.temperature??.7;if(r>0){const 
o=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(o>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:o},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:r},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const o=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,o],1)}this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch()}_buildReplayFromRecording(){const a=this.gpu.stopRecording(),e=this.pipelines.gqa_attention_head,t=[];for(const s of a)if(s.multi)for(const i of s.ops)t.push({p:i.pipeline,bg:i.bindGroup,x:i.wgX,y:i.wgY,gqa:i.pipeline===e?1:0});else t.push({p:s.pipeline,bg:s.bindGroup,x:s.wgX,y:s.wgY,gqa:s.pipeline===e?1:0});this._replayFlat=t,console.log(`[PERF] Recorded ${t.length} flat replay ops for core forward`)}_replayCoreForward(a){const e=this.gpu,t=a-this._ropeDelta,s=t+1,i=Math.min(Math.max(1,Math.ceil(s/32)),this._maxGqaSplits);this._gqaDv.setUint32(0,t,!0),this._gqaDv.setUint32(4,a,!0),this._gqaDv.setUint32(8,a,!0),this._gqaDv.setUint32(12,a,!0);for(let 
o=0;o<this.numLayers;o++)this.layerTypes[o]==="full_attention"&&e.device.queue.writeBuffer(this._fusedSQKParams[o],16,this._gqaData,0,16);this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,i,!0),e.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData,0,24),this._gqaReduceParams&&(this._gqaDv.setUint32(0,this.headDim,!0),this._gqaDv.setUint32(4,i,!0),this._gqaDv.setUint32(8,this.numHeads,!0),this._gqaDv.setUint32(12,0,!0),e.device.queue.writeBuffer(this._gqaReduceParams,0,this._gqaData,0,16));const n=e._singlePass,r=this._replayFlat,u=r.length;for(let o=0;o<u;o++){const h=r[o];n.setPipeline(h.p),n.setBindGroup(0,h.bg),n.dispatchWorkgroups(h.x,h.gqa?i:h.y)}}embeddingB2(a,e){const t=this.hiddenSize,s=this.weights["model.language_model.embed_tokens.weight"],i=this.makeUniform("emb_params",[a,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:0,size:t*4},i],this.wg(t));const n=this.makeUniform(`emb_params_${e}`,[e,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:t*4,size:t*4},n],this.wg(t))}gptqMatvecB2(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return;const r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",o=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";this.runCached(`${o}${t}`,u,[a,n.qweight,n.scales,e,r],this.wg4(i))}gptqMatvecB2Op(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;const r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",o=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";return this.prepOpCached(`${o}${t}`,u,[a,n.qweight,n.scales,e,r],this.wg4(i))}fullAttentionB2(a,e,t,s){const 
i=`model.language_model.layers.${t}.self_attn`,n=this.hiddenSize,r=this.b2._dims,u=this.headDim,o=this.numHeads,h=this.numKVHeads,c=o/h,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:n*4},p,{buffer:this.b2.normed,offset:0,size:n*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:n*4,size:n*4},p,{buffer:this.b2.normed,offset:n*4,size:n*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);const B=this.gptqMatvecB2Op(this.b2.normed,this.b2.qProjFull,`${i}.q_proj`,n,o*u*2),q=this.gptqMatvecB2Op(this.b2.normed,this.b2.kProj,`${i}.k_proj`,n,h*u),_=this.gptqMatvecB2Op(this.b2.normed,this.b2.vProj,`${i}.v_proj`,n,h*u);this.gpu.dispatchMulti([B,q,_].filter(Boolean));const l=this.kvCache[t],d=this._fusedSQKParams[t],y=r.qProjFullSize*4,U=r.kProjSize*4,g=r.vProjSize*4,P=r.qProjSize*4;this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(d,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:0,size:y},{buffer:this.b2.kProj,offset:0,size:U},{buffer:this.b2.vProj,offset:0,size:g},{buffer:this.b2.qProj,offset:0,size:P},{buffer:this.b2.qGate,offset:0,size:P},l.keys,l.values,d],o+h),this._gqaDv.setUint32(0,s+1,!0),this._gqaDv.setUint32(4,s+1,!0),this._gqaDv.setUint32(8,s+1,!0),this._gqaDv.setUint32(12,s+1,!0),this.gpu.device.queue.writeBuffer(d,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:y,size:y},{buffer:this.b2.kProj,offset:U,size:U},{buffer:this.b2.vProj,offset:g,size:g},{buffer:this.b2.qProj,offset:P,size:P},{buffer:this.b2.qGate,offset:P,size:P},l.keys,l.values,d],o+h);const 
S=s+1,b=s+2;this._gqaDv.setUint32(0,S,!0),this._gqaDv.setUint32(4,u,!0),this._gqaDv.setUint32(8,h,!0),this._gqaDv.setUint32(12,o,!0),this._gqaDv.setUint32(16,c,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf0,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:0,size:P},l.keys,l.values,{buffer:this.b2.attnOut,offset:0,size:r.qProjSize*4},this.b2._gqaParamBuf0],o),this._gqaDv.setUint32(0,b,!0),this._gqaDv.setUint32(24,s+1,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf1,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:P,size:P},l.keys,l.values,{buffer:this.b2.attnOut,offset:r.qProjSize*4,size:r.qProjSize*4},this.b2._gqaParamBuf1],o);const v=this.getQWeight(`${i}.o_proj`),C=o*u,D=this.makeUniform(`fused_sig_mv_${C}_${n}`,[C,n,this.groupSize]);this.runCached(`b2_fused_sig_oproj_${t}`,"fused_sigmoid_gptq_b2_f16",[this.b2.attnOut,this.b2.qGate,v.qweight,v.scales,this.b2.qProj,D],this.wg4(n))}linearAttentionB2(a,e,t){const s=`model.language_model.layers.${t}.linear_attn`,i=this.hiddenSize,n=this.b2._dims,r=n.linHeads,u=n.linKeyDim,o=n.linValDim;n.linEVD;const h=n.linQKVDim,c=n.valueDim,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[i,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:i*4},p,{buffer:this.b2.normed,offset:0,size:i*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:i*4,size:i*4},p,{buffer:this.b2.normed,offset:i*4,size:i*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);{const 
P=[this.gptqMatvecB2Op(this.b2.normed,this.b2.linQKV,`${s}.in_proj_qkv`,i,h),this.gptqMatvecB2Op(this.b2.normed,this.b2.linZ,`${s}.in_proj_z`,i,c)];this.abQuantized&&(P.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linAlpha,`${s}.in_proj_a`,i,this.linValueHeads)),P.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linBeta,`${s}.in_proj_b`,i,this.linValueHeads))),this.gpu.dispatchMulti(P.filter(Boolean))}const B=this.weights[`${s}.conv1d.weight`],q=this.weights[`${s}.A_log`],_=this.weights[`${s}.dt_bias`],l=this.weights[`${s}.norm.weight`],d=h*4,y=c*4;if(this.abQuantized){const P=this.linValueHeads,S=P*4,b=`fused_cdn_q_${r}_${u}_${o}_${h}_${P}`,v=this.paramBufs[b];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:0,size:y},{buffer:this.b2.linAlpha,offset:0,size:S},{buffer:this.b2.linBeta,offset:0,size:S},q,_,l,v],r),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:d,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:y,size:y},{buffer:this.b2.linAlpha,offset:S,size:S},{buffer:this.b2.linBeta,offset:S,size:S},q,_,l,v],r)}else{const P=`fused_cdn_ext_${r}_${u}_${o}_${h}_${i}_${this.linValueHeads}`,S=this.paramBufs[P],b=this.linABWeight[t];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:0,size:y},{buffer:this.b2.normed,offset:0,size:i*4},b,q,_,l,S],r),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:d,size:d},this.linConvHist[t],B,this.linState[t],{buffer:this.b2.linOut,offset:y,size:y},{buffer:this.b2.normed,offset:i*4,size:i*4},b,q,_,l,S],r)}const 
U=this.getQWeight(`${s}.out_proj`),g=this.makeUniform(`fused_silu_mv_${c}_${i}`,[c,i,this.groupSize]);this.runCached(`b2_fused_silu_oproj_${t}`,"fused_silu_gptq_b2_f16",[this.b2.linZ,this.b2.linOut,U.qweight,U.scales,this.b2.attnOut,g],this.wg4(i))}fusedNormMLPB2(a,e,t,s,i){const n=this.hiddenSize,r=this.intermediateSize,u=`model.language_model.layers.${i}.mlp`,o=`model.language_model.layers.${i}.post_attention_layernorm.weight`,h=this.weights[o],c=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.run("three_way_add_rmsnorm_b2",[a,t,s,h,e,this.b2.normed,c],2);const m=this.getQWeight(`${u}.gate_proj`),f=this.getQWeight(`${u}.up_proj`),p=this.makeUniform(`fused_mv_${n}_${r}`,[n,r,this.groupSize]);this.runCached(`b2_fused_gus_${i}`,"fused_gate_up_silu_b2_f16",[this.b2.normed,m.qweight,m.scales,f.qweight,f.scales,this.b2.mlpIntermediate,p],this.wg4(r)),this.gptqMatvecB2(this.b2.mlpIntermediate,this.b2.mlpOut,`${u}.down_proj`,r,n)}decoderLayerB2(a,e,t,s,i){let n;this.layerTypes[a]==="full_attention"?(this.fullAttentionB2(t,i,a,e),n=this.b2.qProj):(this.linearAttentionB2(t,i,a),n=this.b2.attnOut),this.fusedNormMLPB2(t,s,i,n,a)}_dispatchLmHeadB2(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,t=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]),s=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2";this.runCached("b2_lm_head",s,[this.b2.normed,this._lmHeadQWeight,this._lmHeadScales,this.b2.logits,t],this.wg4(e))}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.hiddenSize,i=this.vocabSize,n=this.makeUniform("lmhead_params",[s,i]);this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:0,size:s*4},t,{buffer:this.b2.logits,offset:0,size:i*4},n],this.wg(i)),this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:s*4,size:s*4},t,{buffer:this.b2.logits,offset:i*4,size:i*4},n],this.wg(i))}}async 
forwardB2(a,e,t){this.b2||this.initB2Buffers(),this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embeddingB2(a,e);let s=this.b2.hidden,i=this.b2.hiddenB,n=this.b2.zeroBuf;for(let f=0;f<this.numLayers;f++){this.decoderLayerB2(f,t,s,i,n),n=this.b2.mlpOut;const p=s;s=i,i=p}if(this.mtp){const f=this.hiddenSize;this.gpu.copyBuffer(s,this.mtp.savedHidden,f*4,f*4,0),this.gpu.copyBuffer(this.b2.mlpOut,this.mtp.savedMlpOut,f*4,f*4,0)}const r=this.weights["model.language_model.norm.weight"],u=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);this.run("add_rmsnorm_b2",[s,this.b2.mlpOut,r,this.b2.normed,u],2),this._dispatchLmHeadB2();const h=(this.temperature??.7)>0,c=this.vocabSize,m=this.makeUniform("argmax_params",[c]);return h?(this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.topkResult0,m],1),this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.topkResult1,m],1)):(this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.argmaxResult0,m],1),this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.argmaxResult1,m],1)),this.gpu._passCount!==void 0&&!this._b2PassCountLogged&&(console.log(`[PERF] B2: ${this.gpu._passCount} compute passes per 2-token step`),this._b2PassCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),h?(this.gpu.copyBuffer(this.b2.topkResult0,this.b2.topkReadback0,2048),this.gpu.copyBuffer(this.b2.topkResult1,this.b2.topkReadback1,2048)):(this.gpu.copyBuffer(this.b2.argmaxResult0,this.b2.argmaxReadback0,8),this.gpu.copyBuffer(this.b2.argmaxResult1,this.b2.argmaxReadback1,8)),this.gpu.endBatch(),this._readAndSampleB2(h)}async _readAndSampleB2(a){const e=[0,0];a?(await 
Promise.all([this.b2.topkReadback0.mapAsync(GPUMapMode.READ),this.b2.topkReadback1.mapAsync(GPUMapMode.READ)]),e[0]=this._sampleFromTopkReadback(this.b2.topkReadback0),e[1]=this._sampleFromTopkReadback(this.b2.topkReadback1)):(await Promise.all([this.b2.argmaxReadback0.mapAsync(GPUMapMode.READ),this.b2.argmaxReadback1.mapAsync(GPUMapMode.READ)]),e[0]=new Uint32Array(this.b2.argmaxReadback0.getMappedRange().slice(0))[0],e[1]=new Uint32Array(this.b2.argmaxReadback1.getMappedRange().slice(0))[0],this.b2.argmaxReadback0.unmap(),this.b2.argmaxReadback1.unmap());for(const t of e)this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=t:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=t);return e}_sampleFromTopkReadback(a){const e=a.getMappedRange(),t=new Uint32Array(e),s=new Float32Array(e),i=new Uint32Array(256),n=new Float32Array(256);for(let o=0;o<256;o++)i[o]=t[o*2],n[o]=s[o*2+1];a.unmap();const r=this.presencePenalty??0,u=this.repetitionPenalty??1;if((r>0||u>1)&&this._recentTokenCount>0){const o=new Set;for(let h=0;h<this._recentTokenCount;h++)o.add(this._recentTokens[h]);for(let h=0;h<256;h++)o.has(i[h])&&(r>0&&(n[h]-=r),u>1&&(n[h]=n[h]>0?n[h]/u:n[h]*u))}return this._sampleFromArrays(i,n,256)}async _readAndSample(){await this._argmaxReadback.mapAsync(GPUMapMode.READ);const a=new Uint32Array(this._argmaxReadback.getMappedRange().slice(0));this._argmaxReadback.unmap();const e=a[0];return this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=e:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=e),e}_sampleFromArrays(a,e,t){const s=this.temperature??.7,i=this.topP??.8,n=this.topK??20,r=Math.min(n,t),u=this._selBuf||(this._selBuf=new Uint32Array(64)),o=this._selValBuf||(this._selValBuf=new Float32Array(64)),h=this._usedBuf||(this._usedBuf=new Uint8Array(256));h.fill(0);for(let l=0;l<r;l++){let d=-1,y=-1/0;for(let 
U=0;U<t;U++)!h[U]&&e[U]>y&&(y=e[U],d=U);if(d<0)break;u[l]=a[d],o[l]=y,h[d]=1}const c=o[0],m=this._probBuf||(this._probBuf=new Float32Array(64));let f=0;for(let l=0;l<r;l++)m[l]=Math.exp((o[l]-c)/s),f+=m[l];for(let l=0;l<r;l++)m[l]/=f;let p=0,w=r;for(let l=0;l<r;l++)if(p+=m[l],p>=i){w=l+1;break}let B=0;for(let l=0;l<w;l++)B+=m[l];const q=Math.random()*B;let _=0;for(let l=0;l<w;l++)if(_+=m[l],_>=q)return u[l];return u[0]}_sample(a){const e=this.temperature??.7,t=this.topP??.8,s=this.topK??20,i=a.length;if(e<=0){let _=0,l=a[0];for(let d=1;d<i;d++)a[d]>l&&(l=a[d],_=d);return _}const n=Math.max(s,64),r=new Int32Array(n).fill(-1),u=new Float32Array(n).fill(-1/0);let o=-1/0;for(let _=0;_<i;_++){const l=a[_];if(l>o&&(o=l),l>u[n-1]){let d=n-1;for(;d>0&&l>u[d-1];)u[d]=u[d-1],r[d]=r[d-1],d--;u[d]=l,r[d]=_}}const h=Math.min(s,n),c=new Float32Array(h);let m=0;for(let _=0;_<h&&!(r[_]<0);_++)c[_]=Math.exp((u[_]-o)/e),m+=c[_];for(let _=0;_<h;_++)c[_]/=m;let f=0,p=h;for(let _=0;_<h;_++){if(r[_]<0){p=_;break}if(f+=c[_],f>=t){p=_+1;break}}let w=0;for(let _=0;_<p;_++)w+=c[_];const B=Math.random()*w;let q=0;for(let _=0;_<p;_++)if(q+=c[_],q>=B)return r[_];return r[0]}async generate(a,e=512,t,s){this.seqLen=0,this._recentTokenCount=0,this._replayFlat=null,this._ropeDelta=s?.ropeDelta??0;const i=this.maxSeqLen||4096;if(e=Math.min(e,i-a.length),e<=0)throw new Error(`Prompt (${a.length} tokens) exceeds context size (${i})`);const n=[...a],r=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,m=this.linValueHeads/r*o,f=r*(u+u+m);for(let P=0;P<this.numLayers;P++)if(this.layerTypes[P]==="linear_attention"){const S=r*u*m*4,b=3*f*4;this.gpu.device.queue.writeBuffer(this.linState[P],0,new Uint8Array(S)),this.gpu.device.queue.writeBuffer(this.linConvHist[P],0,new Uint8Array(b))}let p=null;if(s){let P=0;const S=s.imageTokenId,b=s.positionIds3D;for(let v=0;v<a.length;v++){const 
C=b[0][v],D=b[1][v],k=b[2][v];this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),a[v]===S?this.embeddingFromVisionBuffer(s.embedBuffer,P++):this.embedding(a[v]);let O=this.hidden,$=this.hiddenB,z=this.zeroBuf;for(let M=0;M<this.numLayers;M++){this.decoderLayer(M,C,O,$,z,D,k,v),z=this.mlpOut;const G=O;O=$,$=G}if(v===a.length-1){const M=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(O,this.mlpOut,this.normed,M,"add_final_norm"),this._dispatchLmHead();const G=this.temperature??.7;if(G>0){const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,R],1);const F=Math.random()*4294967295>>>0,T=this._makeMixedUniform("sample_params",[{f:G},{u:this.topK??20},{f:this.topP??.8},{u:F}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,T],1)}else{const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,R],1)}this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8)}this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch(),this.seqLen=v+1}p=await this._readAndSample()}else for(let P=0;P<a.length;P++)p=await this.forward(a[P],P),this.seqLen=P+1;n.push(p);const w=this.config.eos_token_id??this.textCfg.eos_token_id,B=Array.isArray(w)?w:w!=null?[w]:[248044,248046];if(t?.(p,0)||B.includes(p))return n;this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=p:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=p),this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const 
_=8;this._tokenHistoryBuf||(this._tokenHistoryBuf=this.gpu.createBuffer("token_history",_*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._tokenHistoryReadback=this.gpu.device.createBuffer({size:_*4,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"token_history_readback"}));let l=0,d=0,y=1,U=p,g=!1;for(;y<e;){const P=performance.now(),S=Math.min(_,e-y);for(let k=0;k<S;k++){const O=this.seqLen+k+this._ropeDelta;if(this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),k===0?this.embedding(U):this.embeddingFromArgmax(),g)this._replayCoreForward(O);else{let G=this.hidden,R=this.hiddenB,F=this.zeroBuf;k===0&&!this._replayFlat&&(this._forceMinSplits=2,this.gpu.startRecording());for(let x=0;x<this.numLayers;x++){this.decoderLayer(x,O,G,R,F),F=this.mlpOut;const V=G;G=R,R=V}const T=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(G,this.mlpOut,this.normed,T,"add_final_norm"),this._dispatchLmHead(),k===0&&this.gpu._recording&&(this._forceMinSplits=0,this._buildReplayFromRecording())}const $=this.temperature??.7;if($>0){const G=this.repetitionPenalty??1,R=this.presencePenalty??0,F=this._recentTokenCount+k;if(F>0&&(G>1||R>0)){const j=this._makeMixedUniform("penalty_params",[{u:Math.min(F,this._repMaxTokens)},{f:G},{f:R},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,j],Math.ceil(Math.min(F,this._repMaxTokens)/256))}const T=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,T],1);const x=Math.random()*4294967295>>>0,V=this._makeMixedUniform("sample_params",[{f:$},{u:this.topK??20},{f:this.topP??.8},{u:x}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,V],1)}else{const G=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,G],1)}const 
z=(this._recentTokenCount+k)%this._repMaxTokens,M=this.makeUniform(`append_${k}`,[z,k]);this.run("append_token",[this.argmaxResult,this._gpuRecentTokens,this._tokenHistoryBuf,M],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch()}!g&&this._replayFlat&&(g=!0);const b=this.gpu.device.createCommandEncoder();b.copyBufferToBuffer(this._tokenHistoryBuf,0,this._tokenHistoryReadback,0,S*4),this.gpu.device.queue.submit([b.finish()]),await this._tokenHistoryReadback.mapAsync(GPUMapMode.READ);const v=new Uint32Array(this._tokenHistoryReadback.getMappedRange().slice(0,S*4));this._tokenHistoryReadback.unmap();const C=performance.now();l+=C-P,d+=S;let D=!1;for(let k=0;k<S;k++){const O=v[k];n.push(O),this.seqLen++,this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=O:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=O);const $=t?.(O,y);if(y++,$||B.includes(O)){D=!0;break}}if(d%50<_&&console.log(`[T @${d}] ${(l/d).toFixed(1)}ms/tok (batch=${_})`),D)break;U=v[S-1],this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4)}return d>0&&console.log(`[T final @${d}] ${(l/d).toFixed(1)}ms/tok (batch=${_})`),n}async _quantizeBF16Weight(a,e,t,s){const i=this.groupSize,n=e/8,r=e/i,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,o=this.gpu.createBuffer(`${s}_qweight`,n*t*4,u),h=this.gpu.createBuffer(`${s}_scales_f32`,r*t*4,u),c=Math.ceil(r*t/2)*4,m=this.gpu.createBuffer(`${s}_scales`,c,u),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-BbLjsC0p.js").then(C=>C.b);return{SHADERS:v}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${s}_qparams`,new 
Uint32Array([e,t,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,o,h,p]),B=65535,q=Math.min(t,B),_=Math.ceil(t/B),l=this.gpu.device.createCommandEncoder(),d=l.beginComputePass();d.setPipeline(f),d.setBindGroup(0,w),d.dispatchWorkgroups(q,_),d.end(),this.gpu.device.queue.submit([l.finish()]);const y=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await A(async()=>{const{SHADERS:v}=await import("./gpu-ops-BbLjsC0p.js").then(C=>C.b);return{SHADERS:v}},[])).SHADERS.pack_f32_to_f16_pairs),U=Math.ceil(r*t/2),g=this.gpu.createBufferFromData(`${s}_pparams`,new Uint32Array([U]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),P=this.gpu.createBindGroup(y,0,[h,m,g]),S=this.gpu.device.createCommandEncoder(),b=S.beginComputePass();return b.setPipeline(y),b.setBindGroup(0,P),b.dispatchWorkgroups(Math.ceil(U/256)),b.end(),this.gpu.device.queue.submit([S.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),g.destroy(),{qweight:o,scales:m}}async initMTP(a){if(this.mtp)return;const e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=performance.now(),n="Qwen/Qwen3.5-2B";console.log(`[MTP] Downloading MTP weights from ${n}...`);const{loadMTPWeights:r}=await A(async()=>{const{loadMTPWeights:B}=await import("./safetensors-loader-CwGm5mJX.js");return{loadMTPWeights:B}},[]),u=await r(n,a);console.log(`[MTP] Downloaded ${Object.keys(u).length} MTP tensors`),this.mtp={weights:{},qweights:{}};const o={};for(const[B,q]of Object.entries(u)){const _=this.gpu.createBufferFromData(`mtp_${B}`,q.data);o[B]=_,this.mtp.weights[B]=_}const 
h=[{name:"mtp.fc.weight",K:e*2,N:e},{name:"mtp.layers.0.self_attn.q_proj.weight",K:e,N:this.numHeads*this.headDim*2},{name:"mtp.layers.0.self_attn.k_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.v_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.o_proj.weight",K:e,N:e},{name:"mtp.layers.0.mlp.gate_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.up_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.down_proj.weight",K:t,N:e}];for(const{name:B,K:q,N:_}of h){const{qweight:l,scales:d}=await this._quantizeBF16Weight(o[B],q,_,`mtp_${B}`);this.mtp.qweights[B]={qweight:l,scales:d},o[B].destroy(),delete this.mtp.weights[B]}this.mtp.normRaw={};const c=["mtp.pre_fc_norm_hidden.weight","mtp.pre_fc_norm_embedding.weight","mtp.layers.0.input_layernorm.weight","mtp.layers.0.post_attention_layernorm.weight","mtp.norm.weight","mtp.layers.0.self_attn.q_norm.weight","mtp.layers.0.self_attn.k_norm.weight"];for(const B of c){const q=u[B];q&&(this.mtp.normRaw[B]=new Uint32Array(q.data.buffer.slice(q.data.byteOffset,q.data.byteOffset+q.data.byteLength)))}const f=4096*this.numKVHeads*this.headDim*4;this.mtp.kvCache={keys:this.gpu.createBuffer("mtp_kv_keys",f,s),values:this.gpu.createBuffer("mtp_kv_values",f,s)},this.mtp.concatBuf=this.gpu.createBuffer("mtp_concat",e*2*4,s),this.mtp.fcOut=this.gpu.createBuffer("mtp_fc_out",e*4,s),this.mtp.savedHidden=this.gpu.createBuffer("mtp_saved_hidden",e*4,s),this.mtp.savedMlpOut=this.gpu.createBuffer("mtp_saved_mlp_out",e*4,s),this.mtp.seqLen=0,this._buildMTPSplitQKNormUniform(),this._buildMTPFusedMLPUniform(),await this._buildTrimmedLmHead(8e3);const p=((performance.now()-i)/1e3).toFixed(1),w=h.length;console.log(`[MTP] Initialized in ${p}s: ${w} INT4 weights, 1 KV cache layer`)}_buildMTPSplitQKNormUniform(){const a=this.numHeads,e=this.numKVHeads,t=this.headDim,s=a*t,i=e*t,n=(s+i)/2,u=32+Math.ceil(n/4)*16,o=new ArrayBuffer(u),h=new 
DataView(o);h.setUint32(0,a,!0),h.setUint32(4,e,!0),h.setUint32(8,t,!0),h.setFloat32(12,this.rmsEps,!0),h.setUint32(16,0,!0),h.setUint32(20,0,!0),h.setUint32(24,0,!0),h.setUint32(28,0,!0);const c=this.mtp.normRaw["mtp.layers.0.self_attn.q_norm.weight"],m=this.mtp.normRaw["mtp.layers.0.self_attn.k_norm.weight"];if(c)for(let p=0;p<s/2;p++){const w=Math.floor(p/4),B=p%4;h.setUint32(32+w*16+B*4,c[p],!0)}if(m){const p=s/2;for(let w=0;w<i/2;w++){const B=p+w,q=Math.floor(B/4),_=B%4;h.setUint32(32+q*16+_*4,m[w],!0)}}const f=this.gpu.device.createBuffer({size:u,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"mtp_fused_sqk"});this.gpu.device.queue.writeBuffer(f,0,new Uint8Array(o)),this.mtp.fusedSQKParams=f}_buildMTPFusedMLPUniform(){}async _buildTrimmedLmHead(a=8e3){const e=this.hiddenSize,t=this.gpu,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=new Uint32Array(a);for(let y=0;y<a;y++)i[y]=y;this.mtp.trimmedToFull=i,this.mtp.trimmedVocabSize=a;const n=t.createBufferFromData("mtp_trim_indices",i),u=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",o=this.weights[u],h=e/2,c=a*h*4,m=t.createBuffer("mtp_trim_gathered",c,s),f=(await A(async()=>{const{SHADERS:y}=await import("./gpu-ops-BbLjsC0p.js").then(U=>U.b);return{SHADERS:y}},[])).SHADERS.gather_rows_bf16,p=t.getOrCreatePipeline("gather_rows_bf16",f),w=t.createBufferFromData("mtp_trim_gparams",new Uint32Array([h,a]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),B=t.createBindGroup(p,0,[o,n,m,w]),q=t.device.createCommandEncoder(),_=q.beginComputePass();_.setPipeline(p),_.setBindGroup(0,B),_.dispatchWorkgroups(Math.ceil(h/256),a),_.end(),t.device.queue.submit([q.finish()]);const{qweight:l,scales:d}=await 
this._quantizeBF16Weight(m,e,a,"mtp_trim_lmhead");this.mtp.trimmedLmHead={qweight:l,scales:d},this.mtp.trimmedLogits=t.createBuffer("mtp_trimmed_logits",a*4,s),m.destroy(),n.destroy(),w.destroy(),console.log(`[MTP] Trimmed lm_head: ${a} tokens, ${(l.size/1024/1024).toFixed(1)}MB qw + ${(d.size/1024/1024).toFixed(1)}MB sc`)}_dispatchTrimmedLmHead(){const a=this.hiddenSize,e=this.mtp.trimmedVocabSize,t=this.mtp.trimmedLmHead,i=a/this.groupSize%4===0,n=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(i){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("mtp_trim_lmhead",r,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg8(e))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("mtp_trim_lmhead",r,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg4(e))}}_mtpGetQWeight(a){return this.mtp.qweights[a]}_mtpGptqMatvec(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)throw new Error(`MTP weight not found: ${t}`);const u=s/this.groupSize%4===0,o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg4(i))}}_mtpSaveHidden(a,e){this.mtp&&(this.gpu.copyBuffer(a,this.mtp.savedHidden,this.hiddenSize*4),this.gpu.copyBuffer(e,this.mtp.savedMlpOut,this.hiddenSize*4))}async mtpForward(a){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");return this.mtpForwardSubmit(a),await this._readAndSample()}mtpForwardSubmit(a){const e=this.hiddenSize,t=this.intermediateSize,s=this.mtp.seqLen;this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);const i=this.makeUniform("add_rmsnorm_params",[e,this.rmsEps]);this.runCached("mtp_emb_norm","rmsnorm",[this.hidden,this.mtp.weights["mtp.pre_fc_norm_embedding.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4),this.runCached("mtp_hid_norm","add_rmsnorm_ro",[this.mtp.savedHidden,this.mtp.savedMlpOut,this.mtp.weights["mtp.pre_fc_norm_hidden.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4,0,e*4),this._mtpGptqMatvec(this.mtp.concatBuf,this.mtp.fcOut,"mtp.fc.weight",e*2,e),this.runCached("mtp_input_norm","rmsnorm",[this.mtp.fcOut,this.mtp.weights["mtp.layers.0.input_layernorm.weight"],this.normed,i],1);const n="mtp.layers.0.self_attn",r=this._mtpGptqMatvecOp(this.normed,this.qProjFull,`${n}.q_proj.weight`,e,this.numHeads*this.headDim*2),u=this._mtpGptqMatvecOp(this.normed,this.kProj,`${n}.k_proj.weight`,e,this.numKVHeads*this.headDim),o=this._mtpGptqMatvecOp(this.normed,this.vProj,`${n}.v_proj.weight`,e,this.numKVHeads*this.headDim);this.gpu.dispatchMulti([r,u,o].filter(Boolean)),this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(this.mtp.fusedSQKParams,16,this._gqaData,0,16),this.runCached("mtp_sqk",this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.mtp.fusedSQKParams],this.numHeads+this.numKVHeads);const 
h=s+1;
// ^ s is this.mtp.seqLen at entry, so h is one past it.
// Fill the shared attention parameter block and run `gqa_attention_head` over
// the MTP KV cache, one workgroup per query head. Field meanings are defined
// by the shader, which is not visible here; offset 0 receives h and offset 24
// receives s (presumably cached length incl. the current token, and the
// current position - confirm against the shader).
this._gqaDv.setUint32(0,h,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.runCached("mtp_gqa","gqa_attention_head",[this.qProj,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.attnOut,this._gqaParamBuf],this.numHeads);
// Output projection fused with the sigmoid gate. The input dimension m is
// numHeads*headDim; c is dereferenced without a presence check.
const c=this._mtpGetQWeight(`${n}.o_proj.weight`),m=this.numHeads*this.headDim,f=this.makeUniform(`fused_sig_mv_${m}_${e}`,[m,e,this.groupSize]);
// Kernel choice: 4t+f16 when m/groupSize is a multiple of 4 and f16 is
// available, else plain f16, else the f32 kernel. The projection result is
// written into this.qProj, which the following add+norm reads together with
// this.mtp.fcOut.
m/this.groupSize%4===0&&this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_4t_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg8(e)):this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)):this.runCached("mtp_sig_oproj","fused_sigmoid_gptq",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)),this.runCached("mtp_post_norm","add_rmsnorm_ro",[this.mtp.fcOut,this.qProj,this.mtp.weights["mtp.layers.0.post_attention_layernorm.weight"],this.normed,i],1);
// MLP: fused gate/up projection with SiLU into this.mlpIntermediate.
const B=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",q=this.makeUniform(`fused_mv_${e}_${t}`,[e,t,this.groupSize]),_=this.mtp.qweights["mtp.layers.0.mlp.gate_proj.weight"],l=this.mtp.qweights["mtp.layers.0.mlp.up_proj.weight"];
// Then the down projection into this.mlpOut, a three-input add + RMSNorm
// (fcOut, qProj, mlpOut) writing this.hidden and this.normed, and the LM head.
this.runCached("mtp_gate_up_silu",B,[this.normed,_.qweight,_.scales,l.qweight,l.scales,this.mlpIntermediate,q],this.wg4(t)),this._mtpGptqMatvec(this.mlpIntermediate,this.mlpOut,"mtp.layers.0.mlp.down_proj.weight",t,e),this.runCached("mtp_final_norm","three_way_add_rmsnorm",[this.mtp.fcOut,this.qProj,this.mlpOut,this.mtp.weights["mtp.norm.weight"],this.hidden,this.normed,i],1),this._dispatchLmHead();
const
d=this.makeUniform("mtp_argmax_params",[this.vocabSize]);this.runCached("mtp_argmax","argmax",[this.logits,this.argmaxResult,d],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this.mtp.seqLen++}_mtpSnapshotDeltaNet(){if(!this.mtp._deltaNetSnapshot){const e=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.mtp._deltaNetSnapshot={states:{},convs:{}};for(let t=0;t<this.numLayers;t++)this.layerTypes[t]==="linear_attention"&&(this.mtp._deltaNetSnapshot.states[t]=this.gpu.createBuffer(`mtp_snap_state_${t}`,this.linState[t].size,e),this.mtp._deltaNetSnapshot.convs[t]=this.gpu.createBuffer(`mtp_snap_conv_${t}`,this.linConvHist[t].size,e))}const a=this.mtp._deltaNetSnapshot;a.mtpSeqLen=this.mtp.seqLen;for(const e of Object.keys(a.states))this.gpu.copyBuffer(this.linState[e],a.states[e],this.linState[e].size),this.gpu.copyBuffer(this.linConvHist[e],a.convs[e],this.linConvHist[e].size)}_mtpRestoreDeltaNet(){const a=this.mtp._deltaNetSnapshot;if(a){a.mtpSeqLen!==void 0&&(this.mtp.seqLen=a.mtpSeqLen);for(const e of Object.keys(a.states))this.gpu.copyBuffer(a.states[e],this.linState[e],this.linState[e].size),this.gpu.copyBuffer(a.convs[e],this.linConvHist[e],this.linConvHist[e].size)}}async generateWithMTP(a,e=512,t){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");this.seqLen=0,this.mtp.seqLen=0,this._recentTokenCount=0;const s=[...a],i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,o=this.linValueHeads/i*r,h=i*(n+n+o);for(let g=0;g<this.numLayers;g++)if(this.layerTypes[g]==="linear_attention"){const P=i*n*o*4,S=3*h*4;this.gpu.device.queue.writeBuffer(this.linState[g],0,new Uint8Array(P)),this.gpu.device.queue.writeBuffer(this.linConvHist[g],0,new Uint8Array(S))}let c=null;for(let g=0;g<a.length;g++)c=await this.forward(a[g],g),this.seqLen=g+1;s.push(c);const m=this.config.eos_token_id??this.textCfg.eos_token_id,f=Array.isArray(m)?m:m!=null?[m]:[248044,248046];if(t?.(c,0)||f.includes(c))return s;let w=1,B=0,q=0,_=c,l=0,d=0;for(;w<e;){const g=performance.now(),P=await this.mtpForward(_);this._mtpSnapshotDeltaNet();const S=this.seqLen,b=await this.forwardB2(_,P,this.seqLen);this.seqLen+=2;const v=b[0],C=b[1];if(v===P){B++,s.push(P),w++;let k=t?.(P,w-1);if(k||f.includes(P)||(s.push(C),w++,k=t?.(C,w-1),k||f.includes(C)))break;_=C}else{q++,this._mtpRestoreDeltaNet(),this.seqLen=S;const k=await this.forward(_,this.seqLen);if(this.seqLen++,s.push(k),w++,t?.(k,w-1)||f.includes(k))break;_=k}const D=performance.now();if(l+=D-g,d++,d%25===0){const k=B/(B+q)*100,O=w/d;console.log(`[MTP @${d}] ${(l/d).toFixed(1)}ms/step, ${(w/(l/1e3)).toFixed(0)} tok/s, accept=${k.toFixed(0)}%, ${O.toFixed(1)} tok/step`)}}const y=B/Math.max(1,B+q)*100;return console.log(`[MTP final] ${(w/((l||1)/1e3)).toFixed(0)} tok/s, accept=${y.toFixed(0)}% (${B}/${B+q}), ${w} tokens`),s}_mtpGptqMatvecOp(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)return null;const u=s/this.groupSize%4===0,o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";return this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";return 
this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,o],this.wg4(i))}}}export{N as Qwen35Model};
assets/{test-DZKu3oxu.js → test-BK90_Upb.js} RENAMED
@@ -1,10 +1,10 @@
1
- const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["./qwen35-model-BwnUri7A.js","./gpu-ops-DKsrMEcC.js"])))=>i.map(i=>d[i]); /* Maps each index in `i` to an asset path. The two-entry path table is created on first call and memoised on the function object as `m.f`, so later calls reuse the same array. */
2
- import{G as nt,S as ct,_ as et}from"./gpu-ops-DKsrMEcC.js";class ot{constructor(){this.gpu=null,this.results=[],this.pipelines={}}async init(){this.gpu=new nt,await this.gpu.init();for(const[e,t]of Object.entries(ct))this.pipelines[e]=this.gpu.getOrCreatePipeline(e,t);return this}async runAll(e){const t=[["GPTQ MatVec",()=>this.testGPTQMatvec()],["GPTQ MatVec (group boundary)",()=>this.testGPTQMatvecGroups()],["GPTQ MatVec (gs=32)",()=>this.testGPTQMatvecGS32()],["GPTQ Split-K",()=>this.testGPTQSplitK()],["GPTQ F16",()=>this.testGPTQF16()],["BF16 MatVec",()=>this.testBF16Matvec()],["RMSNorm",()=>this.testRMSNorm()],["RMSNorm (1+w formula)",()=>this.testRMSNorm1PlusW()],["SiLU * Mul",()=>this.testSiLUMul()],["Vector Add",()=>this.testAdd()],["Embedding (BF16)",()=>this.testEmbedding()],["Argmax",()=>this.testArgmax()],["Argmax (large)",()=>this.testArgmaxLarge()],["Split interleaved",()=>this.testSplit()],["Sigmoid Mul",()=>this.testSigmoidMul()],["Head RMSNorm (1+w)",()=>this.testHeadRMSNorm()],["Head RMSNorm (nogated, w)",()=>this.testHeadRMSNormNogated()],["Causal Conv1d",()=>this.testCausalConv1d()],["KV Cache Store",()=>this.testKVCacheStore()],["GQA Attention (single pos)",()=>this.testGQAAttention()],["DeltaNet Recurrent",()=>this.testDeltaNetRecurrent()],["Add + RMSNorm (fused)",()=>this.testAddRMSNorm()],["Three-way Add + RMSNorm",()=>this.testThreeWayAddRMSNorm()],["GQA RoPE rotate_half",()=>this.testGQARoPERotateHalf()],["Fused sigmoid + GPTQ",()=>this.testFusedSigmoidGPTQ()],["Fused SiLU + GPTQ",()=>this.testFusedSiluGPTQ()],["Fused AddNorm+GateUp+SiLU",()=>this.testFusedAddNormGateUpSiLU()],["Fused Split+QKNorm+KVStore",()=>this.testFusedSplitQKNormKVStore()],["Add+RMSNorm B=2",()=>this.testAddRMSNormB2()],["Add+RMSNorm RO B=2",()=>this.testAddRMSNormROB2()],["Three-way Add+RMSNorm B=2",()=>this.testThreeWayAddRMSNormB2()],["Fused SiLU+GPTQ B=2",()=>this.testFusedSiluGPTQB2()],["Fused Sigmoid+GPTQ B=2",()=>this.testFusedSigmoidGPTQB2()],["Fused 
GateUp+SiLU B=2",()=>this.testFusedGateUpSiLUB2()],["GPTQ Matmul B=2",()=>this.testGPTQMatmulB2()],["GPTQ Matmul B=2 F16",()=>this.testGPTQMatmulB2F16()],["GPTQ Matmul B=2 4T F16",()=>this.testGPTQMatmulB2_4T_F16()]];for(const[o,s]of t)try{const r=await s();this.results.push({name:o,...r}),e?.({name:o,...r})}catch(r){this.results.push({name:o,pass:!1,error:r.message,maxErr:NaN}),e?.({name:o,pass:!1,error:r.message,maxErr:NaN})}return this.results}destroy(){this.gpu?.destroy()}makeBuffer(e,t,o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){return this.gpu.createBufferFromData(e,new Uint8Array(t.buffer,t.byteOffset,t.byteLength),o)}makeF32Buffer(e,t){return this.makeBuffer(e,new Float32Array(t))}makeU32Buffer(e,t){return this.makeBuffer(e,new Uint32Array(t))}makeOutputBuffer(e,t){return this.gpu.createBuffer(e,t*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC)}makeUniform(e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),o=new DataView(t);for(let s=0;s<e.length;s++)typeof e[s]=="number"&&!Number.isInteger(e[s])?o.setFloat32(s*4,e[s],!0):o.setUint32(s*4,e[s],!0);return this.gpu.createBufferFromData("uniform",new Uint8Array(t),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}makeUniformTyped(e,t){const o=e.length+t.length,s=new ArrayBuffer(Math.max(16,Math.ceil(o*4/16)*16)),r=new DataView(s);let a=0;for(const d of e)r.setUint32(a,d,!0),a+=4;for(const d of t)r.setFloat32(a,d,!0),a+=4;return this.gpu.createBufferFromData("uniform_typed",new Uint8Array(s),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}dispatch(e,t,o,s=1){const r=this.pipelines[e],a=this.gpu.createBindGroup(r,0,t);this.gpu.dispatch(r,[a],o,s)}async readback(e,t){return this.gpu.readBuffer(e,t*4)}async readbackU32(e,t){const o=this.gpu.createReadbackBuffer("_rb",t*4),s=this.gpu.device.createCommandEncoder();s.copyBufferToBuffer(e,0,o,0,t*4),this.gpu.device.queue.submit([s.finish()]),await o.mapAsync(GPUMapMode.READ);const r=new 
Uint32Array(o.getMappedRange().slice(0));return o.unmap(),o.destroy(),r}compare(e,t,o=1e-4){let s=0;const r=[];for(let a=0;a<t.length;a++){const d=Math.abs(e[a]-t[a]);s=Math.max(s,d),d>o&&r.push({idx:a,got:e[a],expected:t[a],err:d})}return{pass:r.length===0,maxErr:s,errors:r.slice(0,5)}}cpuF32ToBF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const a=new Uint32Array(new Float32Array([e[s]]).buffer)[0]>>>16,d=Math.floor(s/2);s%2===0?o[d]=a:o[d]|=a<<16}return o}cpuF32ToF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const r=this.f32ToF16(e[s]),a=Math.floor(s/2);s%2===0?o[a]=r:o[a]|=r<<16}return o}f32ToF16(e){const t=new ArrayBuffer(4);new Float32Array(t)[0]=e;const o=new Uint32Array(t)[0],s=o>>31&1;let r=o>>23&255,a=o&8388607;return r===0?s<<15:r===255?s<<15|31744|(a?512:0):(r=r-127+15,r>=31?s<<15|31744:r<=0?s<<15:s<<15|r<<10|a>>13)}f16ToF32(e){const t=e>>15&1,o=e>>10&31,s=e&1023;return o===0?s===0?t?-0:0:(t?-1:1)*Math.pow(2,-14)*(s/1024):o===31?s?NaN:t?-1/0:1/0:(t?-1:1)*Math.pow(2,o-15)*(1+s/1024)}bf16ToF32(e){const t=e<<16,o=new ArrayBuffer(4);return new Uint32Array(o)[0]=t,new Float32Array(o)[0]}async testGPTQMatvec(){const r=new Float32Array(64);for(let k=0;k<64;k++)r[k]=(k+1)*.1;const a=new Uint32Array(32),d=new Float32Array(256);for(let k=0;k<8;k++)for(let u=0;u<4;u++){let m=0;for(let g=0;g<8;g++){const i=k*8+g,c=(i+u)%15;d[i*4+u]=c-8,m|=c<<g*4}a[k*4+u]=m}const l=new Float32Array(4);for(let k=0;k<4;k++)l[k]=.5+k*.1;const w=this.cpuF32ToF16Packed(l),h=new Float32Array(4);for(let k=0;k<4;k++){let u=0;for(let m=0;m<64;m++){const i=Math.floor(m/64)*4+k,c=w[Math.floor(i/2)]>>i%2*16&65535,_=this.f16ToF32(c);u+=r[m]*d[m*4+k]*_}h[k]=u}const F=this.makeF32Buffer("gptq_input",r),n=this.makeU32Buffer("gptq_qw",a),B=this.makeU32Buffer("gptq_scales",w),f=this.makeOutputBuffer("gptq_out",4),p=this.makeUniform([64,4,64]);this.dispatch("gptq_matvec",[F,n,B,f,p],Math.ceil(4/32));const 
y=await this.readback(f,4);return this.compare(y,h,.01)}async testGPTQMatvecGroups(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const l=128/64,w=new Float32Array(l*4);for(let u=0;u<l*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/64)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq2_in",r),B=this.makeU32Buffer("gptq2_qw",a),f=this.makeU32Buffer("gptq2_sc",h),p=this.makeOutputBuffer("gptq2_out",4),y=this.makeUniform([128,4,64]);this.dispatch("gptq_matvec",[n,B,f,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQMatvecGS32(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const l=128/32,w=new Float32Array(l*4);for(let u=0;u<l*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/32)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq32_in",r),B=this.makeU32Buffer("gptq32_qw",a),f=this.makeU32Buffer("gptq32_sc",h),p=this.makeOutputBuffer("gptq32_out",4),y=this.makeUniform([128,4,32]);this.dispatch("gptq_matvec",[n,B,f,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQSplitK(){const a=new Float32Array(256);for(let i=0;i<256;i++)a[i]=Math.sin(i*.3)*.5;const d=new Uint32Array(1024),l=new Float32Array(256*32);for(let 
i=0;i<32;i++)for(let c=0;c<32;c++){let _=0;for(let b=0;b<8;b++){const N=i*8+b,x=(N*3+c*7)%15;l[N*32+c]=x-8,_|=x<<b*4}d[i*32+c]=_}const w=256/64,h=new Float32Array(w*32);for(let i=0;i<w;i++)for(let c=0;c<32;c++)h[i*32+c]=.3+(i*32+c)*.02;const F=this.cpuF32ToF16Packed(h),n=new Float32Array(32);for(let i=0;i<32;i++){let c=0;for(let _=0;_<256;_++){const N=Math.floor(_/64)*32+i,x=F[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);c+=a[_]*l[_*32+i]*q}n[i]=c}const B=this.makeF32Buffer("sk_in",a),f=this.makeU32Buffer("sk_qw",d),p=this.makeU32Buffer("sk_sc",F),y=this.makeOutputBuffer("sk_partials",128),k=this.makeOutputBuffer("sk_out",32),u=this.makeUniform([256,32,64,4]),m=this.makeUniform([32,4]);this.dispatch("gptq_splitk",[B,f,p,y,u],Math.ceil(32/32),4),this.dispatch("reduce_splitk",[y,k,m],Math.ceil(32/256));const g=await this.readback(k,32);return this.compare(g,n,.01)}async testGPTQF16(){if(!this.gpu.hasF16||!this.pipelines.gptq_matvec_f16)return{pass:!0,maxErr:0,note:"shader-f16 not available, skipped"};const e=128,t=4,o=64,s=e/8,r=new Float32Array(e);for(let u=0;u<e;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let u=0;u<s;u++)for(let m=0;m<t;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c+m*3)%15;d[c*t+m]=_-8,g|=_<<i*4}a[u*t+m]=g}const l=e/o,w=new Float32Array(l*t);for(let u=0;u<l;u++)for(let m=0;m<t;m++)w[u*t+m]=.5+u*.1;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(t);for(let u=0;u<t;u++){let m=0;for(let g=0;g<e;g++){const c=Math.floor(g/o)*t+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*t+u]*b}F[u]=m}const n=this.makeF32Buffer("f16_in",r),B=this.makeU32Buffer("f16_qw",a),f=this.makeU32Buffer("f16_sc",h),p=this.makeOutputBuffer("f16_out",t),y=this.makeUniform([e,t,o]);this.dispatch("gptq_matvec_f16",[n,B,f,p,y],Math.ceil(t/32));const k=await this.readback(p,t);return this.compare(k,F,.5)}async testBF16Matvec(){const o=new Float32Array(8);for(let n=0;n<8;n++)o[n]=(n+1)*.2;const s=new 
Float32Array(32);for(let n=0;n<4;n++)for(let B=0;B<8;B++)s[n*8+B]=Math.cos(n*.5+B*.3);const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4);for(let n=0;n<4;n++){let B=0;for(let f=0;f<8;f++){const p=n*8+f,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);B+=o[f]*u}a[n]=B}const d=this.makeF32Buffer("bf16mv_in",o),l=this.makeU32Buffer("bf16mv_w",r),w=this.makeOutputBuffer("bf16mv_out",4),h=this.makeUniform([8,4]);this.dispatch("bf16_matvec",[d,l,w,h],Math.ceil(4/32));const F=await this.readback(w,4);return this.compare(F,a,.001)}async testRMSNorm(){const o=new Float32Array(16);for(let f=0;f<16;f++)o[f]=(f-8)*.3;const s=new Float32Array(16);for(let f=0;f<16;f++)s[f]=Math.sin(f*.4)*.1;const r=this.cpuF32ToBF16Packed(s);let a=0;for(let f=0;f<16;f++)a+=o[f]*o[f];const d=1/Math.sqrt(a/16+1e-6),l=new Float32Array(16);for(let f=0;f<16;f++){const p=f,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);l[f]=o[f]*d*(1+u)}const w=this.makeF32Buffer("rmsn_in",o),h=this.makeU32Buffer("rmsn_w",r),F=this.makeOutputBuffer("rmsn_out",16),n=this.makeUniformTyped([16],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,16);return this.compare(B,l,1e-4)}async testRMSNorm1PlusW(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8).fill(0),r=this.cpuF32ToBF16Packed(s);let a=0;for(let f=0;f<8;f++)a+=o[f]*o[f];const d=1/Math.sqrt(a/8+1e-6),l=new Float32Array(8);for(let f=0;f<8;f++)l[f]=o[f]*d;const w=this.makeF32Buffer("rmsn1_in",o),h=this.makeU32Buffer("rmsn1_w",r),F=this.makeOutputBuffer("rmsn1_out",8),n=this.makeUniformTyped([8],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,8);return this.compare(B,l,1e-4)}async testSiLUMul(){const t=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8);for(let h=0;h<8;h++){const F=t[h]/(1+Math.exp(-t[h]));s[h]=F*o[h]}const 
r=this.makeF32Buffer("silu_gate",t),a=this.makeF32Buffer("silu_up",o),d=this.makeOutputBuffer("silu_out",8),l=this.makeUniform([8]);this.dispatch("silu_mul",[r,a,d,l],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testAdd(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([.1,.2,.3,.4,.5,.6,.7,.8]),s=new Float32Array(8);for(let w=0;w<8;w++)s[w]=t[w]+o[w];const r=this.makeF32Buffer("add_a",t),a=this.makeF32Buffer("add_b",o),d=this.makeUniform([8]);this.dispatch("add",[r,a,d],Math.ceil(8/32));const l=await this.readback(r,8);return this.compare(l,s,1e-6)}async testEmbedding(){const o=new Float32Array(32);for(let F=0;F<o.length;F++)o[F]=(F+1)*.01;const s=this.cpuF32ToBF16Packed(o),r=2,a=new Float32Array(8);for(let F=0;F<8;F++){const n=r*8+F,B=s[Math.floor(n/2)],f=n%2===0?B&65535:B>>16&65535;a[F]=this.bf16ToF32(f)}const d=this.makeU32Buffer("emb_w",s),l=this.makeOutputBuffer("emb_out",8),w=this.makeUniform([r,8]);this.dispatch("embedding",[d,l,w],Math.ceil(8/256));const h=await this.readback(l,8);return this.compare(h,a,.001)}async testArgmax(){const t=new Float32Array(16);for(let w=0;w<16;w++)t[w]=Math.sin(w*.7)*3;const o=t.indexOf(Math.max(...t)),s=this.makeF32Buffer("argmax_in",t),r=this.makeOutputBuffer("argmax_res",2),a=this.makeUniform([16]);this.dispatch("argmax",[s,r,a],1);const l=(await this.readbackU32(r,2))[0];return{pass:l===o,maxErr:Math.abs(l-o),errors:l!==o?[{idx:0,got:l,expected:o}]:[]}}async testArgmaxLarge(){const t=new Float32Array(1024);for(let d=0;d<1024;d++)t[d]=Math.sin(d*.01)-2;t[773]=99;const o=this.makeF32Buffer("argmax_lg_in",t),s=this.makeOutputBuffer("argmax_lg_res",2),r=this.makeUniform([1024]);this.dispatch("argmax",[o,s,r],1);const a=await this.readbackU32(s,2);return{pass:a[0]===773,maxErr:Math.abs(a[0]-773),errors:a[0]!==773?[{got:a[0],expected:773}]:[]}}async testSplit(){const s=new Float32Array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]),r=new Float32Array([1,2,3,4,9,10,11,12]),a=new 
Float32Array([5,6,7,8,13,14,15,16]),d=this.makeF32Buffer("split_src",s),l=this.makeOutputBuffer("split_a",8),w=this.makeOutputBuffer("split_b",8),h=this.makeUniform([8,2,4]);this.dispatch("split",[d,l,w,h],Math.ceil(8/32));const F=await this.readback(l,8),n=await this.readback(w,8),B=this.compare(F,r,1e-6),f=this.compare(n,a,1e-6);return{pass:B.pass&&f.pass,maxErr:Math.max(B.maxErr,f.maxErr),errors:[...B.errors,...f.errors]}}async testSigmoidMul(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),s=new Float32Array(8);for(let h=0;h<8;h++)s[h]=t[h]/(1+Math.exp(-o[h]));const r=this.makeF32Buffer("sigmul_x",t),a=this.makeF32Buffer("sigmul_g",o),d=this.makeOutputBuffer("sigmul_out",8),l=this.makeUniform([8]);this.dispatch("sigmoid_mul",[r,a,d,l],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testHeadRMSNorm(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.1*(n+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const f=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*f*(1+u)}}const l=this.makeF32Buffer("hrmsn_x",s),w=this.makeU32Buffer("hrmsn_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm",[l,w,h],2);const F=await this.readback(l,8);return this.compare(F,d,.001)}async testHeadRMSNormNogated(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.5+.1*n;const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const f=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*f*u}}const 
l=this.makeF32Buffer("hrmsng_x",s),w=this.makeU32Buffer("hrmsng_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm_nogated",[l,w,h],2);const F=await this.readback(l,8);return this.compare(F,d,.001)}async testCausalConv1d(){const t=new Float32Array([1,2,3,4]),o=new Float32Array(12);for(let y=0;y<o.length;y++)o[y]=(y+1)*.1;const s=new Float32Array(16);for(let y=0;y<s.length;y++)s[y]=.25;const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4),d=new Float32Array(12);for(let y=0;y<4;y++){const k=o[y],u=o[4+y],m=o[8+y],g=t[y],i=[];for(let _=0;_<4;_++){const b=y*4+_,N=r[Math.floor(b/2)],x=b%2===0?N&65535:N>>16&65535;i.push(this.bf16ToF32(x))}const c=i[0]*k+i[1]*u+i[2]*m+i[3]*g;a[y]=c/(1+Math.exp(-c)),d[y]=u,d[4+y]=m,d[8+y]=g}const l=this.makeF32Buffer("conv_x",t),w=this.makeF32Buffer("conv_hist",o),h=this.makeU32Buffer("conv_w",r),F=this.makeUniform([4]);this.dispatch("causal_conv1d",[l,w,h,F],Math.ceil(4/256));const n=await this.readback(l,4),B=await this.readback(w,12),f=this.compare(n,a,1e-4),p=this.compare(B,d,1e-4);return{pass:f.pass&&p.pass,maxErr:Math.max(f.maxErr,p.maxErr),errors:[...f.errors.map(y=>({...y,note:"conv output"})),...p.errors.map(y=>({...y,note:"history"}))]}}async testKVCacheStore(){const r=new Float32Array(8),a=new Float32Array(8);for(let c=0;c<8;c++)r[c]=c+1,a[c]=(c+1)*10;const l=8*8,w=new Float32Array(l),h=new Float32Array(l),F=new Float32Array(l),n=new Float32Array(l);for(let c=0;c<8;c++)F[24+c]=r[c],n[24+c]=a[c];const B=this.makeF32Buffer("kvs_k",r),f=this.makeF32Buffer("kvs_v",a),p=this.makeF32Buffer("kvs_kc",w),y=this.makeF32Buffer("kvs_vc",h),k=this.makeUniform([3,2,4]);this.dispatch("kv_cache_store",[B,f,p,y,k],Math.ceil(8/256));const u=await this.readback(p,l),m=await this.readback(y,l),g=this.compare(u,F,1e-6),i=this.compare(m,n,1e-6);return{pass:g.pass&&i.pass,maxErr:Math.max(g.maxErr,i.maxErr),errors:[...g.errors,...i.errors]}}async testGQAAttention(){const a=new Float32Array([1,0,0,0]),d=new 
Float32Array(4);d.set([1,0,0,0]);const l=new Float32Array(4);l.set([.5,.6,.7,.8]);const w=new Float32Array([.5,.6,.7,.8]),h=this.makeF32Buffer("gqa_q",a),F=this.makeF32Buffer("gqa_kc",d),n=this.makeF32Buffer("gqa_vc",l),B=this.makeOutputBuffer("gqa_out",4),f=new ArrayBuffer(32),p=new DataView(f);p.setUint32(0,1,!0),p.setUint32(4,4,!0),p.setUint32(8,1,!0),p.setUint32(12,1,!0),p.setUint32(16,1/1,!0),p.setUint32(20,1,!0);const y=this.gpu.createBufferFromData("gqa_params",new Uint8Array(f),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[h,F,n,B,y],1);const k=await this.readback(B,4);return this.compare(k,w,1e-4)}async testDeltaNetRecurrent(){const s=[.5,.5],r=[1,0],a=[.8,.2],d=new Float32Array([...s,...r,...a]),l=new Float32Array(4),w=new Float32Array([0]),h=new Float32Array([0]),F=new Float32Array([0]),n=new Float32Array([0]),B=this.cpuF32ToBF16Packed(F),f=this.cpuF32ToBF16Packed(n),p=1/(1+Math.exp(-0)),y=Math.sqrt(s[0]**2+s[1]**2),k=Math.sqrt(r[0]**2+r[1]**2),u=s.map(U=>U/y),m=r.map(U=>U/k),g=[0,0,0,0],i=[0,0],c=a.map((U,S)=>(U-i[S])*p);for(let U=0;U<2;U++)for(let S=0;S<2;S++)g[U*2+S]+=m[U]*c[S];const _=1/Math.sqrt(2),b=new Float32Array(2);for(let U=0;U<2;U++){let S=0;for(let O=0;O<2;O++)S+=g[O*2+U]*u[O]*_;b[U]=S}const N=this.makeF32Buffer("dn_qkv",d),x=this.makeF32Buffer("dn_state",l),q=this.makeOutputBuffer("dn_out",2),K=this.makeF32Buffer("dn_a",w),v=this.makeF32Buffer("dn_b",h),T=this.makeU32Buffer("dn_alog",B),V=this.makeU32Buffer("dn_dtb",f),G=this.makeUniform([1,2,2]);this.dispatch("deltanet_recurrent",[N,x,q,K,v,T,V,G],1);const C=await this.readback(q,2);return this.compare(C,b,.001)}async testAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array(8);for(let _=0;_<8;_++)r[_]=.1*(_+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(o);for(let _=0;_<8;_++)d[_]+=s[_];let l=0;for(let _=0;_<8;_++)l+=d[_]*d[_];const w=1/Math.sqrt(l/8+1e-6),h=new 
Float32Array(8);for(let _=0;_<8;_++){const b=a[Math.floor(_/2)]>>_%2*16&65535,N=this.bf16ToF32(b);h[_]=d[_]*w*(1+N)}const F=this.makeF32Buffer("addnorm_h",o),n=this.makeF32Buffer("addnorm_a",s),B=this.makeU32Buffer("addnorm_w",a),f=this.makeOutputBuffer("addnorm_out",8);this.makeUniform([8]);const p=new ArrayBuffer(16),y=new DataView(p);y.setUint32(0,8,!0),y.setFloat32(4,1e-6,!0);const k=this.gpu.createBufferFromData("addnorm_params",new Uint8Array(p),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("add_rmsnorm",[F,n,B,f,k],1);const u=await this.readback(f,8),m=await this.readback(F,8),g=d,i=this.compare(u,h,.001),c=this.compare(m,g,1e-6);return{pass:i.pass&&c.pass,maxErr:Math.max(i.maxErr,c.maxErr),errors:[...i.errors,...c.errors]}}async testThreeWayAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array([.5,.5,-.5,-.5,1,1,-1,-1]),a=new Float32Array(8);for(let N=0;N<8;N++)a[N]=.1*(N+1);const d=this.cpuF32ToBF16Packed(a),l=new Float32Array(8);for(let N=0;N<8;N++)l[N]=o[N]+s[N]+r[N];let w=0;for(let N=0;N<8;N++)w+=l[N]*l[N];const h=1/Math.sqrt(w/8+1e-6),F=new Float32Array(8);for(let N=0;N<8;N++){const x=d[Math.floor(N/2)]>>N%2*16&65535,q=this.bf16ToF32(x);F[N]=l[N]*h*(1+q)}const n=this.makeF32Buffer("twa_a",o),B=this.makeF32Buffer("twa_b",s),f=this.makeF32Buffer("twa_c",r),p=this.makeU32Buffer("twa_w",d),y=this.makeOutputBuffer("twa_hout",8),k=this.makeOutputBuffer("twa_normed",8),u=new ArrayBuffer(16),m=new DataView(u);m.setUint32(0,8,!0),m.setFloat32(4,1e-6,!0);const g=this.gpu.createBufferFromData("twa_params",new Uint8Array(u),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("three_way_add_rmsnorm",[n,B,f,p,y,k,g],1);const i=await this.readback(y,8),c=await this.readback(k,8),_=this.compare(i,l,1e-6),b=this.compare(c,F,.001);return{pass:_.pass&&b.pass,maxErr:Math.max(_.maxErr,b.maxErr),errors:[..._.errors,...b.errors]}}async testGQARoPERotateHalf(){const 
h=1/Math.sqrt(256),F=[];for(let D=0;D<32;D++)F.push(1/Math.pow(100,2*D/64));function n(D,M){const H=new Float32Array(D.length);for(let P=0;P<D.length;P++)H[P]=D[P];for(let P=0;P<32;P++){const Q=M*F[P],E=Math.cos(Q),J=Math.sin(Q);H[P]=D[P]*E-D[P+32]*J,H[P+32]=D[P+32]*E+D[P]*J}return H}const B=new Float32Array(256);B[0]=1,B[1]=.5,B[2]=.3,B[32]=.7,B[33]=.2;const f=new Float32Array(256);f[0]=1,f[32]=.5;const p=new Float32Array(256);p[1]=1,p[33]=.3;const y=new Float32Array(256);y[2]=1;const k=new Float32Array(256);k[0]=1;const u=new Float32Array(256);u[0]=0,u[1]=1;const m=new Float32Array(256);m[0]=0,m[2]=1;const g=n(B,2),i=n(f,0),c=n(p,1),_=n(y,2);let b=0,N=0,x=0;for(let D=0;D<256;D++)b+=g[D]*i[D],N+=g[D]*c[D],x+=g[D]*_[D];b*=h,N*=h,x*=h;const q=Math.max(b,N,x),K=Math.exp(b-q),v=Math.exp(N-q),T=Math.exp(x-q),V=K+v+T,G=K/V,C=v/V,U=T/V,S=new Float32Array(256);for(let D=0;D<256;D++)S[D]=G*k[D]+C*u[D]+U*m[D];const O=1*256,Y=new Float32Array(3*O),z=new Float32Array(3*O);Y.set(i,0),Y.set(c,O),Y.set(_,2*O),z.set(k,0),z.set(u,O),z.set(m,2*O);const X=this.makeF32Buffer("rope2_q",g),tt=this.makeF32Buffer("rope2_kc",Y),A=this.makeF32Buffer("rope2_vc",z),R=this.makeOutputBuffer("rope2_out",1*256),L=new ArrayBuffer(32),W=new DataView(L);W.setUint32(0,3,!0),W.setUint32(4,256,!0),W.setUint32(8,1,!0),W.setUint32(12,1,!0),W.setUint32(16,1/1,!0),W.setUint32(20,1,!0);const Z=this.gpu.createBufferFromData("rope2_params",new Uint8Array(L),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[X,tt,A,R,Z],1);const j=await this.readback(R,1*256);return this.compare(j,S,.001)}async testFusedSigmoidGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m+1)*.1,a[m]=(m-8)*.3;const d=new Uint32Array(32),l=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_+g)%15;l[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.5+m*.1;const 
h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const c=r[i]/(1+Math.exp(-a[i])),b=Math.floor(i/64)*4+m,N=h[Math.floor(b/2)]>>b%2*16&65535,x=this.f16ToF32(N);g+=c*l[i*4+m]*x}F[m]=g}const n=this.makeF32Buffer("fsg_x",r),B=this.makeF32Buffer("fsg_g",a),f=this.makeU32Buffer("fsg_qw",d),p=this.makeU32Buffer("fsg_sc",h),y=this.makeOutputBuffer("fsg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_sigmoid_gptq",[n,B,f,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedSiluGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m-8)*.2,a[m]=(m+1)*.15;const d=new Uint32Array(32),l=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_*2+g)%15;l[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.4+m*.2;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const _=r[i]/(1+Math.exp(-r[i]))*a[i],N=Math.floor(i/64)*4+m,x=h[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);g+=_*l[i*4+m]*q}F[m]=g}const n=this.makeF32Buffer("fslg_a",r),B=this.makeF32Buffer("fslg_b",a),f=this.makeU32Buffer("fslg_qw",d),p=this.makeU32Buffer("fslg_sc",h),y=this.makeOutputBuffer("fslg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_silu_gptq",[n,B,f,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedAddNormGateUpSiLU(){const a=new Float32Array(64),d=new Float32Array(64),l=new Float32Array(64),w=new Float32Array(64);for(let A=0;A<64;A++)a[A]=(A+1)*.1,d[A]=(A-5)*.05,l[A]=(A%7-3)*.03,w[A]=(A%3-1)*.1;const h=new Uint32Array(32),F=new Uint32Array(32),n=new Float32Array(256),B=new Float32Array(256);for(let A=0;A<64;A++)for(let R=0;R<4;R++)n[A*4+R]=Math.sin(A*.7+R*1.3)*.5,B[A*4+R]=Math.cos(A*.3+R*.9)*.5;const f=new Uint32Array(4/2),p=new Uint32Array(4/2);for(let 
A=0;A<4;A++){let R=0,L=0;for(let P=0;P<64;P++)R=Math.max(R,Math.abs(n[P*4+A])),L=Math.max(L,Math.abs(B[P*4+A]));const W=R/7,Z=L/7,j=this.f32ToF16(W),D=this.f32ToF16(Z),M=this.f16ToF32(j),H=this.f16ToF32(D);A%2===0?(f[A/2]=j,p[A/2]=D):(f[(A-1)/2]|=j<<16,p[(A-1)/2]|=D<<16);for(let P=0;P<64;P++){const Q=Math.round(n[P*4+A]/M)+8,E=Math.round(B[P*4+A]/H)+8,J=Math.max(0,Math.min(15,Q)),rt=Math.max(0,Math.min(15,E)),st=Math.floor(P/8),at=P%8*4;h[st*4+A]|=J<<at,F[st*4+A]|=rt<<at}}const y=new Uint32Array(64/2);for(let A=0;A<64;A+=2){const R=this.f32ToBF16(w[A]),L=this.f32ToBF16(w[A+1]);y[A/2]=R|L<<16}const k=new Float32Array(64);let u=0;for(let A=0;A<64;A++)k[A]=a[A]+d[A]+l[A],u+=k[A]*k[A];const m=1/Math.sqrt(u/64+1e-6),g=new Float32Array(64);for(let A=0;A<64;A++){const R=this.unpackBF16(y[Math.floor(A/2)],A%2);g[A]=k[A]*m*(1+R)}const i=new Float32Array(4);for(let A=0;A<4;A++){let R=0,L=0;const W=this.f16ToF32(A%2===0?f[Math.floor(A/2)]&65535:f[Math.floor(A/2)]>>>16),Z=this.f16ToF32(A%2===0?p[Math.floor(A/2)]&65535:p[Math.floor(A/2)]>>>16);for(let j=0;j<64;j++){const D=Math.floor(j/8),M=j%8*4,H=h[D*4+A]>>>M&15,P=F[D*4+A]>>>M&15;R+=g[j]*W*(H-8),L+=g[j]*Z*(P-8)}i[A]=R/(1+Math.exp(-R))*L}const c=k,_=16+640*16,b=new ArrayBuffer(_),N=new DataView(b);N.setUint32(0,64,!0),N.setUint32(4,4,!0),N.setUint32(8,64,!0),N.setFloat32(12,1e-6,!0);for(let A=0;A<64/2;A++){const R=Math.floor(A/4),L=A%4;N.setUint32(16+R*16+L*4,y[A],!0)}const x=this.gpu.createBufferFromData("fang_params",new Uint8Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=new Uint32Array(64);q.set(h,0),q.set(F,32);const K=8/(64/8),v=new Uint32Array(K*4/2*2);v.set(f,0),v.set(p,K*4/2);const 
T=this.makeF32Buffer("fang_hin",a),V=this.makeF32Buffer("fang_mlpres",d),G=this.makeF32Buffer("fang_attnres",l),C=this.makeU32Buffer("fang_mqw",q),U=this.makeU32Buffer("fang_msc",v),S=this.makeOutputBuffer("fang_hout",64),O=this.makeOutputBuffer("fang_out",4);this.dispatch("fused_addnorm_gate_up_silu",[T,V,G,C,U,S,O,x],Math.max(1,Math.ceil(4/32)));const Y=await this.readback(O,4),z=await this.readback(S,64),X=this.compare(Y,i,.02),tt=this.compare(z,c,1e-6);return{pass:X.pass&&tt.pass,maxErr:Math.max(X.maxErr,tt.maxErr),errors:[...X.errors||[],...tt.errors||[]]}}async testFusedSplitQKNormKVStore(){const l=new Float32Array(32);for(let M=0;M<l.length;M++)l[M]=Math.sin(M*.5+1)*.3;const w=new Float32Array(8),h=new Float32Array(8);for(let M=0;M<8;M++)w[M]=Math.cos(M*.7+2)*.4,h[M]=Math.sin(M*1.1+3)*.2;const F=new Float32Array(16),n=new Float32Array(8);for(let M=0;M<16;M++)F[M]=(M%3-1)*.05;for(let M=0;M<8;M++)n[M]=(M%4-2)*.03;const B=new Uint32Array(16/2);for(let M=0;M<16;M+=2)B[M/2]=this.f32ToBF16(F[M])|this.f32ToBF16(F[M+1])<<16;const f=new Uint32Array(8/2);for(let M=0;M<8;M+=2)f[M/2]=this.f32ToBF16(n[M])|this.f32ToBF16(n[M+1])<<16;const p=new Float32Array(16),y=new Float32Array(16);for(let M=0;M<2;M++){const H=new Float32Array(8);for(let E=0;E<8;E++)H[E]=l[M*8*2+E],y[M*8+E]=l[M*8*2+8+E];let P=0;for(let E=0;E<8;E++)P+=H[E]*H[E];const Q=1/Math.sqrt(P/8+1e-6);for(let E=0;E<8;E++){const J=this.unpackBF16(B[Math.floor((M*8+E)/2)],(M*8+E)%2);p[M*8+E]=H[E]*Q*(1+J)}}const k=new Float32Array(8),u=new Float32Array(32),m=new Float32Array(32);for(let M=0;M<1;M++){let H=0;for(let Q=0;Q<8;Q++)H+=w[M*8+Q]*w[M*8+Q];const P=1/Math.sqrt(H/8+1e-6);for(let Q=0;Q<8;Q++){const E=this.unpackBF16(f[Math.floor((M*8+Q)/2)],(M*8+Q)%2),J=w[M*8+Q]*P*(1+E);k[M*8+Q]=J,u[24+M*8+Q]=J,m[24+M*8+Q]=h[M*8+Q]}}const g=32+320*16,i=new ArrayBuffer(g),c=new DataView(i);c.setUint32(0,2,!0),c.setUint32(4,1,!0),c.setUint32(8,8,!0),c.setFloat32(12,1e-6,!0),c.setUint32(16,3,!0);for(let M=0;M<16/2;M++){const 
H=Math.floor(M/4),P=M%4;c.setUint32(32+H*16+P*4,B[M],!0)}const _=16/2;for(let M=0;M<8/2;M++){const H=_+M,P=Math.floor(H/4),Q=H%4;c.setUint32(32+P*16+Q*4,f[M],!0)}const b=this.gpu.createBufferFromData("fsqk_params",new Uint8Array(i),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),N=this.makeF32Buffer("fsqk_qpf",l),x=this.makeF32Buffer("fsqk_kp",w),q=this.makeF32Buffer("fsqk_vp",h),K=this.makeOutputBuffer("fsqk_qp",16),v=this.makeOutputBuffer("fsqk_qg",16),T=this.makeOutputBuffer("fsqk_kc",32),V=this.makeOutputBuffer("fsqk_vc",32);this.dispatch("fused_split_qknorm_kvstore",[N,x,q,K,v,T,V,b],3);const G=await this.readback(K,16),C=await this.readback(v,16),U=await this.readback(x,8),S=await this.readback(T,32),O=await this.readback(V,32),Y=this.compare(G,p,1e-5),z=this.compare(C,y,1e-6),X=this.compare(U,k,1e-5),tt=S.slice(24,32),A=O.slice(24,32),R=u.slice(24,32),L=m.slice(24,32),W=this.compare(tt,R,1e-5),Z=this.compare(A,L,1e-6),j=Math.max(Y.maxErr,z.maxErr,X.maxErr,W.maxErr,Z.maxErr);return{pass:Y.pass&&z.pass&&X.pass&&W.pass&&Z.pass,maxErr:j,errors:[...Y.errors||[],...z.errors||[],...X.errors||[],...W.errors||[],...Z.errors||[]]}}async benchmarkGPTQ(e){const t=[{K:2048,N:2048,gs:128,label:"2048→2048 (Z/out_proj)"},{K:2048,N:6144,gs:128,label:"2048→6144 (QKV/gate+up)"},{K:6144,N:2048,gs:128,label:"6144→2048 (down_proj)"}],o=[1,2,4,8],s=50,r=3,a=10,d=[];for(const l of t){const{K:w,N:h,gs:F,label:n}=l,B=w/8,f=B/(F/8),p=new Float32Array(w);for(let b=0;b<w;b++)p[b]=Math.random()*2-1;const y=new Uint32Array(B*h);for(let b=0;b<y.length;b++)y[b]=Math.random()*4294967295>>>0;const k=new Float32Array(f*h);for(let b=0;b<k.length;b++)k[b]=Math.random()*.5;const u=this.cpuF32ToF16Packed(k),m=this.makeF32Buffer("bm_in",p),g=this.makeU32Buffer("bm_qw",y),i=this.makeU32Buffer("bm_sc",u),c=[];for(let b=0;b<s;b++)c.push(this.makeOutputBuffer(`bm_out_${b}`,h));const _=B*h*4+f*h*2;for(const b of o){if(b>1&&f%b!==0)continue;let N;if(b===1){const v=this.makeUniform([w,h,F]);for(let 
G=0;G<r;G++){this.gpu.beginBatch();for(let C=0;C<s;C++)this.dispatch("gptq_matvec",[m,g,i,c[C],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const T=[];for(let G=0;G<a;G++){await this.gpu.device.queue.onSubmittedWorkDone();const C=performance.now();this.gpu.beginBatch();for(let U=0;U<s;U++)this.dispatch("gptq_matvec",[m,g,i,c[U],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),T.push(performance.now()-C)}T.sort((G,C)=>G-C),N=T.slice(1,-1).reduce((G,C)=>G+C,0)/(T.length-2)/s}else{const v=[];for(let U=0;U<s;U++)v.push(this.makeOutputBuffer(`bm_part_${U}`,h*b));const T=this.makeUniform([w,h,F,b]),V=this.makeUniform([h,b]);for(let U=0;U<r;U++){this.gpu.beginBatch();for(let S=0;S<s;S++)this.dispatch("gptq_splitk",[m,g,i,v[S],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[S],c[S],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const G=[];for(let U=0;U<a;U++){await this.gpu.device.queue.onSubmittedWorkDone();const S=performance.now();this.gpu.beginBatch();for(let O=0;O<s;O++)this.dispatch("gptq_splitk",[m,g,i,v[O],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[O],c[O],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),G.push(performance.now()-S)}G.sort((U,S)=>U-S),N=G.slice(1,-1).reduce((U,S)=>U+S,0)/(G.length-2)/s}const x=_/1e9/(N/1e3),q=Math.ceil(h/32)*b,K={label:n,ns:b,wgs:q,avgMs:N.toFixed(4),bwGBs:x.toFixed(1)};d.push(K),e?.(K)}if(this.gpu.hasF16&&this.pipelines.gptq_matvec_f16){const b=this.makeUniform([w,h,F]);for(let T=0;T<r;T++){this.gpu.beginBatch();for(let V=0;V<s;V++)this.dispatch("gptq_matvec_f16",[m,g,i,c[V],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const N=[];for(let T=0;T<a;T++){await this.gpu.device.queue.onSubmittedWorkDone();const V=performance.now();this.gpu.beginBatch();for(let 
G=0;G<s;G++)this.dispatch("gptq_matvec_f16",[m,g,i,c[G],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),N.push(performance.now()-V)}N.sort((T,V)=>T-V);const q=N.slice(1,-1).reduce((T,V)=>T+V,0)/(N.length-2)/s,K=_/1e9/(q/1e3),v={label:n,ns:"f16",wgs:Math.ceil(h/32),avgMs:q.toFixed(4),bwGBs:K.toFixed(1)};d.push(v),e?.(v)}}return d}/* _makeNormParams(e,t): builds a 16-byte uniform buffer named norm_params holding e as a little-endian u32 at byte 0 and t (default 1e-6) as a little-endian f32 at byte 4. */_makeNormParams(e,t=1e-6){const o=new ArrayBuffer(16),s=new DataView(o);return s.setUint32(0,e,!0),s.setFloat32(4,t,!0),this.gpu.createBufferFromData("norm_params",new Uint8Array(o),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}/* _cpuRMSNorm1PlusW(e,t,o,s): CPU reference. Multiplies each of the first o values of e by 1/sqrt(mean(e^2)+s) and by (1+w), where w is decoded with bf16ToF32 from the 16-bit halves of the u32 array t (even index = low half, odd index = high half). Returns a Float32Array of length o. */_cpuRMSNorm1PlusW(e,t,o,s=1e-6){let r=0;for(let l=0;l<o;l++)r+=e[l]*e[l];const a=1/Math.sqrt(r/o+s),d=new Float32Array(o);for(let l=0;l<o;l++){const w=t[Math.floor(l/2)]>>l%2*16&65535,h=this.bf16ToF32(w);d[l]=e[l]*a*(1+h)}return d}/* testAddRMSNormB2: dispatches add_rmsnorm_b2 over two rows of 16 values and compares the output buffer with the CPU reference (tolerance .001). It also reads back the first input buffer and expects it to hold the element-wise sums of both inputs (tolerance 1e-6). */async testAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let u=0;u<32;u++)o[u]=Math.sin(u*.7)*2,s[u]=Math.cos(u*.3)*.5;const r=new Float32Array(16);for(let u=0;u<16;u++)r[u]=.1*(u+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(32),l=new Float32Array(32);for(let u=0;u<2;u++){const m=new Float32Array(16);for(let i=0;i<16;i++)m[i]=o[u*16+i]+s[u*16+i];for(let i=0;i<16;i++)l[u*16+i]=m[i];const g=this._cpuRMSNorm1PlusW(m,a,16,1e-6);d.set(g,u*16)}const w=this.makeF32Buffer("b2an_h",o),h=this.makeF32Buffer("b2an_a",s),F=this.makeU32Buffer("b2an_w",a),n=this.makeOutputBuffer("b2an_out",32),B=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_b2",[w,h,F,n,B],2);const f=await this.readback(n,32),p=await this.readback(w,32),y=this.compare(f,d,.001),k=this.compare(p,l,1e-6);return{pass:y.pass&&k.pass,maxErr:Math.max(y.maxErr,k.maxErr),errors:[...y.errors,...k.errors]}}/* testAddRMSNormROB2: same two-row setup with the add_rmsnorm_ro_b2 kernel; only the normalized output buffer is compared with the CPU reference (tolerance .001). */async testAddRMSNormROB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let f=0;f<32;f++)o[f]=Math.sin(f*.5)*3,s[f]=Math.cos(f*.8)*.3;const r=new Float32Array(16);for(let f=0;f<16;f++)r[f]=.05*(f+1);const a=this.cpuF32ToBF16Packed(r),d=new 
Float32Array(32);for(let f=0;f<2;f++){const p=new Float32Array(16);for(let k=0;k<16;k++)p[k]=o[f*16+k]+s[f*16+k];const y=this._cpuRMSNorm1PlusW(p,a,16,1e-6);d.set(y,f*16)}const l=this.makeF32Buffer("b2anro_h",o),w=this.makeF32Buffer("b2anro_a",s),h=this.makeU32Buffer("b2anro_w",a),F=this.makeOutputBuffer("b2anro_out",32),n=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_ro_b2",[l,w,h,F,n],2);const B=await this.readback(F,32);return this.compare(B,d,.001)}/* testThreeWayAddRMSNormB2: sums three inputs per row on the CPU, dispatches three_way_add_rmsnorm_b2, and compares both the b2twa_hout buffer with the sums (tolerance 1e-6) and the b2twa_normed buffer with the normalized reference (tolerance .001). */async testThreeWayAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32),r=new Float32Array(32);for(let i=0;i<32;i++)o[i]=Math.sin(i*.4)*2,s[i]=Math.cos(i*.6)*.5,r[i]=Math.sin(i*1.1)*.3;const a=new Float32Array(16);for(let i=0;i<16;i++)a[i]=.1*(i+1);const d=this.cpuF32ToBF16Packed(a),l=new Float32Array(32),w=new Float32Array(32);for(let i=0;i<2;i++){const c=new Float32Array(16);for(let b=0;b<16;b++)c[b]=o[i*16+b]+s[i*16+b]+r[i*16+b];l.set(c,i*16);const _=this._cpuRMSNorm1PlusW(c,d,16,1e-6);w.set(_,i*16)}const h=this.makeF32Buffer("b2twa_a",o),F=this.makeF32Buffer("b2twa_b",s),n=this.makeF32Buffer("b2twa_c",r),B=this.makeU32Buffer("b2twa_w",d),f=this.makeOutputBuffer("b2twa_hout",32),p=this.makeOutputBuffer("b2twa_normed",32),y=this._makeNormParams(16,1e-6);this.dispatch("three_way_add_rmsnorm_b2",[h,F,n,B,f,p,y],2);const k=await this.readback(p,32),u=await this.readback(f,32),m=this.compare(k,w,.001),g=this.compare(u,l,1e-6);return{pass:m.pass&&g.pass,maxErr:Math.max(m.maxErr,g.maxErr),errors:[...m.errors,...g.errors]}}/* _makeGPTQData(e,t,o): deterministic fixture. Packs eight 4-bit values (each in 0..14) per u32 into qweight (e/8 rows by t columns), stores the same values minus 8 in rawWeights (e by t), and builds (e/o)*t scale values packed with cpuF32ToF16Packed. Returns {qweight,rawWeights,scalesPacked}. */_makeGPTQData(e,t,o){const s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let h=0;h<s;h++)for(let F=0;F<t;F++){let n=0;for(let B=0;B<8;B++){const f=h*8+B,p=(f*3+F*7)%15;a[f*t+F]=p-8,n|=p<<B*4}r[h*t+F]=n}const d=e/o,l=new Float32Array(d*t);for(let h=0;h<d*t;h++)l[h]=.3+h*.15;const w=this.cpuF32ToF16Packed(l);return{qweight:r,rawWeights:a,scalesPacked:w}}/* _cpuGPTQMatvec(e,t,o,s,r,a,d): CPU reference mat-vec. Output column w is the sum over F<s of e[d+F]*t[F*r+w]*scale, where the scale is the 16-bit value at index floor(F/a)*r+w of the packed u32 array o, decoded with f16ToF32. Returns a Float32Array of length r; d (default 0) is an offset into e. */_cpuGPTQMatvec(e,t,o,s,r,a,d=0){const l=new Float32Array(r);for(let w=0;w<r;w++){let h=0;for(let F=0;F<s;F++){const 
B=Math.floor(F/a)*r+w,f=o[Math.floor(B/2)]>>B%2*16&65535,p=this.f16ToF32(f);h+=e[d+F]*t[F*r+w]*p}l[w]=h}return l}/* testFusedSiluGPTQB2: returns a passing "skipped" result when the device lacks f16. Otherwise the expected value per row is the CPU mat-vec of silu(a)*b (K=128, N=8, gs=64), compared with the fused_silu_gptq_b2_f16 output (tolerance .05). */async testFusedSiluGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),l=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.3)*2,l[k]=Math.cos(k*.5)*1.5;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=d[k*e+g];u[g]=i/(1+Math.exp(-i))*l[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sl_a",d),F=this.makeF32Buffer("b2sl_b",l),n=this.makeU32Buffer("b2sl_qw",s),B=this.makeU32Buffer("b2sl_sc",a),f=this.makeOutputBuffer("b2sl_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_silu_gptq_b2_f16",[h,F,n,B,f,p],Math.ceil(t/32));const y=await this.readback(f,2*t);return this.compare(y,w,.05)}/* testFusedSigmoidGPTQB2: returns a passing "skipped" result when the device lacks f16. Otherwise the expected value per row is the CPU mat-vec of x*sigmoid(g), compared with the fused_sigmoid_gptq_b2_f16 output (tolerance .05). */async testFusedSigmoidGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),l=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.4)*1.5,l[k]=Math.cos(k*.7)*2;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=1/(1+Math.exp(-l[k*e+g]));u[g]=i*d[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sg_x",d),F=this.makeF32Buffer("b2sg_g",l),n=this.makeU32Buffer("b2sg_qw",s),B=this.makeU32Buffer("b2sg_sc",a),f=this.makeOutputBuffer("b2sg_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_sigmoid_gptq_b2_f16",[h,F,n,B,f,p],Math.ceil(t/32));const y=await this.readback(f,2*t);return this.compare(y,w,.05)}/* testFusedGateUpSiLUB2: returns a passing "skipped" result when the device lacks f16. Otherwise it builds two separate packed weight/scale fixtures (uploaded as the b2gus_g* and b2gus_u* buffers); the expected value per row is silu(first mat-vec)*(second mat-vec), compared with the fused_gate_up_silu_b2_f16 output (tolerance .05). */async testFusedGateUpSiLUB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let 
N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*3+x*7)%15;a[v*t+x]=T-8,q|=T<<K*4}r[N*t+x]=q}const d=e/o,l=new Float32Array(d*t);for(let N=0;N<d*t;N++)l[N]=.01+N%8*.005;const w=this.cpuF32ToF16Packed(l),h=new Uint32Array(s*t),F=new Float32Array(e*t);for(let N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*5+x*3)%15;F[v*t+x]=T-8,q|=T<<K*4}h[N*t+x]=q}const n=e/o,B=new Float32Array(n*t);for(let N=0;N<n*t;N++)B[N]=.01+N%8*.005;const f=this.cpuF32ToF16Packed(B),p=new Float32Array(2*e);for(let N=0;N<2*e;N++)p[N]=Math.sin(N*.2)*.5;const y=new Float32Array(2*t);for(let N=0;N<2;N++){const x=this._cpuGPTQMatvec(p,a,w,e,t,o,N*e),q=this._cpuGPTQMatvec(p,F,f,e,t,o,N*e);for(let K=0;K<t;K++){const v=x[K];y[N*t+K]=v/(1+Math.exp(-v))*q[K]}}const k=this.makeF32Buffer("b2gus_in",p),u=this.makeU32Buffer("b2gus_gqw",r),m=this.makeU32Buffer("b2gus_gsc",w),g=this.makeU32Buffer("b2gus_uqw",h),i=this.makeU32Buffer("b2gus_usc",f),c=this.makeOutputBuffer("b2gus_out",2*t),_=this.makeUniform([e,t,o]);this.dispatch("fused_gate_up_silu_b2_f16",[k,u,m,g,i,c,_],Math.ceil(t/32));const b=await this.readback(c,2*t);return this.compare(b,y,.05)}/* _makeGPTQMatmulB2Data(e,t,o): fixture for the two-row matmul tests. Builds an input of 2*e values (first row from sin, second row from cos), 4-bit weights packed eight per u32, scales packed with cpuF32ToF16Packed, and the CPU-computed expected output of 2*t values. Returns {input,qweight,scalesPacked,expected,K,N,gs} with K, N, gs echoing e, t, o. */_makeGPTQMatmulB2Data(e,t,o){const s=e/8,r=new Float32Array(2*e);for(let n=0;n<e;n++)r[n]=Math.sin(n*.5)*.5;for(let n=0;n<e;n++)r[e+n]=Math.cos(n*.3)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let n=0;n<s;n++)for(let B=0;B<t;B++){let f=0;for(let p=0;p<8;p++){const y=n*8+p,k=(y*3+B*7)%15;d[y*t+B]=k-8,f|=k<<p*4}a[n*t+B]=f}const l=e/o,w=new Float32Array(l*t);for(let n=0;n<l*t;n++)w[n]=.01+n%8*.005;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(2*t);for(let n=0;n<2;n++)for(let B=0;B<t;B++){let f=0;for(let p=0;p<e;p++){const k=Math.floor(p/o)*t+B,u=h[Math.floor(k/2)]>>k%2*16&65535,m=this.f16ToF32(u);f+=r[n*e+p]*d[p*t+B]*m}F[n*t+B]=f}return{input:r,qweight:a,scalesPacked:h,expected:F,K:e,N:t,gs:o}}async 
/* testGPTQMatmulB2: dispatches gptq_matmul_b2 on the K=128, N=8, gs=64 fixture with ceil(N/32) as the dispatch count and compares the 2*N outputs with the CPU expectation (tolerance .02). */testGPTQMatmulB2(){const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),l=this.makeF32Buffer("b2_input",e),w=this.makeU32Buffer("b2_qw",t),h=this.makeU32Buffer("b2_sc",o),F=this.makeOutputBuffer("b2_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2",[l,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.02)}/* testGPTQMatmulB2F16: same fixture with the gptq_matmul_b2_f16 kernel (tolerance .05); returns a passing "skipped" result when the device lacks f16. */async testGPTQMatmulB2F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),l=this.makeF32Buffer("b2f16_input",e),w=this.makeU32Buffer("b2f16_qw",t),h=this.makeU32Buffer("b2f16_sc",o),F=this.makeOutputBuffer("b2f16_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_f16",[l,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}/* testGPTQMatmulB2_4T_F16: K=256, N=16, gs=64 fixture with the gptq_matmul_b2_4t_f16 kernel, dispatched with ceil(N/8) (tolerance .05); returns a passing "skipped" result when the device lacks f16. */async testGPTQMatmulB2_4T_F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(256,16,64),l=this.makeF32Buffer("b2_4t_input",e),w=this.makeU32Buffer("b2_4t_qw",t),h=this.makeU32Buffer("b2_4t_sc",o),F=this.makeOutputBuffer("b2_4t_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_4t_f16",[l,w,h,F,n],Math.ceil(a/8));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}/* f32ToBF16(e): returns the upper 16 bits of the float32 bit pattern of e (plain truncation; no rounding step is applied). */f32ToBF16(e){const t=new ArrayBuffer(4);return new Uint32Array(t)[0]=new Uint32Array(new Float32Array([e]).buffer)[0],new Uint32Array(t)[0]>>>16}/* unpackBF16(e,t): extracts 16-bit half number t (0 = low, 1 = high) from the u32 e and converts it with bf16ToF32. */unpackBF16(e,t){const o=e>>>t*16&65535;return this.bf16ToF32(o)}}/* $(I): appends I followed by a template-literal suffix to the textContent of the element with id "log"; the rest of the function lies beyond this line. */function $(I){const e=document.getElementById("log");e.textContent+=I+`
3
  `,e.scrollTop=e.scrollHeight}document.querySelectorAll(".tab").forEach(I=>{I.addEventListener("click",()=>{document.querySelectorAll(".tab").forEach(e=>e.classList.remove("active")),document.querySelectorAll(".panel").forEach(e=>e.classList.remove("active")),I.classList.add("active"),document.getElementById(I.dataset.panel).classList.add("active")})});document.getElementById("runTests").addEventListener("click",async()=>{const I=document.getElementById("runTests"),e=document.getElementById("testResults"),t=document.getElementById("testSummary");I.disabled=!0,e.innerHTML="",t.innerHTML="",$("Initializing WebGPU for tests...");try{const o=new ot;await o.init(),$(`WebGPU initialized. Running tests...
4
- `);let s=0,r=0;await o.runAll(a=>{const d=a.pass?"✓":"✗",l=a.pass?"pass":"fail";a.pass?s++:r++;const w=document.createElement("div");if(w.className="test-row",w.innerHTML=`
5
- <span class="test-icon ${l}">${d}</span>
6
  <span class="test-name">${a.name}</span>
7
- <span class="test-err ${l}">maxErr: ${typeof a.maxErr=="number"?a.maxErr.toExponential(2):"N/A"}</span>
8
  `,!a.pass&&a.errors?.length){const n=document.createElement("div");n.className="test-detail",n.textContent=a.errors.slice(0,3).map(B=>` idx=${B.idx??"?"}: got=${B.got?.toFixed?.(6)??B.got} expected=${B.expected?.toFixed?.(6)??B.expected}${B.note?" ("+B.note+")":""}`).join(`
9
  `),w.appendChild(n)}if(a.error){const n=document.createElement("div");n.className="test-detail",n.textContent=` Error: ${a.error}`,w.appendChild(n)}e.appendChild(w);const h=a.pass?"PASS":"FAIL",F=typeof a.maxErr=="number"?` (maxErr=${a.maxErr.toExponential(2)})`:"";$(`[${h}] ${a.name}${F}${a.error?" — "+a.error:""}`)}),t.innerHTML=`
10
  <div class="summary">
@@ -14,7 +14,7 @@ import{G as nt,S as ct,_ as et}from"./gpu-ops-DKsrMEcC.js";class ot{constructor(
14
  </div>
15
  `,$(`
16
  Done: ${s} passed, ${r} failed`),o.destroy()}catch(o){$(`Error: ${o.message}
17
- ${o.stack}`)}finally{I.disabled=!1}});document.getElementById("loadAndProfile").addEventListener("click",async()=>{const I=document.getElementById("loadAndProfile"),e=document.getElementById("profStatus"),t=document.getElementById("profCategories"),o=document.getElementById("profTopOps");I.disabled=!0,t.innerHTML="",o.innerHTML="";try{const s=document.getElementById("profRepo").value.trim();e.textContent="Loading model...",$("Profiler: loading model from "+s);const{GPUContext:r}=await et(async()=>{const{GPUContext:c}=await import("./gpu-ops-DKsrMEcC.js").then(_=>_.g);return{GPUContext:c}},[],import.meta.url),{Qwen35Model:a}=await et(async()=>{const{Qwen35Model:c}=await import("./qwen35-model-BwnUri7A.js");return{Qwen35Model:c}},__vite__mapDeps([0,1]),import.meta.url),{loadModelWeights:d,loadConfig:l,loadQuantConfig:w}=await et(async()=>{const{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}=await import("./safetensors-loader-CwGm5mJX.js");return{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}},[],import.meta.url),{Profiler:h}=await et(async()=>{const{Profiler:c}=await import("./profiler-DYUyiq-B.js");return{Profiler:c}},[],import.meta.url),F=new r;await F.init(),e.textContent="Fetching config...";const n=await l(s),B=await w(s);e.textContent="Downloading weights (this may take a while)...";const f=await d(s,c=>{if(c.phase==="downloading"){const _=c.total?(c.loaded/c.total*100).toFixed(0):"?";e.textContent=`Downloading: ${c.filesLoaded}/${c.filesTotal} shards (${_}%)`}else c.phase==="parsing"&&(e.textContent=`Parsing ${c.file}...`)});e.textContent="Uploading to GPU...";const p=new a(F,n,B);p.compilePipelines(),p.loadWeights(f),p.initBuffers(),e.textContent="Warming up...",await p.forward(1,0);const y=p.textCfg.linear_num_key_heads,k=p.textCfg.linear_key_head_dim,u=p.textCfg.linear_value_head_dim,m=y*(k+k+u);for(let c=0;c<p.numLayers;c++)p.layerTypes[c]==="linear_attention"&&(F.device.queue.writeBuffer(p.linState[c],0,new 
ArrayBuffer(y*k*u*4)),F.device.queue.writeBuffer(p.linConvHist[c],0,new ArrayBuffer(3*m*4)));p.seqLen=0,e.textContent="Profiling forward pass...",$("Profiling single forward pass...");const i=await new h(p).profileForward(1,0);e.textContent=`Done! Estimated: ${i.estimatedTokPerSec} tok/s (sync overhead inflates times)`,$(`
18
  Profile complete: ${i.total.toFixed(1)}ms total (~${i.estimatedTokPerSec} tok/s with sync overhead)`),$(`
19
  Category breakdown:`);for(const c of i.categories){const _=document.createElement("div");_.className="prof-row";const b=c.name.includes("matvec")||c.name==="lm_head"?"var(--orange)":c.name.includes("attention")||c.name.includes("deltanet")?"var(--blue)":"var(--green)";_.innerHTML=`
20
  <span class="prof-name">${c.name}</span>
@@ -37,7 +37,7 @@ Top 20 individual operations:`);for(const c of i.topOps){const _=document.create
37
  </div>
38
  </div>
39
  `,o.appendChild(_),$(` ${c.name.padEnd(35)} ${c.time.padStart(8)}ms ${c.pct.padStart(5)}%`)}F.destroy()}catch(s){e.textContent=`Error: ${s.message}`,$(`Profiler error: ${s.message}
40
- ${s.stack}`)}finally{I.disabled=!1}});document.getElementById("runBenchmark").addEventListener("click",async()=>{const I=document.getElementById("runBenchmark"),e=document.getElementById("bmResults");I.disabled=!0,e.innerHTML="<p style='color:var(--dim)'>Initializing WebGPU...</p>",$("Starting GPTQ benchmark...");try{const t=new ot;await t.init(),e.innerHTML="";let o="";const s=await t.benchmarkGPTQ(r=>{if(r.label!==o){o=r.label;const l=document.createElement("h2");l.textContent=r.label,e.appendChild(l)}const a=document.createElement("div");a.className="prof-row";const d=(r.ns===1,"");a.innerHTML=`
41
  <span class="prof-name">splits=${r.ns} (${r.wgs} WGs)</span>
42
  <span class="prof-time">${r.avgMs}ms</span>
43
  <span class="prof-pct">${r.bwGBs} GB/s</span>
 
1
+ const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["assets/qwen35-model-DrnSsmhP.js","assets/gpu-ops-BbLjsC0p.js"])))=>i.map(i=>d[i]);
2
+ import{G as nt,S as ct,_ as et}from"./gpu-ops-BbLjsC0p.js";class ot{constructor(){this.gpu=null,this.results=[],this.pipelines={}}async init(){this.gpu=new nt,await this.gpu.init();for(const[e,t]of Object.entries(ct))this.pipelines[e]=this.gpu.getOrCreatePipeline(e,t);return this}async runAll(e){const t=[["GPTQ MatVec",()=>this.testGPTQMatvec()],["GPTQ MatVec (group boundary)",()=>this.testGPTQMatvecGroups()],["GPTQ MatVec (gs=32)",()=>this.testGPTQMatvecGS32()],["GPTQ Split-K",()=>this.testGPTQSplitK()],["GPTQ F16",()=>this.testGPTQF16()],["BF16 MatVec",()=>this.testBF16Matvec()],["RMSNorm",()=>this.testRMSNorm()],["RMSNorm (1+w formula)",()=>this.testRMSNorm1PlusW()],["SiLU * Mul",()=>this.testSiLUMul()],["Vector Add",()=>this.testAdd()],["Embedding (BF16)",()=>this.testEmbedding()],["Argmax",()=>this.testArgmax()],["Argmax (large)",()=>this.testArgmaxLarge()],["Split interleaved",()=>this.testSplit()],["Sigmoid Mul",()=>this.testSigmoidMul()],["Head RMSNorm (1+w)",()=>this.testHeadRMSNorm()],["Head RMSNorm (nogated, w)",()=>this.testHeadRMSNormNogated()],["Causal Conv1d",()=>this.testCausalConv1d()],["KV Cache Store",()=>this.testKVCacheStore()],["GQA Attention (single pos)",()=>this.testGQAAttention()],["DeltaNet Recurrent",()=>this.testDeltaNetRecurrent()],["Add + RMSNorm (fused)",()=>this.testAddRMSNorm()],["Three-way Add + RMSNorm",()=>this.testThreeWayAddRMSNorm()],["GQA RoPE rotate_half",()=>this.testGQARoPERotateHalf()],["Fused sigmoid + GPTQ",()=>this.testFusedSigmoidGPTQ()],["Fused SiLU + GPTQ",()=>this.testFusedSiluGPTQ()],["Fused AddNorm+GateUp+SiLU",()=>this.testFusedAddNormGateUpSiLU()],["Fused Split+QKNorm+KVStore",()=>this.testFusedSplitQKNormKVStore()],["Add+RMSNorm B=2",()=>this.testAddRMSNormB2()],["Add+RMSNorm RO B=2",()=>this.testAddRMSNormROB2()],["Three-way Add+RMSNorm B=2",()=>this.testThreeWayAddRMSNormB2()],["Fused SiLU+GPTQ B=2",()=>this.testFusedSiluGPTQB2()],["Fused Sigmoid+GPTQ B=2",()=>this.testFusedSigmoidGPTQB2()],["Fused 
GateUp+SiLU B=2",()=>this.testFusedGateUpSiLUB2()],["GPTQ Matmul B=2",()=>this.testGPTQMatmulB2()],["GPTQ Matmul B=2 F16",()=>this.testGPTQMatmulB2F16()],["GPTQ Matmul B=2 4T F16",()=>this.testGPTQMatmulB2_4T_F16()]];for(const[o,s]of t)try{const r=await s();this.results.push({name:o,...r}),e?.({name:o,...r})}catch(r){this.results.push({name:o,pass:!1,error:r.message,maxErr:NaN}),e?.({name:o,pass:!1,error:r.message,maxErr:NaN})}return this.results}destroy(){this.gpu?.destroy()}makeBuffer(e,t,o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){return this.gpu.createBufferFromData(e,new Uint8Array(t.buffer,t.byteOffset,t.byteLength),o)}makeF32Buffer(e,t){return this.makeBuffer(e,new Float32Array(t))}makeU32Buffer(e,t){return this.makeBuffer(e,new Uint32Array(t))}makeOutputBuffer(e,t){return this.gpu.createBuffer(e,t*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC)}makeUniform(e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),o=new DataView(t);for(let s=0;s<e.length;s++)typeof e[s]=="number"&&!Number.isInteger(e[s])?o.setFloat32(s*4,e[s],!0):o.setUint32(s*4,e[s],!0);return this.gpu.createBufferFromData("uniform",new Uint8Array(t),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}makeUniformTyped(e,t){const o=e.length+t.length,s=new ArrayBuffer(Math.max(16,Math.ceil(o*4/16)*16)),r=new DataView(s);let a=0;for(const d of e)r.setUint32(a,d,!0),a+=4;for(const d of t)r.setFloat32(a,d,!0),a+=4;return this.gpu.createBufferFromData("uniform_typed",new Uint8Array(s),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}dispatch(e,t,o,s=1){const r=this.pipelines[e],a=this.gpu.createBindGroup(r,0,t);this.gpu.dispatch(r,[a],o,s)}async readback(e,t){return this.gpu.readBuffer(e,t*4)}async readbackU32(e,t){const o=this.gpu.createReadbackBuffer("_rb",t*4),s=this.gpu.device.createCommandEncoder();s.copyBufferToBuffer(e,0,o,0,t*4),this.gpu.device.queue.submit([s.finish()]),await o.mapAsync(GPUMapMode.READ);const r=new 
Uint32Array(o.getMappedRange().slice(0));return o.unmap(),o.destroy(),r}compare(e,t,o=1e-4){let s=0;const r=[];for(let a=0;a<t.length;a++){const d=Math.abs(e[a]-t[a]);s=Math.max(s,d),d>o&&r.push({idx:a,got:e[a],expected:t[a],err:d})}return{pass:r.length===0,maxErr:s,errors:r.slice(0,5)}}cpuF32ToBF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const a=new Uint32Array(new Float32Array([e[s]]).buffer)[0]>>>16,d=Math.floor(s/2);s%2===0?o[d]=a:o[d]|=a<<16}return o}cpuF32ToF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const r=this.f32ToF16(e[s]),a=Math.floor(s/2);s%2===0?o[a]=r:o[a]|=r<<16}return o}f32ToF16(e){const t=new ArrayBuffer(4);new Float32Array(t)[0]=e;const o=new Uint32Array(t)[0],s=o>>31&1;let r=o>>23&255,a=o&8388607;return r===0?s<<15:r===255?s<<15|31744|(a?512:0):(r=r-127+15,r>=31?s<<15|31744:r<=0?s<<15:s<<15|r<<10|a>>13)}f16ToF32(e){const t=e>>15&1,o=e>>10&31,s=e&1023;return o===0?s===0?t?-0:0:(t?-1:1)*Math.pow(2,-14)*(s/1024):o===31?s?NaN:t?-1/0:1/0:(t?-1:1)*Math.pow(2,o-15)*(1+s/1024)}bf16ToF32(e){const t=e<<16,o=new ArrayBuffer(4);return new Uint32Array(o)[0]=t,new Float32Array(o)[0]}async testGPTQMatvec(){const r=new Float32Array(64);for(let k=0;k<64;k++)r[k]=(k+1)*.1;const a=new Uint32Array(32),d=new Float32Array(256);for(let k=0;k<8;k++)for(let u=0;u<4;u++){let m=0;for(let g=0;g<8;g++){const i=k*8+g,c=(i+u)%15;d[i*4+u]=c-8,m|=c<<g*4}a[k*4+u]=m}const f=new Float32Array(4);for(let k=0;k<4;k++)f[k]=.5+k*.1;const w=this.cpuF32ToF16Packed(f),h=new Float32Array(4);for(let k=0;k<4;k++){let u=0;for(let m=0;m<64;m++){const i=Math.floor(m/64)*4+k,c=w[Math.floor(i/2)]>>i%2*16&65535,_=this.f16ToF32(c);u+=r[m]*d[m*4+k]*_}h[k]=u}const F=this.makeF32Buffer("gptq_input",r),n=this.makeU32Buffer("gptq_qw",a),B=this.makeU32Buffer("gptq_scales",w),l=this.makeOutputBuffer("gptq_out",4),p=this.makeUniform([64,4,64]);this.dispatch("gptq_matvec",[F,n,B,l,p],Math.ceil(4/32));const 
y=await this.readback(l,4);return this.compare(y,h,.01)}async testGPTQMatvecGroups(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/64,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/64)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq2_in",r),B=this.makeU32Buffer("gptq2_qw",a),l=this.makeU32Buffer("gptq2_sc",h),p=this.makeOutputBuffer("gptq2_out",4),y=this.makeUniform([128,4,64]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQMatvecGS32(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/32,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/32)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq32_in",r),B=this.makeU32Buffer("gptq32_qw",a),l=this.makeU32Buffer("gptq32_sc",h),p=this.makeOutputBuffer("gptq32_out",4),y=this.makeUniform([128,4,32]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQSplitK(){const a=new Float32Array(256);for(let i=0;i<256;i++)a[i]=Math.sin(i*.3)*.5;const d=new Uint32Array(1024),f=new Float32Array(256*32);for(let 
i=0;i<32;i++)for(let c=0;c<32;c++){let _=0;for(let b=0;b<8;b++){const N=i*8+b,x=(N*3+c*7)%15;f[N*32+c]=x-8,_|=x<<b*4}d[i*32+c]=_}const w=256/64,h=new Float32Array(w*32);for(let i=0;i<w;i++)for(let c=0;c<32;c++)h[i*32+c]=.3+(i*32+c)*.02;const F=this.cpuF32ToF16Packed(h),n=new Float32Array(32);for(let i=0;i<32;i++){let c=0;for(let _=0;_<256;_++){const N=Math.floor(_/64)*32+i,x=F[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);c+=a[_]*f[_*32+i]*q}n[i]=c}const B=this.makeF32Buffer("sk_in",a),l=this.makeU32Buffer("sk_qw",d),p=this.makeU32Buffer("sk_sc",F),y=this.makeOutputBuffer("sk_partials",128),k=this.makeOutputBuffer("sk_out",32),u=this.makeUniform([256,32,64,4]),m=this.makeUniform([32,4]);this.dispatch("gptq_splitk",[B,l,p,y,u],Math.ceil(32/32),4),this.dispatch("reduce_splitk",[y,k,m],Math.ceil(32/256));const g=await this.readback(k,32);return this.compare(g,n,.01)}async testGPTQF16(){if(!this.gpu.hasF16||!this.pipelines.gptq_matvec_f16)return{pass:!0,maxErr:0,note:"shader-f16 not available, skipped"};const e=128,t=4,o=64,s=e/8,r=new Float32Array(e);for(let u=0;u<e;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let u=0;u<s;u++)for(let m=0;m<t;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c+m*3)%15;d[c*t+m]=_-8,g|=_<<i*4}a[u*t+m]=g}const f=e/o,w=new Float32Array(f*t);for(let u=0;u<f;u++)for(let m=0;m<t;m++)w[u*t+m]=.5+u*.1;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(t);for(let u=0;u<t;u++){let m=0;for(let g=0;g<e;g++){const c=Math.floor(g/o)*t+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*t+u]*b}F[u]=m}const n=this.makeF32Buffer("f16_in",r),B=this.makeU32Buffer("f16_qw",a),l=this.makeU32Buffer("f16_sc",h),p=this.makeOutputBuffer("f16_out",t),y=this.makeUniform([e,t,o]);this.dispatch("gptq_matvec_f16",[n,B,l,p,y],Math.ceil(t/32));const k=await this.readback(p,t);return this.compare(k,F,.5)}async testBF16Matvec(){const o=new Float32Array(8);for(let n=0;n<8;n++)o[n]=(n+1)*.2;const s=new 
Float32Array(32);for(let n=0;n<4;n++)for(let B=0;B<8;B++)s[n*8+B]=Math.cos(n*.5+B*.3);const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4);for(let n=0;n<4;n++){let B=0;for(let l=0;l<8;l++){const p=n*8+l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);B+=o[l]*u}a[n]=B}const d=this.makeF32Buffer("bf16mv_in",o),f=this.makeU32Buffer("bf16mv_w",r),w=this.makeOutputBuffer("bf16mv_out",4),h=this.makeUniform([8,4]);this.dispatch("bf16_matvec",[d,f,w,h],Math.ceil(4/32));const F=await this.readback(w,4);return this.compare(F,a,.001)}async testRMSNorm(){const o=new Float32Array(16);for(let l=0;l<16;l++)o[l]=(l-8)*.3;const s=new Float32Array(16);for(let l=0;l<16;l++)s[l]=Math.sin(l*.4)*.1;const r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<16;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/16+1e-6),f=new Float32Array(16);for(let l=0;l<16;l++){const p=l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);f[l]=o[l]*d*(1+u)}const w=this.makeF32Buffer("rmsn_in",o),h=this.makeU32Buffer("rmsn_w",r),F=this.makeOutputBuffer("rmsn_out",16),n=this.makeUniformTyped([16],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,16);return this.compare(B,f,1e-4)}async testRMSNorm1PlusW(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8).fill(0),r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<8;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/8+1e-6),f=new Float32Array(8);for(let l=0;l<8;l++)f[l]=o[l]*d;const w=this.makeF32Buffer("rmsn1_in",o),h=this.makeU32Buffer("rmsn1_w",r),F=this.makeOutputBuffer("rmsn1_out",8),n=this.makeUniformTyped([8],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,8);return this.compare(B,f,1e-4)}async testSiLUMul(){const t=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8);for(let h=0;h<8;h++){const F=t[h]/(1+Math.exp(-t[h]));s[h]=F*o[h]}const 
r=this.makeF32Buffer("silu_gate",t),a=this.makeF32Buffer("silu_up",o),d=this.makeOutputBuffer("silu_out",8),f=this.makeUniform([8]);this.dispatch("silu_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testAdd(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([.1,.2,.3,.4,.5,.6,.7,.8]),s=new Float32Array(8);for(let w=0;w<8;w++)s[w]=t[w]+o[w];const r=this.makeF32Buffer("add_a",t),a=this.makeF32Buffer("add_b",o),d=this.makeUniform([8]);this.dispatch("add",[r,a,d],Math.ceil(8/32));const f=await this.readback(r,8);return this.compare(f,s,1e-6)}async testEmbedding(){const o=new Float32Array(32);for(let F=0;F<o.length;F++)o[F]=(F+1)*.01;const s=this.cpuF32ToBF16Packed(o),r=2,a=new Float32Array(8);for(let F=0;F<8;F++){const n=r*8+F,B=s[Math.floor(n/2)],l=n%2===0?B&65535:B>>16&65535;a[F]=this.bf16ToF32(l)}const d=this.makeU32Buffer("emb_w",s),f=this.makeOutputBuffer("emb_out",8),w=this.makeUniform([r,8]);this.dispatch("embedding",[d,f,w],Math.ceil(8/256));const h=await this.readback(f,8);return this.compare(h,a,.001)}async testArgmax(){const t=new Float32Array(16);for(let w=0;w<16;w++)t[w]=Math.sin(w*.7)*3;const o=t.indexOf(Math.max(...t)),s=this.makeF32Buffer("argmax_in",t),r=this.makeOutputBuffer("argmax_res",2),a=this.makeUniform([16]);this.dispatch("argmax",[s,r,a],1);const f=(await this.readbackU32(r,2))[0];return{pass:f===o,maxErr:Math.abs(f-o),errors:f!==o?[{idx:0,got:f,expected:o}]:[]}}async testArgmaxLarge(){const t=new Float32Array(1024);for(let d=0;d<1024;d++)t[d]=Math.sin(d*.01)-2;t[773]=99;const o=this.makeF32Buffer("argmax_lg_in",t),s=this.makeOutputBuffer("argmax_lg_res",2),r=this.makeUniform([1024]);this.dispatch("argmax",[o,s,r],1);const a=await this.readbackU32(s,2);return{pass:a[0]===773,maxErr:Math.abs(a[0]-773),errors:a[0]!==773?[{got:a[0],expected:773}]:[]}}async testSplit(){const s=new Float32Array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]),r=new Float32Array([1,2,3,4,9,10,11,12]),a=new 
Float32Array([5,6,7,8,13,14,15,16]),d=this.makeF32Buffer("split_src",s),f=this.makeOutputBuffer("split_a",8),w=this.makeOutputBuffer("split_b",8),h=this.makeUniform([8,2,4]);this.dispatch("split",[d,f,w,h],Math.ceil(8/32));const F=await this.readback(f,8),n=await this.readback(w,8),B=this.compare(F,r,1e-6),l=this.compare(n,a,1e-6);return{pass:B.pass&&l.pass,maxErr:Math.max(B.maxErr,l.maxErr),errors:[...B.errors,...l.errors]}}async testSigmoidMul(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),s=new Float32Array(8);for(let h=0;h<8;h++)s[h]=t[h]/(1+Math.exp(-o[h]));const r=this.makeF32Buffer("sigmul_x",t),a=this.makeF32Buffer("sigmul_g",o),d=this.makeOutputBuffer("sigmul_out",8),f=this.makeUniform([8]);this.dispatch("sigmoid_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testHeadRMSNorm(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.1*(n+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*(1+u)}}const f=this.makeF32Buffer("hrmsn_x",s),w=this.makeU32Buffer("hrmsn_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testHeadRMSNormNogated(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.5+.1*n;const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*u}}const 
f=this.makeF32Buffer("hrmsng_x",s),w=this.makeU32Buffer("hrmsng_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm_nogated",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testCausalConv1d(){const t=new Float32Array([1,2,3,4]),o=new Float32Array(12);for(let y=0;y<o.length;y++)o[y]=(y+1)*.1;const s=new Float32Array(16);for(let y=0;y<s.length;y++)s[y]=.25;const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4),d=new Float32Array(12);for(let y=0;y<4;y++){const k=o[y],u=o[4+y],m=o[8+y],g=t[y],i=[];for(let _=0;_<4;_++){const b=y*4+_,N=r[Math.floor(b/2)],x=b%2===0?N&65535:N>>16&65535;i.push(this.bf16ToF32(x))}const c=i[0]*k+i[1]*u+i[2]*m+i[3]*g;a[y]=c/(1+Math.exp(-c)),d[y]=u,d[4+y]=m,d[8+y]=g}const f=this.makeF32Buffer("conv_x",t),w=this.makeF32Buffer("conv_hist",o),h=this.makeU32Buffer("conv_w",r),F=this.makeUniform([4]);this.dispatch("causal_conv1d",[f,w,h,F],Math.ceil(4/256));const n=await this.readback(f,4),B=await this.readback(w,12),l=this.compare(n,a,1e-4),p=this.compare(B,d,1e-4);return{pass:l.pass&&p.pass,maxErr:Math.max(l.maxErr,p.maxErr),errors:[...l.errors.map(y=>({...y,note:"conv output"})),...p.errors.map(y=>({...y,note:"history"}))]}}async testKVCacheStore(){const r=new Float32Array(8),a=new Float32Array(8);for(let c=0;c<8;c++)r[c]=c+1,a[c]=(c+1)*10;const f=8*8,w=new Float32Array(f),h=new Float32Array(f),F=new Float32Array(f),n=new Float32Array(f);for(let c=0;c<8;c++)F[24+c]=r[c],n[24+c]=a[c];const B=this.makeF32Buffer("kvs_k",r),l=this.makeF32Buffer("kvs_v",a),p=this.makeF32Buffer("kvs_kc",w),y=this.makeF32Buffer("kvs_vc",h),k=this.makeUniform([3,2,4]);this.dispatch("kv_cache_store",[B,l,p,y,k],Math.ceil(8/256));const u=await this.readback(p,f),m=await this.readback(y,f),g=this.compare(u,F,1e-6),i=this.compare(m,n,1e-6);return{pass:g.pass&&i.pass,maxErr:Math.max(g.maxErr,i.maxErr),errors:[...g.errors,...i.errors]}}async testGQAAttention(){const a=new Float32Array([1,0,0,0]),d=new 
Float32Array(4);d.set([1,0,0,0]);const f=new Float32Array(4);f.set([.5,.6,.7,.8]);const w=new Float32Array([.5,.6,.7,.8]),h=this.makeF32Buffer("gqa_q",a),F=this.makeF32Buffer("gqa_kc",d),n=this.makeF32Buffer("gqa_vc",f),B=this.makeOutputBuffer("gqa_out",4),l=new ArrayBuffer(32),p=new DataView(l);p.setUint32(0,1,!0),p.setUint32(4,4,!0),p.setUint32(8,1,!0),p.setUint32(12,1,!0),p.setUint32(16,1/1,!0),p.setUint32(20,1,!0);const y=this.gpu.createBufferFromData("gqa_params",new Uint8Array(l),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[h,F,n,B,y],1);const k=await this.readback(B,4);return this.compare(k,w,1e-4)}async testDeltaNetRecurrent(){const s=[.5,.5],r=[1,0],a=[.8,.2],d=new Float32Array([...s,...r,...a]),f=new Float32Array(4),w=new Float32Array([0]),h=new Float32Array([0]),F=new Float32Array([0]),n=new Float32Array([0]),B=this.cpuF32ToBF16Packed(F),l=this.cpuF32ToBF16Packed(n),p=1/(1+Math.exp(-0)),y=Math.sqrt(s[0]**2+s[1]**2),k=Math.sqrt(r[0]**2+r[1]**2),u=s.map(U=>U/y),m=r.map(U=>U/k),g=[0,0,0,0],i=[0,0],c=a.map((U,S)=>(U-i[S])*p);for(let U=0;U<2;U++)for(let S=0;S<2;S++)g[U*2+S]+=m[U]*c[S];const _=1/Math.sqrt(2),b=new Float32Array(2);for(let U=0;U<2;U++){let S=0;for(let O=0;O<2;O++)S+=g[O*2+U]*u[O]*_;b[U]=S}const N=this.makeF32Buffer("dn_qkv",d),x=this.makeF32Buffer("dn_state",f),q=this.makeOutputBuffer("dn_out",2),K=this.makeF32Buffer("dn_a",w),v=this.makeF32Buffer("dn_b",h),T=this.makeU32Buffer("dn_alog",B),V=this.makeU32Buffer("dn_dtb",l),G=this.makeUniform([1,2,2]);this.dispatch("deltanet_recurrent",[N,x,q,K,v,T,V,G],1);const C=await this.readback(q,2);return this.compare(C,b,.001)}async testAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array(8);for(let _=0;_<8;_++)r[_]=.1*(_+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(o);for(let _=0;_<8;_++)d[_]+=s[_];let f=0;for(let _=0;_<8;_++)f+=d[_]*d[_];const w=1/Math.sqrt(f/8+1e-6),h=new 
Float32Array(8);for(let _=0;_<8;_++){const b=a[Math.floor(_/2)]>>_%2*16&65535,N=this.bf16ToF32(b);h[_]=d[_]*w*(1+N)}const F=this.makeF32Buffer("addnorm_h",o),n=this.makeF32Buffer("addnorm_a",s),B=this.makeU32Buffer("addnorm_w",a),l=this.makeOutputBuffer("addnorm_out",8);this.makeUniform([8]);const p=new ArrayBuffer(16),y=new DataView(p);y.setUint32(0,8,!0),y.setFloat32(4,1e-6,!0);const k=this.gpu.createBufferFromData("addnorm_params",new Uint8Array(p),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("add_rmsnorm",[F,n,B,l,k],1);const u=await this.readback(l,8),m=await this.readback(F,8),g=d,i=this.compare(u,h,.001),c=this.compare(m,g,1e-6);return{pass:i.pass&&c.pass,maxErr:Math.max(i.maxErr,c.maxErr),errors:[...i.errors,...c.errors]}}async testThreeWayAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array([.5,.5,-.5,-.5,1,1,-1,-1]),a=new Float32Array(8);for(let N=0;N<8;N++)a[N]=.1*(N+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(8);for(let N=0;N<8;N++)f[N]=o[N]+s[N]+r[N];let w=0;for(let N=0;N<8;N++)w+=f[N]*f[N];const h=1/Math.sqrt(w/8+1e-6),F=new Float32Array(8);for(let N=0;N<8;N++){const x=d[Math.floor(N/2)]>>N%2*16&65535,q=this.bf16ToF32(x);F[N]=f[N]*h*(1+q)}const n=this.makeF32Buffer("twa_a",o),B=this.makeF32Buffer("twa_b",s),l=this.makeF32Buffer("twa_c",r),p=this.makeU32Buffer("twa_w",d),y=this.makeOutputBuffer("twa_hout",8),k=this.makeOutputBuffer("twa_normed",8),u=new ArrayBuffer(16),m=new DataView(u);m.setUint32(0,8,!0),m.setFloat32(4,1e-6,!0);const g=this.gpu.createBufferFromData("twa_params",new Uint8Array(u),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("three_way_add_rmsnorm",[n,B,l,p,y,k,g],1);const i=await this.readback(y,8),c=await this.readback(k,8),_=this.compare(i,f,1e-6),b=this.compare(c,F,.001);return{pass:_.pass&&b.pass,maxErr:Math.max(_.maxErr,b.maxErr),errors:[..._.errors,...b.errors]}}async testGQARoPERotateHalf(){const 
h=1/Math.sqrt(256),F=[];for(let D=0;D<32;D++)F.push(1/Math.pow(100,2*D/64));function n(D,M){const H=new Float32Array(D.length);for(let P=0;P<D.length;P++)H[P]=D[P];for(let P=0;P<32;P++){const Q=M*F[P],E=Math.cos(Q),J=Math.sin(Q);H[P]=D[P]*E-D[P+32]*J,H[P+32]=D[P+32]*E+D[P]*J}return H}const B=new Float32Array(256);B[0]=1,B[1]=.5,B[2]=.3,B[32]=.7,B[33]=.2;const l=new Float32Array(256);l[0]=1,l[32]=.5;const p=new Float32Array(256);p[1]=1,p[33]=.3;const y=new Float32Array(256);y[2]=1;const k=new Float32Array(256);k[0]=1;const u=new Float32Array(256);u[0]=0,u[1]=1;const m=new Float32Array(256);m[0]=0,m[2]=1;const g=n(B,2),i=n(l,0),c=n(p,1),_=n(y,2);let b=0,N=0,x=0;for(let D=0;D<256;D++)b+=g[D]*i[D],N+=g[D]*c[D],x+=g[D]*_[D];b*=h,N*=h,x*=h;const q=Math.max(b,N,x),K=Math.exp(b-q),v=Math.exp(N-q),T=Math.exp(x-q),V=K+v+T,G=K/V,C=v/V,U=T/V,S=new Float32Array(256);for(let D=0;D<256;D++)S[D]=G*k[D]+C*u[D]+U*m[D];const O=1*256,Y=new Float32Array(3*O),z=new Float32Array(3*O);Y.set(i,0),Y.set(c,O),Y.set(_,2*O),z.set(k,0),z.set(u,O),z.set(m,2*O);const X=this.makeF32Buffer("rope2_q",g),tt=this.makeF32Buffer("rope2_kc",Y),A=this.makeF32Buffer("rope2_vc",z),R=this.makeOutputBuffer("rope2_out",1*256),L=new ArrayBuffer(32),W=new DataView(L);W.setUint32(0,3,!0),W.setUint32(4,256,!0),W.setUint32(8,1,!0),W.setUint32(12,1,!0),W.setUint32(16,1/1,!0),W.setUint32(20,1,!0);const Z=this.gpu.createBufferFromData("rope2_params",new Uint8Array(L),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[X,tt,A,R,Z],1);const j=await this.readback(R,1*256);return this.compare(j,S,.001)}async testFusedSigmoidGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m+1)*.1,a[m]=(m-8)*.3;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.5+m*.1;const 
h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const c=r[i]/(1+Math.exp(-a[i])),b=Math.floor(i/64)*4+m,N=h[Math.floor(b/2)]>>b%2*16&65535,x=this.f16ToF32(N);g+=c*f[i*4+m]*x}F[m]=g}const n=this.makeF32Buffer("fsg_x",r),B=this.makeF32Buffer("fsg_g",a),l=this.makeU32Buffer("fsg_qw",d),p=this.makeU32Buffer("fsg_sc",h),y=this.makeOutputBuffer("fsg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_sigmoid_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedSiluGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m-8)*.2,a[m]=(m+1)*.15;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_*2+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.4+m*.2;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const _=r[i]/(1+Math.exp(-r[i]))*a[i],N=Math.floor(i/64)*4+m,x=h[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);g+=_*f[i*4+m]*q}F[m]=g}const n=this.makeF32Buffer("fslg_a",r),B=this.makeF32Buffer("fslg_b",a),l=this.makeU32Buffer("fslg_qw",d),p=this.makeU32Buffer("fslg_sc",h),y=this.makeOutputBuffer("fslg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_silu_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedAddNormGateUpSiLU(){const a=new Float32Array(64),d=new Float32Array(64),f=new Float32Array(64),w=new Float32Array(64);for(let A=0;A<64;A++)a[A]=(A+1)*.1,d[A]=(A-5)*.05,f[A]=(A%7-3)*.03,w[A]=(A%3-1)*.1;const h=new Uint32Array(32),F=new Uint32Array(32),n=new Float32Array(256),B=new Float32Array(256);for(let A=0;A<64;A++)for(let R=0;R<4;R++)n[A*4+R]=Math.sin(A*.7+R*1.3)*.5,B[A*4+R]=Math.cos(A*.3+R*.9)*.5;const l=new Uint32Array(4/2),p=new Uint32Array(4/2);for(let 
A=0;A<4;A++){let R=0,L=0;for(let P=0;P<64;P++)R=Math.max(R,Math.abs(n[P*4+A])),L=Math.max(L,Math.abs(B[P*4+A]));const W=R/7,Z=L/7,j=this.f32ToF16(W),D=this.f32ToF16(Z),M=this.f16ToF32(j),H=this.f16ToF32(D);A%2===0?(l[A/2]=j,p[A/2]=D):(l[(A-1)/2]|=j<<16,p[(A-1)/2]|=D<<16);for(let P=0;P<64;P++){const Q=Math.round(n[P*4+A]/M)+8,E=Math.round(B[P*4+A]/H)+8,J=Math.max(0,Math.min(15,Q)),rt=Math.max(0,Math.min(15,E)),st=Math.floor(P/8),at=P%8*4;h[st*4+A]|=J<<at,F[st*4+A]|=rt<<at}}const y=new Uint32Array(64/2);for(let A=0;A<64;A+=2){const R=this.f32ToBF16(w[A]),L=this.f32ToBF16(w[A+1]);y[A/2]=R|L<<16}const k=new Float32Array(64);let u=0;for(let A=0;A<64;A++)k[A]=a[A]+d[A]+f[A],u+=k[A]*k[A];const m=1/Math.sqrt(u/64+1e-6),g=new Float32Array(64);for(let A=0;A<64;A++){const R=this.unpackBF16(y[Math.floor(A/2)],A%2);g[A]=k[A]*m*(1+R)}const i=new Float32Array(4);for(let A=0;A<4;A++){let R=0,L=0;const W=this.f16ToF32(A%2===0?l[Math.floor(A/2)]&65535:l[Math.floor(A/2)]>>>16),Z=this.f16ToF32(A%2===0?p[Math.floor(A/2)]&65535:p[Math.floor(A/2)]>>>16);for(let j=0;j<64;j++){const D=Math.floor(j/8),M=j%8*4,H=h[D*4+A]>>>M&15,P=F[D*4+A]>>>M&15;R+=g[j]*W*(H-8),L+=g[j]*Z*(P-8)}i[A]=R/(1+Math.exp(-R))*L}const c=k,_=16+640*16,b=new ArrayBuffer(_),N=new DataView(b);N.setUint32(0,64,!0),N.setUint32(4,4,!0),N.setUint32(8,64,!0),N.setFloat32(12,1e-6,!0);for(let A=0;A<64/2;A++){const R=Math.floor(A/4),L=A%4;N.setUint32(16+R*16+L*4,y[A],!0)}const x=this.gpu.createBufferFromData("fang_params",new Uint8Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=new Uint32Array(64);q.set(h,0),q.set(F,32);const K=8/(64/8),v=new Uint32Array(K*4/2*2);v.set(l,0),v.set(p,K*4/2);const 
T=this.makeF32Buffer("fang_hin",a),V=this.makeF32Buffer("fang_mlpres",d),G=this.makeF32Buffer("fang_attnres",f),C=this.makeU32Buffer("fang_mqw",q),U=this.makeU32Buffer("fang_msc",v),S=this.makeOutputBuffer("fang_hout",64),O=this.makeOutputBuffer("fang_out",4);this.dispatch("fused_addnorm_gate_up_silu",[T,V,G,C,U,S,O,x],Math.max(1,Math.ceil(4/32)));const Y=await this.readback(O,4),z=await this.readback(S,64),X=this.compare(Y,i,.02),tt=this.compare(z,c,1e-6);return{pass:X.pass&&tt.pass,maxErr:Math.max(X.maxErr,tt.maxErr),errors:[...X.errors||[],...tt.errors||[]]}}async testFusedSplitQKNormKVStore(){const f=new Float32Array(32);for(let M=0;M<f.length;M++)f[M]=Math.sin(M*.5+1)*.3;const w=new Float32Array(8),h=new Float32Array(8);for(let M=0;M<8;M++)w[M]=Math.cos(M*.7+2)*.4,h[M]=Math.sin(M*1.1+3)*.2;const F=new Float32Array(16),n=new Float32Array(8);for(let M=0;M<16;M++)F[M]=(M%3-1)*.05;for(let M=0;M<8;M++)n[M]=(M%4-2)*.03;const B=new Uint32Array(16/2);for(let M=0;M<16;M+=2)B[M/2]=this.f32ToBF16(F[M])|this.f32ToBF16(F[M+1])<<16;const l=new Uint32Array(8/2);for(let M=0;M<8;M+=2)l[M/2]=this.f32ToBF16(n[M])|this.f32ToBF16(n[M+1])<<16;const p=new Float32Array(16),y=new Float32Array(16);for(let M=0;M<2;M++){const H=new Float32Array(8);for(let E=0;E<8;E++)H[E]=f[M*8*2+E],y[M*8+E]=f[M*8*2+8+E];let P=0;for(let E=0;E<8;E++)P+=H[E]*H[E];const Q=1/Math.sqrt(P/8+1e-6);for(let E=0;E<8;E++){const J=this.unpackBF16(B[Math.floor((M*8+E)/2)],(M*8+E)%2);p[M*8+E]=H[E]*Q*(1+J)}}const k=new Float32Array(8),u=new Float32Array(32),m=new Float32Array(32);for(let M=0;M<1;M++){let H=0;for(let Q=0;Q<8;Q++)H+=w[M*8+Q]*w[M*8+Q];const P=1/Math.sqrt(H/8+1e-6);for(let Q=0;Q<8;Q++){const E=this.unpackBF16(l[Math.floor((M*8+Q)/2)],(M*8+Q)%2),J=w[M*8+Q]*P*(1+E);k[M*8+Q]=J,u[24+M*8+Q]=J,m[24+M*8+Q]=h[M*8+Q]}}const g=32+320*16,i=new ArrayBuffer(g),c=new DataView(i);c.setUint32(0,2,!0),c.setUint32(4,1,!0),c.setUint32(8,8,!0),c.setFloat32(12,1e-6,!0),c.setUint32(16,3,!0);for(let M=0;M<16/2;M++){const 
H=Math.floor(M/4),P=M%4;c.setUint32(32+H*16+P*4,B[M],!0)}const _=16/2;for(let M=0;M<8/2;M++){const H=_+M,P=Math.floor(H/4),Q=H%4;c.setUint32(32+P*16+Q*4,l[M],!0)}const b=this.gpu.createBufferFromData("fsqk_params",new Uint8Array(i),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),N=this.makeF32Buffer("fsqk_qpf",f),x=this.makeF32Buffer("fsqk_kp",w),q=this.makeF32Buffer("fsqk_vp",h),K=this.makeOutputBuffer("fsqk_qp",16),v=this.makeOutputBuffer("fsqk_qg",16),T=this.makeOutputBuffer("fsqk_kc",32),V=this.makeOutputBuffer("fsqk_vc",32);this.dispatch("fused_split_qknorm_kvstore",[N,x,q,K,v,T,V,b],3);const G=await this.readback(K,16),C=await this.readback(v,16),U=await this.readback(x,8),S=await this.readback(T,32),O=await this.readback(V,32),Y=this.compare(G,p,1e-5),z=this.compare(C,y,1e-6),X=this.compare(U,k,1e-5),tt=S.slice(24,32),A=O.slice(24,32),R=u.slice(24,32),L=m.slice(24,32),W=this.compare(tt,R,1e-5),Z=this.compare(A,L,1e-6),j=Math.max(Y.maxErr,z.maxErr,X.maxErr,W.maxErr,Z.maxErr);return{pass:Y.pass&&z.pass&&X.pass&&W.pass&&Z.pass,maxErr:j,errors:[...Y.errors||[],...z.errors||[],...X.errors||[],...W.errors||[],...Z.errors||[]]}}async benchmarkGPTQ(e){const t=[{K:2048,N:2048,gs:128,label:"2048→2048 (Z/out_proj)"},{K:2048,N:6144,gs:128,label:"2048→6144 (QKV/gate+up)"},{K:6144,N:2048,gs:128,label:"6144→2048 (down_proj)"}],o=[1,2,4,8],s=50,r=3,a=10,d=[];for(const f of t){const{K:w,N:h,gs:F,label:n}=f,B=w/8,l=B/(F/8),p=new Float32Array(w);for(let b=0;b<w;b++)p[b]=Math.random()*2-1;const y=new Uint32Array(B*h);for(let b=0;b<y.length;b++)y[b]=Math.random()*4294967295>>>0;const k=new Float32Array(l*h);for(let b=0;b<k.length;b++)k[b]=Math.random()*.5;const u=this.cpuF32ToF16Packed(k),m=this.makeF32Buffer("bm_in",p),g=this.makeU32Buffer("bm_qw",y),i=this.makeU32Buffer("bm_sc",u),c=[];for(let b=0;b<s;b++)c.push(this.makeOutputBuffer(`bm_out_${b}`,h));const _=B*h*4+l*h*2;for(const b of o){if(b>1&&l%b!==0)continue;let N;if(b===1){const v=this.makeUniform([w,h,F]);for(let 
G=0;G<r;G++){this.gpu.beginBatch();for(let C=0;C<s;C++)this.dispatch("gptq_matvec",[m,g,i,c[C],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const T=[];for(let G=0;G<a;G++){await this.gpu.device.queue.onSubmittedWorkDone();const C=performance.now();this.gpu.beginBatch();for(let U=0;U<s;U++)this.dispatch("gptq_matvec",[m,g,i,c[U],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),T.push(performance.now()-C)}T.sort((G,C)=>G-C),N=T.slice(1,-1).reduce((G,C)=>G+C,0)/(T.length-2)/s}else{const v=[];for(let U=0;U<s;U++)v.push(this.makeOutputBuffer(`bm_part_${U}`,h*b));const T=this.makeUniform([w,h,F,b]),V=this.makeUniform([h,b]);for(let U=0;U<r;U++){this.gpu.beginBatch();for(let S=0;S<s;S++)this.dispatch("gptq_splitk",[m,g,i,v[S],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[S],c[S],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const G=[];for(let U=0;U<a;U++){await this.gpu.device.queue.onSubmittedWorkDone();const S=performance.now();this.gpu.beginBatch();for(let O=0;O<s;O++)this.dispatch("gptq_splitk",[m,g,i,v[O],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[O],c[O],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),G.push(performance.now()-S)}G.sort((U,S)=>U-S),N=G.slice(1,-1).reduce((U,S)=>U+S,0)/(G.length-2)/s}const x=_/1e9/(N/1e3),q=Math.ceil(h/32)*b,K={label:n,ns:b,wgs:q,avgMs:N.toFixed(4),bwGBs:x.toFixed(1)};d.push(K),e?.(K)}if(this.gpu.hasF16&&this.pipelines.gptq_matvec_f16){const b=this.makeUniform([w,h,F]);for(let T=0;T<r;T++){this.gpu.beginBatch();for(let V=0;V<s;V++)this.dispatch("gptq_matvec_f16",[m,g,i,c[V],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const N=[];for(let T=0;T<a;T++){await this.gpu.device.queue.onSubmittedWorkDone();const V=performance.now();this.gpu.beginBatch();for(let 
G=0;G<s;G++)this.dispatch("gptq_matvec_f16",[m,g,i,c[G],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),N.push(performance.now()-V)}N.sort((T,V)=>T-V);const q=N.slice(1,-1).reduce((T,V)=>T+V,0)/(N.length-2)/s,K=_/1e9/(q/1e3),v={label:n,ns:"f16",wgs:Math.ceil(h/32),avgMs:q.toFixed(4),bwGBs:K.toFixed(1)};d.push(v),e?.(v)}}return d}_makeNormParams(e,t=1e-6){const o=new ArrayBuffer(16),s=new DataView(o);return s.setUint32(0,e,!0),s.setFloat32(4,t,!0),this.gpu.createBufferFromData("norm_params",new Uint8Array(o),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}_cpuRMSNorm1PlusW(e,t,o,s=1e-6){let r=0;for(let f=0;f<o;f++)r+=e[f]*e[f];const a=1/Math.sqrt(r/o+s),d=new Float32Array(o);for(let f=0;f<o;f++){const w=t[Math.floor(f/2)]>>f%2*16&65535,h=this.bf16ToF32(w);d[f]=e[f]*a*(1+h)}return d}async testAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let u=0;u<32;u++)o[u]=Math.sin(u*.7)*2,s[u]=Math.cos(u*.3)*.5;const r=new Float32Array(16);for(let u=0;u<16;u++)r[u]=.1*(u+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(32),f=new Float32Array(32);for(let u=0;u<2;u++){const m=new Float32Array(16);for(let i=0;i<16;i++)m[i]=o[u*16+i]+s[u*16+i];for(let i=0;i<16;i++)f[u*16+i]=m[i];const g=this._cpuRMSNorm1PlusW(m,a,16,1e-6);d.set(g,u*16)}const w=this.makeF32Buffer("b2an_h",o),h=this.makeF32Buffer("b2an_a",s),F=this.makeU32Buffer("b2an_w",a),n=this.makeOutputBuffer("b2an_out",32),B=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_b2",[w,h,F,n,B],2);const l=await this.readback(n,32),p=await this.readback(w,32),y=this.compare(l,d,.001),k=this.compare(p,f,1e-6);return{pass:y.pass&&k.pass,maxErr:Math.max(y.maxErr,k.maxErr),errors:[...y.errors,...k.errors]}}async testAddRMSNormROB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let l=0;l<32;l++)o[l]=Math.sin(l*.5)*3,s[l]=Math.cos(l*.8)*.3;const r=new Float32Array(16);for(let l=0;l<16;l++)r[l]=.05*(l+1);const a=this.cpuF32ToBF16Packed(r),d=new 
Float32Array(32);for(let l=0;l<2;l++){const p=new Float32Array(16);for(let k=0;k<16;k++)p[k]=o[l*16+k]+s[l*16+k];const y=this._cpuRMSNorm1PlusW(p,a,16,1e-6);d.set(y,l*16)}const f=this.makeF32Buffer("b2anro_h",o),w=this.makeF32Buffer("b2anro_a",s),h=this.makeU32Buffer("b2anro_w",a),F=this.makeOutputBuffer("b2anro_out",32),n=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_ro_b2",[f,w,h,F,n],2);const B=await this.readback(F,32);return this.compare(B,d,.001)}async testThreeWayAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32),r=new Float32Array(32);for(let i=0;i<32;i++)o[i]=Math.sin(i*.4)*2,s[i]=Math.cos(i*.6)*.5,r[i]=Math.sin(i*1.1)*.3;const a=new Float32Array(16);for(let i=0;i<16;i++)a[i]=.1*(i+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(32),w=new Float32Array(32);for(let i=0;i<2;i++){const c=new Float32Array(16);for(let b=0;b<16;b++)c[b]=o[i*16+b]+s[i*16+b]+r[i*16+b];f.set(c,i*16);const _=this._cpuRMSNorm1PlusW(c,d,16,1e-6);w.set(_,i*16)}const h=this.makeF32Buffer("b2twa_a",o),F=this.makeF32Buffer("b2twa_b",s),n=this.makeF32Buffer("b2twa_c",r),B=this.makeU32Buffer("b2twa_w",d),l=this.makeOutputBuffer("b2twa_hout",32),p=this.makeOutputBuffer("b2twa_normed",32),y=this._makeNormParams(16,1e-6);this.dispatch("three_way_add_rmsnorm_b2",[h,F,n,B,l,p,y],2);const k=await this.readback(p,32),u=await this.readback(l,32),m=this.compare(k,w,.001),g=this.compare(u,f,1e-6);return{pass:m.pass&&g.pass,maxErr:Math.max(m.maxErr,g.maxErr),errors:[...m.errors,...g.errors]}}_makeGPTQData(e,t,o){const s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let h=0;h<s;h++)for(let F=0;F<t;F++){let n=0;for(let B=0;B<8;B++){const l=h*8+B,p=(l*3+F*7)%15;a[l*t+F]=p-8,n|=p<<B*4}r[h*t+F]=n}const d=e/o,f=new Float32Array(d*t);for(let h=0;h<d*t;h++)f[h]=.3+h*.15;const w=this.cpuF32ToF16Packed(f);return{qweight:r,rawWeights:a,scalesPacked:w}}_cpuGPTQMatvec(e,t,o,s,r,a,d=0){const f=new Float32Array(r);for(let w=0;w<r;w++){let h=0;for(let F=0;F<s;F++){const 
B=Math.floor(F/a)*r+w,l=o[Math.floor(B/2)]>>B%2*16&65535,p=this.f16ToF32(l);h+=e[d+F]*t[F*r+w]*p}f[w]=h}return f}async testFusedSiluGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.3)*2,f[k]=Math.cos(k*.5)*1.5;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=d[k*e+g];u[g]=i/(1+Math.exp(-i))*f[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sl_a",d),F=this.makeF32Buffer("b2sl_b",f),n=this.makeU32Buffer("b2sl_qw",s),B=this.makeU32Buffer("b2sl_sc",a),l=this.makeOutputBuffer("b2sl_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_silu_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}async testFusedSigmoidGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.4)*1.5,f[k]=Math.cos(k*.7)*2;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=1/(1+Math.exp(-f[k*e+g]));u[g]=i*d[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sg_x",d),F=this.makeF32Buffer("b2sg_g",f),n=this.makeU32Buffer("b2sg_qw",s),B=this.makeU32Buffer("b2sg_sc",a),l=this.makeOutputBuffer("b2sg_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_sigmoid_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}async testFusedGateUpSiLUB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let 
N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*3+x*7)%15;a[v*t+x]=T-8,q|=T<<K*4}r[N*t+x]=q}const d=e/o,f=new Float32Array(d*t);for(let N=0;N<d*t;N++)f[N]=.01+N%8*.005;const w=this.cpuF32ToF16Packed(f),h=new Uint32Array(s*t),F=new Float32Array(e*t);for(let N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*5+x*3)%15;F[v*t+x]=T-8,q|=T<<K*4}h[N*t+x]=q}const n=e/o,B=new Float32Array(n*t);for(let N=0;N<n*t;N++)B[N]=.01+N%8*.005;const l=this.cpuF32ToF16Packed(B),p=new Float32Array(2*e);for(let N=0;N<2*e;N++)p[N]=Math.sin(N*.2)*.5;const y=new Float32Array(2*t);for(let N=0;N<2;N++){const x=this._cpuGPTQMatvec(p,a,w,e,t,o,N*e),q=this._cpuGPTQMatvec(p,F,l,e,t,o,N*e);for(let K=0;K<t;K++){const v=x[K];y[N*t+K]=v/(1+Math.exp(-v))*q[K]}}const k=this.makeF32Buffer("b2gus_in",p),u=this.makeU32Buffer("b2gus_gqw",r),m=this.makeU32Buffer("b2gus_gsc",w),g=this.makeU32Buffer("b2gus_uqw",h),i=this.makeU32Buffer("b2gus_usc",l),c=this.makeOutputBuffer("b2gus_out",2*t),_=this.makeUniform([e,t,o]);this.dispatch("fused_gate_up_silu_b2_f16",[k,u,m,g,i,c,_],Math.ceil(t/32));const b=await this.readback(c,2*t);return this.compare(b,y,.05)}_makeGPTQMatmulB2Data(e,t,o){const s=e/8,r=new Float32Array(2*e);for(let n=0;n<e;n++)r[n]=Math.sin(n*.5)*.5;for(let n=0;n<e;n++)r[e+n]=Math.cos(n*.3)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let n=0;n<s;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<8;p++){const y=n*8+p,k=(y*3+B*7)%15;d[y*t+B]=k-8,l|=k<<p*4}a[n*t+B]=l}const f=e/o,w=new Float32Array(f*t);for(let n=0;n<f*t;n++)w[n]=.01+n%8*.005;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(2*t);for(let n=0;n<2;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<e;p++){const k=Math.floor(p/o)*t+B,u=h[Math.floor(k/2)]>>k%2*16&65535,m=this.f16ToF32(u);l+=r[n*e+p]*d[p*t+B]*m}F[n*t+B]=l}return{input:r,qweight:a,scalesPacked:h,expected:F,K:e,N:t,gs:o}}async 
testGPTQMatmulB2(){const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2_input",e),w=this.makeU32Buffer("b2_qw",t),h=this.makeU32Buffer("b2_sc",o),F=this.makeOutputBuffer("b2_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.02)}async testGPTQMatmulB2F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2f16_input",e),w=this.makeU32Buffer("b2f16_qw",t),h=this.makeU32Buffer("b2f16_sc",o),F=this.makeOutputBuffer("b2f16_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_f16",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}async testGPTQMatmulB2_4T_F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(256,16,64),f=this.makeF32Buffer("b2_4t_input",e),w=this.makeU32Buffer("b2_4t_qw",t),h=this.makeU32Buffer("b2_4t_sc",o),F=this.makeOutputBuffer("b2_4t_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_4t_f16",[f,w,h,F,n],Math.ceil(a/8));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}f32ToBF16(e){const t=new ArrayBuffer(4);return new Uint32Array(t)[0]=new Uint32Array(new Float32Array([e]).buffer)[0],new Uint32Array(t)[0]>>>16}unpackBF16(e,t){const o=e>>>t*16&65535;return this.bf16ToF32(o)}}function $(I){const e=document.getElementById("log");e.textContent+=I+`
3
  `,e.scrollTop=e.scrollHeight}document.querySelectorAll(".tab").forEach(I=>{I.addEventListener("click",()=>{document.querySelectorAll(".tab").forEach(e=>e.classList.remove("active")),document.querySelectorAll(".panel").forEach(e=>e.classList.remove("active")),I.classList.add("active"),document.getElementById(I.dataset.panel).classList.add("active")})});document.getElementById("runTests").addEventListener("click",async()=>{const I=document.getElementById("runTests"),e=document.getElementById("testResults"),t=document.getElementById("testSummary");I.disabled=!0,e.innerHTML="",t.innerHTML="",$("Initializing WebGPU for tests...");try{const o=new ot;await o.init(),$(`WebGPU initialized. Running tests...
4
+ `);let s=0,r=0;await o.runAll(a=>{const d=a.pass?"✓":"✗",f=a.pass?"pass":"fail";a.pass?s++:r++;const w=document.createElement("div");if(w.className="test-row",w.innerHTML=`
5
+ <span class="test-icon ${f}">${d}</span>
6
  <span class="test-name">${a.name}</span>
7
+ <span class="test-err ${f}">maxErr: ${typeof a.maxErr=="number"?a.maxErr.toExponential(2):"N/A"}</span>
8
  `,!a.pass&&a.errors?.length){const n=document.createElement("div");n.className="test-detail",n.textContent=a.errors.slice(0,3).map(B=>` idx=${B.idx??"?"}: got=${B.got?.toFixed?.(6)??B.got} expected=${B.expected?.toFixed?.(6)??B.expected}${B.note?" ("+B.note+")":""}`).join(`
9
  `),w.appendChild(n)}if(a.error){const n=document.createElement("div");n.className="test-detail",n.textContent=` Error: ${a.error}`,w.appendChild(n)}e.appendChild(w);const h=a.pass?"PASS":"FAIL",F=typeof a.maxErr=="number"?` (maxErr=${a.maxErr.toExponential(2)})`:"";$(`[${h}] ${a.name}${F}${a.error?" — "+a.error:""}`)}),t.innerHTML=`
10
  <div class="summary">
 
14
  </div>
15
  `,$(`
16
  Done: ${s} passed, ${r} failed`),o.destroy()}catch(o){$(`Error: ${o.message}
17
+ ${o.stack}`)}finally{I.disabled=!1}});document.getElementById("loadAndProfile").addEventListener("click",async()=>{const I=document.getElementById("loadAndProfile"),e=document.getElementById("profStatus"),t=document.getElementById("profCategories"),o=document.getElementById("profTopOps");I.disabled=!0,t.innerHTML="",o.innerHTML="";try{const s=document.getElementById("profRepo").value.trim();e.textContent="Loading model...",$("Profiler: loading model from "+s);const{GPUContext:r}=await et(async()=>{const{GPUContext:c}=await import("./gpu-ops-BbLjsC0p.js").then(_=>_.g);return{GPUContext:c}},[]),{Qwen35Model:a}=await et(async()=>{const{Qwen35Model:c}=await import("./qwen35-model-DrnSsmhP.js");return{Qwen35Model:c}},__vite__mapDeps([0,1])),{loadModelWeights:d,loadConfig:f,loadQuantConfig:w}=await et(async()=>{const{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}=await import("./safetensors-loader-CwGm5mJX.js");return{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}},[]),{Profiler:h}=await et(async()=>{const{Profiler:c}=await import("./profiler-DYUyiq-B.js");return{Profiler:c}},[]),F=new r;await F.init(),e.textContent="Fetching config...";const n=await f(s),B=await w(s);e.textContent="Downloading weights (this may take a while)...";const l=await d(s,c=>{if(c.phase==="downloading"){const _=c.total?(c.loaded/c.total*100).toFixed(0):"?";e.textContent=`Downloading: ${c.filesLoaded}/${c.filesTotal} shards (${_}%)`}else c.phase==="parsing"&&(e.textContent=`Parsing ${c.file}...`)});e.textContent="Uploading to GPU...";const p=new a(F,n,B);p.compilePipelines(),p.loadWeights(l),p.initBuffers(),e.textContent="Warming up...",await p.forward(1,0);const y=p.textCfg.linear_num_key_heads,k=p.textCfg.linear_key_head_dim,u=p.textCfg.linear_value_head_dim,m=y*(k+k+u);for(let c=0;c<p.numLayers;c++)p.layerTypes[c]==="linear_attention"&&(F.device.queue.writeBuffer(p.linState[c],0,new ArrayBuffer(y*k*u*4)),F.device.queue.writeBuffer(p.linConvHist[c],0,new 
ArrayBuffer(3*m*4)));p.seqLen=0,e.textContent="Profiling forward pass...",$("Profiling single forward pass...");const i=await new h(p).profileForward(1,0);e.textContent=`Done! Estimated: ${i.estimatedTokPerSec} tok/s (sync overhead inflates times)`,$(`
18
  Profile complete: ${i.total.toFixed(1)}ms total (~${i.estimatedTokPerSec} tok/s with sync overhead)`),$(`
19
  Category breakdown:`);for(const c of i.categories){const _=document.createElement("div");_.className="prof-row";const b=c.name.includes("matvec")||c.name==="lm_head"?"var(--orange)":c.name.includes("attention")||c.name.includes("deltanet")?"var(--blue)":"var(--green)";_.innerHTML=`
20
  <span class="prof-name">${c.name}</span>
 
37
  </div>
38
  </div>
39
  `,o.appendChild(_),$(` ${c.name.padEnd(35)} ${c.time.padStart(8)}ms ${c.pct.padStart(5)}%`)}F.destroy()}catch(s){e.textContent=`Error: ${s.message}`,$(`Profiler error: ${s.message}
40
+ ${s.stack}`)}finally{I.disabled=!1}});document.getElementById("runBenchmark").addEventListener("click",async()=>{const I=document.getElementById("runBenchmark"),e=document.getElementById("bmResults");I.disabled=!0,e.innerHTML="<p style='color:var(--dim)'>Initializing WebGPU...</p>",$("Starting GPTQ benchmark...");try{const t=new ot;await t.init(),e.innerHTML="";let o="";const s=await t.benchmarkGPTQ(r=>{if(r.label!==o){o=r.label;const f=document.createElement("h2");f.textContent=r.label,e.appendChild(f)}const a=document.createElement("div");a.className="prof-row";const d=(r.ns===1,"");a.innerHTML=`
41
  <span class="prof-name">splits=${r.ns} (${r.wgs} WGs)</span>
42
  <span class="prof-time">${r.avgMs}ms</span>
43
  <span class="prof-pct">${r.bwGBs} GB/s</span>
index.html CHANGED
@@ -138,10 +138,10 @@
138
  .toast-error { border-color: #ef4444; color: #ef4444; }
139
  .toast-success { border-color: var(--accent); color: var(--accent); }
140
  </style>
141
- <script type="module" crossorigin src="./assets/main-CKQMLD5b.js"></script>
142
- <link rel="modulepreload" crossorigin href="./assets/gpu-ops-DKsrMEcC.js">
143
- <link rel="modulepreload" crossorigin href="./assets/qwen35-model-BwnUri7A.js">
144
- <link rel="modulepreload" crossorigin href="./assets/safetensors-loader-CwGm5mJX.js">
145
  </head>
146
  <body>
147
  <div id="app"></div>
 
138
  .toast-error { border-color: #ef4444; color: #ef4444; }
139
  .toast-success { border-color: var(--accent); color: var(--accent); }
140
  </style>
141
+ <script type="module" crossorigin src="/assets/main-Y3tn35iX.js"></script>
142
+ <link rel="modulepreload" crossorigin href="/assets/gpu-ops-BbLjsC0p.js">
143
+ <link rel="modulepreload" crossorigin href="/assets/qwen35-model-DrnSsmhP.js">
144
+ <link rel="modulepreload" crossorigin href="/assets/safetensors-loader-CwGm5mJX.js">
145
  </head>
146
  <body>
147
  <div id="app"></div>
test.html CHANGED
@@ -41,8 +41,8 @@
41
  button.run-btn:hover:not(:disabled) { filter: brightness(1.15); }
42
  #log { margin-top: 12px; padding: 12px; background: var(--bg2); border: 1px solid var(--border); border-radius: 8px; max-height: 300px; overflow-y: auto; white-space: pre-wrap; font-size: 0.7rem; color: var(--dim); }
43
  </style>
44
- <script type="module" crossorigin src="./assets/test-DZKu3oxu.js"></script>
45
- <link rel="modulepreload" crossorigin href="./assets/gpu-ops-DKsrMEcC.js">
46
  </head>
47
  <body>
48
  <h1>TensorBend Shader Tests & Profiler</h1>
 
41
  button.run-btn:hover:not(:disabled) { filter: brightness(1.15); }
42
  #log { margin-top: 12px; padding: 12px; background: var(--bg2); border: 1px solid var(--border); border-radius: 8px; max-height: 300px; overflow-y: auto; white-space: pre-wrap; font-size: 0.7rem; color: var(--dim); }
43
  </style>
44
+ <script type="module" crossorigin src="/assets/test-BK90_Upb.js"></script>
45
+ <link rel="modulepreload" crossorigin href="/assets/gpu-ops-BbLjsC0p.js">
46
  </head>
47
  <body>
48
  <h1>TensorBend Shader Tests & Profiler</h1>